2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
36 #include <net/net_namespace.h>
38 #ifdef CONFIG_IP_VS_IPV6
40 #include <net/ip6_route.h>
42 #include <net/route.h>
44 #include <net/genetlink.h>
46 #include <asm/uaccess.h>
48 #include <net/ip_vs.h>
/*
 * Module-wide state: locks, defense-strategy knobs and sysctl variables.
 *
 * NOTE(review): every line in this listing carries a leading decimal line
 * number and the numbering has gaps -- lines appear to have been elided.
 * Comments below describe only what is visible; confirm against the
 * complete file.
 */
50 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
51 static DEFINE_MUTEX(__ip_vs_mutex);
53 /* lock for service table */
54 static DEFINE_RWLOCK(__ip_vs_svc_lock);
56 /* lock for table with the real services */
57 static DEFINE_RWLOCK(__ip_vs_rs_lock);
59 /* lock for state and timeout tables */
60 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
62 /* lock for drop entry handling */
63 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
65 /* lock for drop packet handling */
66 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
68 /* 1/rate drop and drop-entry variables */
69 int ip_vs_drop_rate = 0;
70 int ip_vs_drop_counter = 0;
71 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
73 /* number of virtual services */
74 static int ip_vs_num_services = 0;
76 /* sysctl variables */
77 static int sysctl_ip_vs_drop_entry = 0;
78 static int sysctl_ip_vs_drop_packet = 0;
79 static int sysctl_ip_vs_secure_tcp = 0;
/* amemthresh is compared against available memory in pages in
 * update_defense_level() below */
80 static int sysctl_ip_vs_amemthresh = 1024;
81 static int sysctl_ip_vs_am_droprate = 10;
82 int sysctl_ip_vs_cache_bypass = 0;
83 int sysctl_ip_vs_expire_nodest_conn = 0;
84 int sysctl_ip_vs_expire_quiescent_template = 0;
/* two-element array validated by proc_do_sync_threshold(): it requires
 * 0 <= [0] < [1] (see handler below) */
85 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86 int sysctl_ip_vs_nat_icmp_send = 0;
89 #ifdef CONFIG_IP_VS_DEBUG
90 static int sysctl_ip_vs_debug_level = 0;
/* Accessor for the current debug level (only built with CONFIG_IP_VS_DEBUG).
 * NOTE(review): braces and the closing #endif are elided in this listing. */
92 int ip_vs_get_debug_level(void)
94 return sysctl_ip_vs_debug_level;
98 #ifdef CONFIG_IP_VS_IPV6
99 /* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
/* Tests whether an IPv6 address is local to this host by doing a route
 * lookup in init_net and checking if the output device is loopback.
 * NOTE(review): the flowi setup and return statements are partially elided
 * in this listing; the visible check is "route resolves to IFF_LOOPBACK". */
100 static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
108 .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
111 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
112 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
119 * update_defense_level is called from keventd and from sysctl,
120 * so it needs to protect itself from softirqs
/* Re-evaluates the three DoS-defense strategies (drop_entry, drop_packet,
 * secure_tcp) based on whether free+buffered memory has fallen below
 * sysctl_ip_vs_amemthresh.  NOTE(review): many case labels and closing
 * braces are elided in this listing -- the state machines below are only
 * partially visible. */
122 static void update_defense_level(void)
125 static int old_secure_tcp = 0;
130 /* we only count free and buffered memory (in pages) */
132 availmem = i.freeram + i.bufferram;
133 /* however in linux 2.5 the i.bufferram is total page cache size,
135 /* si_swapinfo(&i); */
136 /* availmem = availmem - (i.totalswap - i.freeswap); */
138 nomem = (availmem < sysctl_ip_vs_amemthresh);
/* --- strategy 1: drop_entry (random connection-entry dropping) --- */
143 spin_lock(&__ip_vs_dropentry_lock);
144 switch (sysctl_ip_vs_drop_entry) {
146 atomic_set(&ip_vs_dropentry, 0);
150 atomic_set(&ip_vs_dropentry, 1);
/* auto mode: remember (value 2) that defense was enabled by memory pressure */
151 sysctl_ip_vs_drop_entry = 2;
153 atomic_set(&ip_vs_dropentry, 0);
158 atomic_set(&ip_vs_dropentry, 1);
160 atomic_set(&ip_vs_dropentry, 0);
161 sysctl_ip_vs_drop_entry = 1;
165 atomic_set(&ip_vs_dropentry, 1);
168 spin_unlock(&__ip_vs_dropentry_lock);
/* --- strategy 2: drop_packet (1/rate packet dropping) --- */
171 spin_lock(&__ip_vs_droppacket_lock);
172 switch (sysctl_ip_vs_drop_packet) {
/* rate scales with memory deficit; nomem guarantees amemthresh > availmem,
 * so the divisor is positive */
178 ip_vs_drop_rate = ip_vs_drop_counter
179 = sysctl_ip_vs_amemthresh /
180 (sysctl_ip_vs_amemthresh-availmem);
181 sysctl_ip_vs_drop_packet = 2;
188 ip_vs_drop_rate = ip_vs_drop_counter
189 = sysctl_ip_vs_amemthresh /
190 (sysctl_ip_vs_amemthresh-availmem);
193 sysctl_ip_vs_drop_packet = 1;
197 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
200 spin_unlock(&__ip_vs_droppacket_lock);
/* --- strategy 3: secure_tcp (tighter TCP state timeouts) --- */
203 write_lock(&__ip_vs_securetcp_lock);
204 switch (sysctl_ip_vs_secure_tcp) {
206 if (old_secure_tcp >= 2)
211 if (old_secure_tcp < 2)
213 sysctl_ip_vs_secure_tcp = 2;
215 if (old_secure_tcp >= 2)
221 if (old_secure_tcp < 2)
224 if (old_secure_tcp >= 2)
226 sysctl_ip_vs_secure_tcp = 1;
230 if (old_secure_tcp < 2)
234 old_secure_tcp = sysctl_ip_vs_secure_tcp;
/* >1 means "defense active": switch protocol timeout tables accordingly */
236 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237 write_unlock(&__ip_vs_securetcp_lock);
244 * Timer for checking the defense
246 #define DEFENSE_TIMER_PERIOD 1*HZ
247 static void defense_work_handler(struct work_struct *work);
248 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
/* Periodic (every DEFENSE_TIMER_PERIOD) worker: refresh the defense level,
 * drop random connection entries if drop_entry is active, then re-arm. */
250 static void defense_work_handler(struct work_struct *work)
252 update_defense_level();
253 if (atomic_read(&ip_vs_dropentry))
254 ip_vs_random_dropentry();
256 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
/* Pin/unpin this module while user-space holds IPVS resources.
 * NOTE(review): return types and braces are elided in this listing. */
260 ip_vs_use_count_inc(void)
262 return try_module_get(THIS_MODULE);
266 ip_vs_use_count_dec(void)
268 module_put(THIS_MODULE);
273 * Hash table: for virtual service lookups
275 #define IP_VS_SVC_TAB_BITS 8
276 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
279 /* the service table hashed by <protocol, addr, port> */
280 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281 /* the service table hashed by fwmark */
282 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
285 * Hash table: for real service lookups
287 #define IP_VS_RTAB_BITS 4
288 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
/* real-server (destination) table hashed by <proto,addr,port> */
291 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
294 * Trash for destinations
/* removed dests still referenced by connections park here (see
 * ip_vs_trash_get_dest below) */
296 static LIST_HEAD(ip_vs_dest_trash);
299 * FTP & NULL virtual service counters
/* fast-path hints for ip_vs_service_get(): nonzero means at least one
 * FTP-port / port-zero service exists */
301 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
306 * Returns hash value for virtual service
/* Folds proto, address (IPv6 folded by XOR of the four words) and port
 * into an index in [0, IP_VS_SVC_TAB_MASK]. */
308 static __inline__ unsigned
309 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
312 register unsigned porth = ntohs(port);
313 __be32 addr_fold = addr->ip;
315 #ifdef CONFIG_IP_VS_IPV6
317 addr_fold = addr->ip6[0]^addr->ip6[1]^
318 addr->ip6[2]^addr->ip6[3];
321 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
322 & IP_VS_SVC_TAB_MASK;
326 * Returns hash value of fwmark for virtual service lookup
/* fwmark table is indexed directly by the low bits of the mark */
328 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
330 return fwmark & IP_VS_SVC_TAB_MASK;
334 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335 * or in the ip_vs_svc_fwm_table by fwmark.
336 * Should be called with locked tables.
/* Returns early (error path elided in listing) if the service is already
 * hashed; otherwise links it into exactly one of the two tables and takes
 * a table reference on it. */
338 static int ip_vs_svc_hash(struct ip_vs_service *svc)
342 if (svc->flags & IP_VS_SVC_F_HASHED) {
343 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344 "called from %p\n", __builtin_return_address(0));
348 if (svc->fwmark == 0) {
350 * Hash it by <protocol,addr,port> in ip_vs_svc_table
352 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
354 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
357 * Hash it by fwmark in ip_vs_svc_fwm_table
359 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
363 svc->flags |= IP_VS_SVC_F_HASHED;
364 /* increase its refcnt because it is referenced by the svc table */
365 atomic_inc(&svc->refcnt);
371 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372 * Should be called with locked tables.
/* Mirror of ip_vs_svc_hash(): unlinks from whichever table the service
 * lives in, clears the HASHED flag and drops the table reference. */
374 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
376 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378 "called from %p\n", __builtin_return_address(0));
382 if (svc->fwmark == 0) {
383 /* Remove it from the ip_vs_svc_table table */
384 list_del(&svc->s_list);
386 /* Remove it from the ip_vs_svc_fwm_table table */
387 list_del(&svc->f_list);
390 svc->flags &= ~IP_VS_SVC_F_HASHED;
391 atomic_dec(&svc->refcnt);
397 * Get service by {proto,addr,port} in the service table.
/* Walks one bucket of ip_vs_svc_table; on a full match bumps svc->usecnt
 * (the in-use count, distinct from refcnt).  Caller must hold the service
 * table lock.  NOTE(review): the af comparison and return statements are
 * elided in this listing. */
399 static inline struct ip_vs_service *
400 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
404 struct ip_vs_service *svc;
406 /* Check for "full" addressed entries */
407 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
409 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
411 && ip_vs_addr_equal(af, &svc->addr, vaddr)
412 && (svc->port == vport)
413 && (svc->protocol == protocol)) {
415 atomic_inc(&svc->usecnt);
425 * Get service by {fwmark} in the service table.
/* fwmark-table counterpart of __ip_vs_service_get(); also bumps usecnt on
 * hit.  Caller must hold the service table lock. */
427 static inline struct ip_vs_service *
428 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
431 struct ip_vs_service *svc;
433 /* Check for fwmark addressed entries */
434 hash = ip_vs_svc_fwm_hashkey(fwmark);
436 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
437 if (svc->fwmark == fwmark && svc->af == af) {
439 atomic_inc(&svc->usecnt);
/* Public service lookup under the table read-lock.  Lookup order:
 *   1. fwmark table (if fwmark != 0)
 *   2. exact <proto,addr,port>
 *   3. FTP catch-all (packet may be an FTP data connection)
 *   4. port-zero catch-all service
 * Returns a service with usecnt incremented, or NULL. */
447 struct ip_vs_service *
448 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449 const union nf_inet_addr *vaddr, __be16 vport)
451 struct ip_vs_service *svc;
453 read_lock(&__ip_vs_svc_lock);
456 * Check the table hashed by fwmark first
458 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
462 * Check the table hashed by <protocol,addr,port>
463 * for "full" addressed entries
465 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
468 && protocol == IPPROTO_TCP
469 && atomic_read(&ip_vs_ftpsvc_counter)
470 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
472 * Check if ftp service entry exists, the packet
473 * might belong to FTP data connections.
475 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT)
479 && atomic_read(&ip_vs_nullsvc_counter)) {
481 * Check if the catch-all port (port zero) exists
483 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
487 read_unlock(&__ip_vs_svc_lock);
489 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490 fwmark, ip_vs_proto_name(protocol),
491 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492 svc ? "hit" : "not hit");
/* Bind/unbind a destination to its owning service.  Bind takes a refcnt on
 * the service; unbind drops it and (free path elided in listing) releases
 * the service when the count reaches zero. */
499 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
501 atomic_inc(&svc->refcnt);
506 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
508 struct ip_vs_service *svc = dest->svc;
511 if (atomic_dec_and_test(&svc->refcnt))
517 * Returns hash value for real service
/* Same folding scheme as ip_vs_svc_hashkey() but without the protocol and
 * masked to the (smaller) real-server table size. */
519 static inline unsigned ip_vs_rs_hashkey(int af,
520 const union nf_inet_addr *addr,
523 register unsigned porth = ntohs(port);
524 __be32 addr_fold = addr->ip;
526 #ifdef CONFIG_IP_VS_IPV6
528 addr_fold = addr->ip6[0]^addr->ip6[1]^
529 addr->ip6[2]^addr->ip6[3];
532 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
537 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538 * should be called with locked tables.
/* A non-empty d_list means the dest is already hashed -- bail out (error
 * path elided in listing). */
540 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
544 if (!list_empty(&dest->d_list)) {
549 * Hash by proto,addr,port,
550 * which are the parameters of the real service.
552 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
554 list_add(&dest->d_list, &ip_vs_rtable[hash]);
560 * UNhashes ip_vs_dest from ip_vs_rtable.
561 * should be called with locked tables.
/* Re-initializing d_list afterwards keeps "hashed" detectable via
 * list_empty() in ip_vs_rs_hash() above. */
563 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
566 * Remove it from the ip_vs_rtable table.
568 if (!list_empty(&dest->d_list)) {
569 list_del(&dest->d_list);
570 INIT_LIST_HEAD(&dest->d_list);
577 * Lookup real service by <proto,addr,port> in the real service table.
/* Takes the rs read-lock itself (unlike the svc-table helpers).  Returns
 * the first matching dest; match condition on protocol is partially elided
 * in this listing. */
580 ip_vs_lookup_real_service(int af, __u16 protocol,
581 const union nf_inet_addr *daddr,
585 struct ip_vs_dest *dest;
588 * Check for "full" addressed entries
589 * Return the first found entry
591 hash = ip_vs_rs_hashkey(af, daddr, dport);
593 read_lock(&__ip_vs_rs_lock);
594 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
596 && ip_vs_addr_equal(af, &dest->addr, daddr)
597 && (dest->port == dport)
598 && ((dest->protocol == protocol) ||
601 read_unlock(&__ip_vs_rs_lock);
605 read_unlock(&__ip_vs_rs_lock);
611 * Lookup destination by {addr,port} in the given service
/* Linear scan of svc->destinations; matches on af, address and port.
 * Does NOT take a reference -- callers manage refcnt themselves. */
613 static struct ip_vs_dest *
614 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
617 struct ip_vs_dest *dest;
620 * Find the destination for the given service
622 list_for_each_entry(dest, &svc->destinations, n_list) {
623 if ((dest->af == svc->af)
624 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625 && (dest->port == dport)) {
635 * Find destination by {daddr,dport,vaddr,protocol}
636 * Cretaed to be used in ip_vs_process_message() in
637 * the backup synchronization daemon. It finds the
638 * destination to be bound to the received connection
641 * ip_vs_lookup_real_service() looked promissing, but
642 * seems not working as expected.
/* Resolves the virtual service first, then the dest within it; on success
 * takes a refcnt on the dest and releases the service usecnt. */
644 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
646 const union nf_inet_addr *vaddr,
647 __be16 vport, __u16 protocol)
649 struct ip_vs_dest *dest;
650 struct ip_vs_service *svc;
652 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
655 dest = ip_vs_lookup_dest(svc, daddr, dport);
657 atomic_inc(&dest->refcnt);
658 ip_vs_service_put(svc);
663 * Lookup dest by {svc,addr,port} in the destination trash.
664 * The destination trash is used to hold the destinations that are removed
665 * from the service table but are still referenced by some conn entries.
666 * The reason to add the destination trash is when the dest is temporary
667 * down (either by administrator or by monitor program), the dest can be
668 * picked back from the trash, the remaining connections to the dest can
669 * continue, and the counting information of the dest is also useful for
/* Walks the trash; a dest matches if its real address/port and its virtual
 * identity (fwmark, or vaddr+vport for non-fwmark services) match svc.
 * As a side effect, any trashed dest whose refcnt has dropped to 1 (only
 * the trash reference left) is purged while scanning.
 * NOTE(review): the return-on-match line is elided in this listing. */
672 static struct ip_vs_dest *
673 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
676 struct ip_vs_dest *dest, *nxt;
679 * Find the destination in trash
681 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
682 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
685 IP_VS_DBG_ADDR(svc->af, &dest->addr),
687 atomic_read(&dest->refcnt));
688 if (dest->af == svc->af &&
689 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
690 dest->port == dport &&
691 dest->vfwmark == svc->fwmark &&
692 dest->protocol == svc->protocol &&
694 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
695 dest->vport == svc->port))) {
701 * Try to purge the destination from trash if not referenced
703 if (atomic_read(&dest->refcnt) == 1) {
704 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
707 IP_VS_DBG_ADDR(svc->af, &dest->addr),
709 list_del(&dest->n_list);
710 ip_vs_dst_reset(dest);
711 __ip_vs_unbind_svc(dest);
721 * Clean up all the destinations in the trash
722 * Called by the ip_vs_control_cleanup()
724 * When the ip_vs_control_clearup is activated by ipvs module exit,
725 * the service tables must have been flushed and all the connections
726 * are expired, and the refcnt of each destination in the trash must
727 * be 1, so we simply release them here.
/* Module-exit path: unconditionally unlink, drop cached route and service
 * binding for every trashed dest (kfree elided in this listing). */
729 static void ip_vs_trash_cleanup(void)
731 struct ip_vs_dest *dest, *nxt;
733 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734 list_del(&dest->n_list);
735 ip_vs_dst_reset(dest);
736 __ip_vs_unbind_svc(dest);
/* Reset a stats block (counter-clearing lines elided in listing) and its
 * rate estimator, under the stats lock with BHs disabled. */
743 ip_vs_zero_stats(struct ip_vs_stats *stats)
745 spin_lock_bh(&stats->lock);
759 ip_vs_zero_estimator(stats);
761 spin_unlock_bh(&stats->lock);
765 * Update a destination in the given service
/* Applies user-supplied settings (weight, conn flags, thresholds) to a
 * dest, fixes up the forwarding method for local addresses, (re)hashes it
 * in the real-server table and (re)binds it to svc. */
768 __ip_vs_update_dest(struct ip_vs_service *svc,
769 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
777 /* check if local node and update the flags */
778 #ifdef CONFIG_IP_VS_IPV6
779 if (svc->af == AF_INET6) {
780 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
/* a local real server is handled by the LOCALNODE method, whatever
 * forwarding method the user asked for */
781 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782 | IP_VS_CONN_F_LOCALNODE;
786 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788 | IP_VS_CONN_F_LOCALNODE;
791 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
796 * Put the real service in ip_vs_rtable if not present.
797 * For now only for NAT!
799 write_lock_bh(&__ip_vs_rs_lock);
801 write_unlock_bh(&__ip_vs_rs_lock);
803 atomic_set(&dest->conn_flags, conn_flags);
805 /* bind the service */
807 __ip_vs_bind_svc(dest, svc);
/* rebinding to a different service: drop old binding and restart stats */
809 if (dest->svc != svc) {
810 __ip_vs_unbind_svc(dest);
811 ip_vs_zero_stats(&dest->stats);
812 __ip_vs_bind_svc(dest, svc);
816 /* set the dest status flags */
817 dest->flags |= IP_VS_DEST_F_AVAILABLE;
/* raising/clearing the upper threshold lifts any overload state */
819 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821 dest->u_threshold = udest->u_threshold;
822 dest->l_threshold = udest->l_threshold;
827 * Create a destination for the given service
/* Validates that the real-server address is usable (unicast, or local),
 * allocates and zero-initializes the dest, copies identity from svc/udest,
 * then applies user settings via __ip_vs_update_dest() and starts its rate
 * estimator.  Result is returned through *dest_p (elided in listing). */
830 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
831 struct ip_vs_dest **dest_p)
833 struct ip_vs_dest *dest;
838 #ifdef CONFIG_IP_VS_IPV6
839 if (svc->af == AF_INET6) {
840 atype = ipv6_addr_type(&udest->addr.in6);
841 if (!(atype & IPV6_ADDR_UNICAST) &&
842 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
847 atype = inet_addr_type(&init_net, udest->addr.ip);
848 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
852 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
854 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
/* identity of the virtual service this dest belongs to */
859 dest->protocol = svc->protocol;
860 dest->vaddr = svc->addr;
861 dest->vport = svc->port;
862 dest->vfwmark = svc->fwmark;
863 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
864 dest->port = udest->port;
866 atomic_set(&dest->activeconns, 0);
867 atomic_set(&dest->inactconns, 0);
868 atomic_set(&dest->persistconns, 0);
869 atomic_set(&dest->refcnt, 0);
871 INIT_LIST_HEAD(&dest->d_list);
872 spin_lock_init(&dest->dst_lock);
873 spin_lock_init(&dest->stats.lock);
874 __ip_vs_update_dest(svc, dest, udest);
875 ip_vs_new_estimator(&dest->stats);
885 * Add a destination into an existing service
/* User-context entry point.  Rejects bad weight/thresholds, refuses a
 * duplicate dest, resurrects a matching dest from the trash if possible,
 * otherwise allocates a new one; in either case links it into
 * svc->destinations under the svc write-lock and notifies the scheduler. */
888 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
890 struct ip_vs_dest *dest;
891 union nf_inet_addr daddr;
892 __be16 dport = udest->port;
897 if (udest->weight < 0) {
898 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
902 if (udest->l_threshold > udest->u_threshold) {
903 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
904 "upper threshold\n");
908 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
911 * Check if the dest already exists in the list
913 dest = ip_vs_lookup_dest(svc, &daddr, dport);
916 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
921 * Check if the dest already exists in the trash and
922 * is from the same service
924 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
927 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
928 "dest->refcnt=%d, service %u/%s:%u\n",
929 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
930 atomic_read(&dest->refcnt),
932 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
935 __ip_vs_update_dest(svc, dest, udest);
938 * Get the destination from the trash
940 list_del(&dest->n_list);
942 ip_vs_new_estimator(&dest->stats);
944 write_lock_bh(&__ip_vs_svc_lock);
947 * Wait until all other svc users go away.
949 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
951 list_add(&dest->n_list, &svc->destinations);
954 /* call the update_service function of its scheduler */
955 if (svc->scheduler->update_service)
956 svc->scheduler->update_service(svc);
958 write_unlock_bh(&__ip_vs_svc_lock);
963 * Allocate and initialize the dest structure
965 ret = ip_vs_new_dest(svc, udest, &dest);
971 * Add the dest entry into the list
/* list reference on the dest */
973 atomic_inc(&dest->refcnt);
975 write_lock_bh(&__ip_vs_svc_lock);
978 * Wait until all other svc users go away.
980 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
982 list_add(&dest->n_list, &svc->destinations);
985 /* call the update_service function of its scheduler */
986 if (svc->scheduler->update_service)
987 svc->scheduler->update_service(svc);
989 write_unlock_bh(&__ip_vs_svc_lock);
998 * Edit a destination in the given service
/* Same parameter validation as ip_vs_add_dest(), but the dest must already
 * exist; applies the new settings and lets the scheduler re-evaluate (the
 * weight may have changed). */
1001 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1003 struct ip_vs_dest *dest;
1004 union nf_inet_addr daddr;
1005 __be16 dport = udest->port;
1009 if (udest->weight < 0) {
1010 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1014 if (udest->l_threshold > udest->u_threshold) {
1015 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1016 "upper threshold\n");
1020 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1023 * Lookup the destination list
1025 dest = ip_vs_lookup_dest(svc, &daddr, dport);
1028 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1032 __ip_vs_update_dest(svc, dest, udest);
1034 write_lock_bh(&__ip_vs_svc_lock);
1036 /* Wait until all other svc users go away */
1037 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1039 /* call the update_service, because server weight may be changed */
1040 if (svc->scheduler->update_service)
1041 svc->scheduler->update_service(svc);
1043 write_unlock_bh(&__ip_vs_svc_lock);
1052 * Delete a destination (must be already unlinked from the service)
/* Stops the estimator, unhashes from the real-server table, then either
 * frees the dest (kfree elided in listing) when the last reference goes,
 * or parks it in the trash with an extra trash reference. */
1054 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1056 ip_vs_kill_estimator(&dest->stats);
1059 * Remove it from the d-linked list with the real services.
1061 write_lock_bh(&__ip_vs_rs_lock);
1062 ip_vs_rs_unhash(dest);
1063 write_unlock_bh(&__ip_vs_rs_lock);
1066 * Decrease the refcnt of the dest, and free the dest
1067 * if nobody refers to it (refcnt=0). Otherwise, throw
1068 * the destination into the trash.
1070 if (atomic_dec_and_test(&dest->refcnt)) {
1071 ip_vs_dst_reset(dest);
1072 /* simply decrease svc->refcnt here, let the caller check
1073 and release the service if nobody refers to it.
1074 Only user context can release destination and service,
1075 and only one user context can update virtual service at a
1076 time, so the operation here is OK */
1077 atomic_dec(&dest->svc->refcnt);
1080 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1081 "dest->refcnt=%d\n",
1082 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1084 atomic_read(&dest->refcnt));
1085 list_add(&dest->n_list, &ip_vs_dest_trash);
/* the trash list itself holds a reference */
1086 atomic_inc(&dest->refcnt);
1092 * Unlink a destination from the given service
/* Marks the dest unavailable and removes it from svc->destinations;
 * notifies the scheduler only when 'svcupd' is set (skipped when the whole
 * service is being torn down). */
1094 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1095 struct ip_vs_dest *dest,
1098 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1101 * Remove it from the d-linked destination list.
1103 list_del(&dest->n_list);
1107 * Call the update_service function of its scheduler
1109 if (svcupd && svc->scheduler->update_service)
1110 svc->scheduler->update_service(svc);
1115 * Delete a destination server in the given service
/* User-context entry point: looks the dest up, unlinks it from the service
 * under the svc write-lock (after all other users drain), then disposes of
 * it via __ip_vs_del_dest(). */
1118 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1120 struct ip_vs_dest *dest;
1121 __be16 dport = udest->port;
1125 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1128 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1132 write_lock_bh(&__ip_vs_svc_lock);
1135 * Wait until all other svc users go away.
1137 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1140 * Unlink dest from the service
1142 __ip_vs_unlink_dest(svc, dest, 1);
1144 write_unlock_bh(&__ip_vs_svc_lock);
1147 * Delete the destination
1149 __ip_vs_del_dest(dest);
1158 * Add a service into the service hash table
/* Creates a virtual service from user-space parameters: pins the module,
 * resolves the scheduler by name, allocates and fills the service, binds
 * the scheduler, updates the FTP/null counters, starts the estimator and
 * hashes it into the table.  Error unwinding labels are partially elided
 * in this listing. */
1161 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1162 struct ip_vs_service **svc_p)
1165 struct ip_vs_scheduler *sched = NULL;
1166 struct ip_vs_service *svc = NULL;
1168 /* increase the module use count */
1169 ip_vs_use_count_inc();
1171 /* Lookup the scheduler by 'u->sched_name' */
1172 sched = ip_vs_scheduler_get(u->sched_name);
1173 if (sched == NULL) {
1174 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1180 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1182 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1187 /* I'm the first user of the service */
1188 atomic_set(&svc->usecnt, 1);
1189 atomic_set(&svc->refcnt, 0);
1192 svc->protocol = u->protocol;
1193 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1194 svc->port = u->port;
1195 svc->fwmark = u->fwmark;
1196 svc->flags = u->flags;
1197 svc->timeout = u->timeout * HZ;
1198 svc->netmask = u->netmask;
1200 INIT_LIST_HEAD(&svc->destinations);
1201 rwlock_init(&svc->sched_lock);
1202 spin_lock_init(&svc->stats.lock);
1204 /* Bind the scheduler */
1205 ret = ip_vs_bind_scheduler(svc, sched);
1210 /* Update the virtual service counters */
1211 if (svc->port == FTPPORT)
1212 atomic_inc(&ip_vs_ftpsvc_counter);
1213 else if (svc->port == 0)
1214 atomic_inc(&ip_vs_nullsvc_counter);
1216 ip_vs_new_estimator(&svc->stats);
1217 ip_vs_num_services++;
1219 /* Hash the service into the service table */
1220 write_lock_bh(&__ip_vs_svc_lock);
1221 ip_vs_svc_hash(svc);
1222 write_unlock_bh(&__ip_vs_svc_lock);
/* error unwinding: undo scheduler binding, app inc, scheduler ref and
 * module use count in reverse order of acquisition */
1230 ip_vs_unbind_scheduler(svc);
1233 ip_vs_app_inc_put(svc->inc);
1238 ip_vs_scheduler_put(sched);
1241 /* decrease the module use count */
1242 ip_vs_use_count_dec();
1249 * Edit a service and bind it with a new scheduler
/* Updates flags/timeout/netmask under the svc write-lock (after all other
 * users drain) and, when the scheduler name changed, swaps schedulers --
 * attempting to restore the old one if binding the new one fails. */
1252 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1254 struct ip_vs_scheduler *sched, *old_sched;
1258 * Lookup the scheduler, by 'u->sched_name'
1260 sched = ip_vs_scheduler_get(u->sched_name);
1261 if (sched == NULL) {
1262 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1268 write_lock_bh(&__ip_vs_svc_lock);
1271 * Wait until all other svc users go away.
1273 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1276 * Set the flags and timeout value
1278 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1279 svc->timeout = u->timeout * HZ;
1280 svc->netmask = u->netmask;
1282 old_sched = svc->scheduler;
1283 if (sched != old_sched) {
1285 * Unbind the old scheduler
1287 if ((ret = ip_vs_unbind_scheduler(svc))) {
1293 * Bind the new scheduler
1295 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1297 * If ip_vs_bind_scheduler fails, restore the old
1299 * The main reason of failure is out of memory.
1301 * The question is if the old scheduler can be
1302 * restored all the time. TODO: if it cannot be
1303 * restored some time, we must delete the service,
1304 * otherwise the system may crash.
1306 ip_vs_bind_scheduler(svc, old_sched);
1313 write_unlock_bh(&__ip_vs_svc_lock);
1316 ip_vs_scheduler_put(old_sched);
1323 * Delete a service from the service list
1324 * - The service must be unlinked, unlocked and not referenced!
1325 * - We are called under _bh lock
/* Tears a service down: stop estimator, unbind scheduler and app inc,
 * delete every destination, fix the FTP/null counters, free the service
 * if unreferenced (kfree elided in listing), and unpin the module. */
1327 static void __ip_vs_del_service(struct ip_vs_service *svc)
1329 struct ip_vs_dest *dest, *nxt;
1330 struct ip_vs_scheduler *old_sched;
1332 ip_vs_num_services--;
1333 ip_vs_kill_estimator(&svc->stats);
1335 /* Unbind scheduler */
1336 old_sched = svc->scheduler;
1337 ip_vs_unbind_scheduler(svc);
1339 ip_vs_scheduler_put(old_sched);
1341 /* Unbind app inc */
1343 ip_vs_app_inc_put(svc->inc);
1348 * Unlink the whole destination list
/* svcupd=0: no point notifying the scheduler, it is already unbound */
1350 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1351 __ip_vs_unlink_dest(svc, dest, 0);
1352 __ip_vs_del_dest(dest);
1356 * Update the virtual service counters
1358 if (svc->port == FTPPORT)
1359 atomic_dec(&ip_vs_ftpsvc_counter);
1360 else if (svc->port == 0)
1361 atomic_dec(&ip_vs_nullsvc_counter);
1364 * Free the service if nobody refers to it
1366 if (atomic_read(&svc->refcnt) == 0)
1369 /* decrease the module use count */
1370 ip_vs_use_count_dec();
1374 * Delete a service from the service list
/* User-context wrapper: unhash first so no new users can find the service,
 * wait for existing users to drain, then tear it down. */
1376 static int ip_vs_del_service(struct ip_vs_service *svc)
1382 * Unhash it from the service table
1384 write_lock_bh(&__ip_vs_svc_lock);
1386 ip_vs_svc_unhash(svc);
1389 * Wait until all the svc users go away.
1391 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1393 __ip_vs_del_service(svc);
1395 write_unlock_bh(&__ip_vs_svc_lock);
1402 * Flush all the virtual services
/* Deletes every service from both hash tables, bucket by bucket, taking
 * the svc write-lock per service.  Note the wait here is "> 0" (the caller
 * holds no usecnt), unlike the "> 1" in ip_vs_del_service(). */
1404 static int ip_vs_flush(void)
1407 struct ip_vs_service *svc, *nxt;
1410 * Flush the service table hashed by <protocol,addr,port>
1412 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1413 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1414 write_lock_bh(&__ip_vs_svc_lock);
1415 ip_vs_svc_unhash(svc);
1417 * Wait until all the svc users go away.
1419 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1420 __ip_vs_del_service(svc);
1421 write_unlock_bh(&__ip_vs_svc_lock);
1426 * Flush the service table hashed by fwmark
1428 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1429 list_for_each_entry_safe(svc, nxt,
1430 &ip_vs_svc_fwm_table[idx], f_list) {
1431 write_lock_bh(&__ip_vs_svc_lock);
1432 ip_vs_svc_unhash(svc);
1434 * Wait until all the svc users go away.
1436 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1437 __ip_vs_del_service(svc);
1438 write_unlock_bh(&__ip_vs_svc_lock);
1447 * Zero counters in a service or all services
/* Zeroes stats of one service and all its destinations under the svc
 * write-lock. */
1449 static int ip_vs_zero_service(struct ip_vs_service *svc)
1451 struct ip_vs_dest *dest;
1453 write_lock_bh(&__ip_vs_svc_lock);
1454 list_for_each_entry(dest, &svc->destinations, n_list) {
1455 ip_vs_zero_stats(&dest->stats);
1457 ip_vs_zero_stats(&svc->stats);
1458 write_unlock_bh(&__ip_vs_svc_lock);
/* Zeroes every service in both tables plus the global ip_vs_stats. */
1462 static int ip_vs_zero_all(void)
1465 struct ip_vs_service *svc;
1467 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1468 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1469 ip_vs_zero_service(svc);
1473 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1475 ip_vs_zero_service(svc);
1479 ip_vs_zero_stats(&ip_vs_stats);
/* sysctl handler for the defense-mode knobs (drop_entry, drop_packet,
 * secure_tcp): accepts 0..3, restores the previous value on out-of-range
 * writes (restore line elided in listing), and re-runs
 * update_defense_level() after a valid change. */
1485 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1486 void __user *buffer, size_t *lenp, loff_t *ppos)
1488 int *valp = table->data;
1492 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1493 if (write && (*valp != val)) {
1494 if ((*valp < 0) || (*valp > 3)) {
1495 /* Restore the correct value */
1498 update_defense_level();
/* sysctl handler for sync_threshold[2]: rejects writes unless
 * 0 <= threshold < period by restoring the saved pair. */
1506 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1507 void __user *buffer, size_t *lenp, loff_t *ppos)
1509 int *valp = table->data;
1513 /* backup the value first */
1514 memcpy(val, valp, sizeof(val));
1516 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1517 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1518 /* Restore the correct value */
1519 memcpy(valp, val, sizeof(val));
1526 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
/*
 * Each entry maps a /proc/sys/net/ipv4/vs/<procname> file to a kernel
 * variable plus the proc handler used to read/write it.  Defense-mode
 * knobs use proc_do_defense_mode (validated 0..3 writes); DoS timeout
 * knobs use proc_dointvec_jiffies (values stored in jiffies).
 */
1529 static struct ctl_table vs_vars[] = {
1531 .procname = "amemthresh",
1532 .data = &sysctl_ip_vs_amemthresh,
1533 .maxlen = sizeof(int),
1534 .proc_handler = &proc_dointvec,
1537 #ifdef CONFIG_IP_VS_DEBUG
1539 .procname = "debug_level",
1540 .data = &sysctl_ip_vs_debug_level,
1541 .maxlen = sizeof(int),
1543 .proc_handler = &proc_dointvec,
1547 .procname = "am_droprate",
1548 .data = &sysctl_ip_vs_am_droprate,
1549 .maxlen = sizeof(int),
1551 .proc_handler = &proc_dointvec,
/* defense-mode entries: writes validated to the range 0..3 */
1554 .procname = "drop_entry",
1555 .data = &sysctl_ip_vs_drop_entry,
1556 .maxlen = sizeof(int),
1558 .proc_handler = &proc_do_defense_mode,
1561 .procname = "drop_packet",
1562 .data = &sysctl_ip_vs_drop_packet,
1563 .maxlen = sizeof(int),
1565 .proc_handler = &proc_do_defense_mode,
1568 .procname = "secure_tcp",
1569 .data = &sysctl_ip_vs_secure_tcp,
1570 .maxlen = sizeof(int),
1572 .proc_handler = &proc_do_defense_mode,
/* per-state timeouts used by the secure_tcp DoS defense table;
 * stored in jiffies, exposed to userspace in seconds */
1576 .procname = "timeout_established",
1577 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1578 .maxlen = sizeof(int),
1580 .proc_handler = &proc_dointvec_jiffies,
1583 .procname = "timeout_synsent",
1584 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1585 .maxlen = sizeof(int),
1587 .proc_handler = &proc_dointvec_jiffies,
1590 .procname = "timeout_synrecv",
1591 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1592 .maxlen = sizeof(int),
1594 .proc_handler = &proc_dointvec_jiffies,
1597 .procname = "timeout_finwait",
1598 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1599 .maxlen = sizeof(int),
1601 .proc_handler = &proc_dointvec_jiffies,
1604 .procname = "timeout_timewait",
1605 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1606 .maxlen = sizeof(int),
1608 .proc_handler = &proc_dointvec_jiffies,
1611 .procname = "timeout_close",
1612 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1613 .maxlen = sizeof(int),
1615 .proc_handler = &proc_dointvec_jiffies,
1618 .procname = "timeout_closewait",
1619 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1620 .maxlen = sizeof(int),
1622 .proc_handler = &proc_dointvec_jiffies,
1625 .procname = "timeout_lastack",
1626 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1627 .maxlen = sizeof(int),
1629 .proc_handler = &proc_dointvec_jiffies,
1632 .procname = "timeout_listen",
1633 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1634 .maxlen = sizeof(int),
1636 .proc_handler = &proc_dointvec_jiffies,
1639 .procname = "timeout_synack",
1640 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1641 .maxlen = sizeof(int),
1643 .proc_handler = &proc_dointvec_jiffies,
1646 .procname = "timeout_udp",
1647 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1648 .maxlen = sizeof(int),
1650 .proc_handler = &proc_dointvec_jiffies,
1653 .procname = "timeout_icmp",
1654 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1655 .maxlen = sizeof(int),
1657 .proc_handler = &proc_dointvec_jiffies,
1661 .procname = "cache_bypass",
1662 .data = &sysctl_ip_vs_cache_bypass,
1663 .maxlen = sizeof(int),
1665 .proc_handler = &proc_dointvec,
1668 .procname = "expire_nodest_conn",
1669 .data = &sysctl_ip_vs_expire_nodest_conn,
1670 .maxlen = sizeof(int),
1672 .proc_handler = &proc_dointvec,
1675 .procname = "expire_quiescent_template",
1676 .data = &sysctl_ip_vs_expire_quiescent_template,
1677 .maxlen = sizeof(int),
1679 .proc_handler = &proc_dointvec,
/* two-int vector; uses the validating handler above rather than a
 * plain proc_dointvec — note maxlen is sizeof the whole array */
1682 .procname = "sync_threshold",
1683 .data = &sysctl_ip_vs_sync_threshold,
1684 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1686 .proc_handler = &proc_do_sync_threshold,
1689 .procname = "nat_icmp_send",
1690 .data = &sysctl_ip_vs_nat_icmp_send,
1691 .maxlen = sizeof(int),
1693 .proc_handler = &proc_dointvec,
/*
 * Sysctl directory path net/ipv4/vs/ under which vs_vars is registered.
 * Exported so other IPVS modules can register their own tables beneath it.
 */
1698 const struct ctl_path net_vs_ctl_path[] = {
1699 { .procname = "net", .ctl_name = CTL_NET, },
1700 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1701 { .procname = "vs", },
1704 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
/* handle returned by sysctl registration; needed for unregistering */
1706 static struct ctl_table_header * sysctl_header;
1708 #ifdef CONFIG_PROC_FS
/* seq_file iterator state: 'table' records which service hash table
 * (by-protocol or by-fwmark) the iteration is currently walking.
 * NOTE(review): the enclosing struct declaration lines are elided here. */
1711 struct list_head *table;
1716 * Write the contents of the VS rule table to a PROCfs file.
1717 * (It is kept just for backward compatibility)
/*
 * Map the forwarding-method bits of a connection's flags to a short
 * human-readable name for /proc output.
 * NOTE(review): the per-case return strings are elided in this view.
 */
1719 static inline const char *ip_vs_fwd_name(unsigned flags)
1721 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1722 case IP_VS_CONN_F_LOCALNODE:
1724 case IP_VS_CONN_F_TUNNEL:
1726 case IP_VS_CONN_F_DROUTE:
1734 /* Get the Nth entry in the two lists */
/*
 * Position the seq_file iterator at entry 'pos', counting first across
 * the by-protocol service hash table and then the by-fwmark table.
 * Records which table the match was found in via iter->table.
 */
1735 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1737 struct ip_vs_iter *iter = seq->private;
1739 struct ip_vs_service *svc;
1741 /* look in hash by protocol */
1742 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1743 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1745 iter->table = ip_vs_svc_table;
1752 /* keep looking in fwmark */
1753 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1754 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1756 iter->table = ip_vs_svc_fwm_table;
/*
 * seq_file start: takes the service-table read lock (held until
 * ip_vs_info_seq_stop) and returns either the header token or the
 * (*pos - 1)-th service.
 */
1766 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1769 read_lock_bh(&__ip_vs_svc_lock);
1770 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
/*
 * seq_file next: advance within the current hash bucket, then across
 * buckets, then hand over from the by-protocol table to the by-fwmark
 * table.  Called with __ip_vs_svc_lock read-held.
 */
1774 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1776 struct list_head *e;
1777 struct ip_vs_iter *iter;
1778 struct ip_vs_service *svc;
1781 if (v == SEQ_START_TOKEN)
1782 return ip_vs_info_array(seq,0);
1785 iter = seq->private;
1787 if (iter->table == ip_vs_svc_table) {
1788 /* next service in table hashed by protocol */
1789 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1790 return list_entry(e, struct ip_vs_service, s_list);
/* bucket exhausted: scan the remaining by-protocol buckets */
1793 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1794 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
/* by-protocol table done: switch to the fwmark table */
1800 iter->table = ip_vs_svc_fwm_table;
1805 /* next service in hashed by fwmark */
1806 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1807 return list_entry(e, struct ip_vs_service, f_list);
1810 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1811 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
/* seq_file stop: drop the read lock taken in ip_vs_info_seq_start() */
1819 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1821 read_unlock_bh(&__ip_vs_svc_lock);
/*
 * seq_file show: print the header for the start token, otherwise one
 * service line (address form depends on which table it came from and,
 * with CONFIG_IP_VS_IPV6, on the address family) followed by one line
 * per destination with forwarding method, weight and connection counts.
 */
1825 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1827 if (v == SEQ_START_TOKEN) {
1829 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1830 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1832 "Prot LocalAddress:Port Scheduler Flags\n");
1834 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1836 const struct ip_vs_service *svc = v;
1837 const struct ip_vs_iter *iter = seq->private;
1838 const struct ip_vs_dest *dest;
/* services from the by-protocol table print proto/addr/port;
 * fwmark services print "FWM <mark>" instead */
1840 if (iter->table == ip_vs_svc_table) {
1841 #ifdef CONFIG_IP_VS_IPV6
1842 if (svc->af == AF_INET6)
1843 seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
1844 ip_vs_proto_name(svc->protocol),
1845 NIP6(svc->addr.in6),
1847 svc->scheduler->name);
1850 seq_printf(seq, "%s %08X:%04X %s ",
1851 ip_vs_proto_name(svc->protocol),
1852 ntohl(svc->addr.ip),
1854 svc->scheduler->name);
1856 seq_printf(seq, "FWM %08X %s ",
1857 svc->fwmark, svc->scheduler->name);
1860 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1861 seq_printf(seq, "persistent %d %08X\n",
1863 ntohl(svc->netmask));
1865 seq_putc(seq, '\n');
/* one output line per real server of this service */
1867 list_for_each_entry(dest, &svc->destinations, n_list) {
1868 #ifdef CONFIG_IP_VS_IPV6
1869 if (dest->af == AF_INET6)
1871 " -> [" NIP6_FMT "]:%04X"
1872 " %-7s %-6d %-10d %-10d\n",
1873 NIP6(dest->addr.in6),
1875 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1876 atomic_read(&dest->weight),
1877 atomic_read(&dest->activeconns),
1878 atomic_read(&dest->inactconns));
1883 "%-7s %-6d %-10d %-10d\n",
1884 ntohl(dest->addr.ip),
1886 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1887 atomic_read(&dest->weight),
1888 atomic_read(&dest->activeconns),
1889 atomic_read(&dest->inactconns));
/* seq_file operations for /proc/net/ip_vs */
1896 static const struct seq_operations ip_vs_info_seq_ops = {
1897 .start = ip_vs_info_seq_start,
1898 .next = ip_vs_info_seq_next,
1899 .stop = ip_vs_info_seq_stop,
1900 .show = ip_vs_info_seq_show,
/* open: attach the seq ops with a private ip_vs_iter allocated per open */
1903 static int ip_vs_info_open(struct inode *inode, struct file *file)
1905 return seq_open_private(file, &ip_vs_info_seq_ops,
1906 sizeof(struct ip_vs_iter));
/* file operations for /proc/net/ip_vs (read-only seq file) */
1909 static const struct file_operations ip_vs_info_fops = {
1910 .owner = THIS_MODULE,
1911 .open = ip_vs_info_open,
1913 .llseek = seq_lseek,
1914 .release = seq_release_private,
/* global IPVS statistics, protected by its embedded spinlock */
1919 struct ip_vs_stats ip_vs_stats = {
1920 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1923 #ifdef CONFIG_PROC_FS
/*
 * /proc/net/ip_vs_stats: dump the global totals and the current rates,
 * all in hex, under the stats spinlock so the snapshot is consistent.
 */
1924 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1927 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1929 " Total Incoming Outgoing Incoming Outgoing\n");
1931 " Conns Packets Packets Bytes Bytes\n");
1933 spin_lock_bh(&ip_vs_stats.lock);
1934 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1935 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1936 (unsigned long long) ip_vs_stats.inbytes,
1937 (unsigned long long) ip_vs_stats.outbytes);
1939 /* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1941 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1942 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1947 ip_vs_stats.outbps);
1948 spin_unlock_bh(&ip_vs_stats.lock);
/* open for /proc/net/ip_vs_stats: single-shot seq file, no iterator */
1953 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1955 return single_open(file, ip_vs_stats_show, NULL);
/* file operations for /proc/net/ip_vs_stats */
1958 static const struct file_operations ip_vs_stats_fops = {
1959 .owner = THIS_MODULE,
1960 .open = ip_vs_stats_seq_open,
1962 .llseek = seq_lseek,
1963 .release = single_release,
1969 * Set timeout values for tcp tcpfin udp in the timeout_table.
/*
 * Apply user-supplied protocol timeouts (seconds); a zero field means
 * "leave unchanged".  Values are converted to jiffies before storing
 * into the per-protocol timeout tables.
 */
1971 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1973 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1978 #ifdef CONFIG_IP_VS_PROTO_TCP
1979 if (u->tcp_timeout) {
1980 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1981 = u->tcp_timeout * HZ;
1984 if (u->tcp_fin_timeout) {
1985 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1986 = u->tcp_fin_timeout * HZ;
1990 #ifdef CONFIG_IP_VS_PROTO_UDP
1991 if (u->udp_timeout) {
1992 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1993 = u->udp_timeout * HZ;
/* Map a setsockopt command to a 0-based index, and define the expected
 * argument length for each command class (used to validate 'len'). */
2000 #define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2001 #define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2002 #define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2003 sizeof(struct ip_vs_dest_user))
2004 #define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2005 #define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2006 #define MAX_ARG_LEN SVCDEST_ARG_LEN
/* Expected userspace argument length per IP_VS_SO_SET_* command;
 * indexed by SET_CMDID(), checked in do_ip_vs_set_ctl(). */
2008 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2009 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2010 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2011 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2012 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2013 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2014 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2015 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2016 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2017 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2018 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2019 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
/*
 * Convert the old (IPv4-only) userspace service struct to the kernel's
 * extended representation.  sched_name is borrowed, not copied — the
 * source buffer must stay valid for the lifetime of 'usvc'.
 */
2022 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2023 struct ip_vs_service_user *usvc_compat)
2026 usvc->protocol = usvc_compat->protocol;
2027 usvc->addr.ip = usvc_compat->addr;
2028 usvc->port = usvc_compat->port;
2029 usvc->fwmark = usvc_compat->fwmark;
2031 /* Deep copy of sched_name is not needed here */
2032 usvc->sched_name = usvc_compat->sched_name;
2034 usvc->flags = usvc_compat->flags;
2035 usvc->timeout = usvc_compat->timeout;
2036 usvc->netmask = usvc_compat->netmask;
/*
 * Convert the old (IPv4-only) userspace destination struct to the
 * kernel's extended representation (field-by-field copy).
 */
2039 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2040 struct ip_vs_dest_user *udest_compat)
2042 udest->addr.ip = udest_compat->addr;
2043 udest->port = udest_compat->port;
2044 udest->conn_flags = udest_compat->conn_flags;
2045 udest->weight = udest_compat->weight;
2046 udest->u_threshold = udest_compat->u_threshold;
2047 udest->l_threshold = udest_compat->l_threshold;
/*
 * setsockopt entry point for the legacy IPVS socket-option interface.
 * Requires CAP_NET_ADMIN; validates the argument length against
 * set_arglen[], copies it in, then dispatches under __ip_vs_mutex.
 * Simple commands (flush/timeout/daemon) are handled directly; the
 * service/dest commands first convert compat structs and look up the
 * target service by <proto,addr,port> or fwmark.
 */
2051 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2054 unsigned char arg[MAX_ARG_LEN];
2055 struct ip_vs_service_user *usvc_compat;
2056 struct ip_vs_service_user_kern usvc;
2057 struct ip_vs_service *svc;
2058 struct ip_vs_dest_user *udest_compat;
2059 struct ip_vs_dest_user_kern udest;
2061 if (!capable(CAP_NET_ADMIN))
/* reject mismatched argument sizes before copying from userspace */
2064 if (len != set_arglen[SET_CMDID(cmd)]) {
2065 IP_VS_ERR("set_ctl: len %u != %u\n",
2066 len, set_arglen[SET_CMDID(cmd)]);
2070 if (copy_from_user(arg, user, len) != 0)
2073 /* increase the module use count */
2074 ip_vs_use_count_inc();
2076 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2081 if (cmd == IP_VS_SO_SET_FLUSH) {
2082 /* Flush the virtual service */
2083 ret = ip_vs_flush();
2085 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2086 /* Set timeout values for (tcp tcpfin udp) */
2087 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2089 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2090 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2091 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2093 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2094 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2095 ret = stop_sync_thread(dm->state);
/* remaining commands carry a service struct (optionally + dest) */
2099 usvc_compat = (struct ip_vs_service_user *)arg;
2100 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2102 /* We only use the new structs internally, so copy userspace compat
2103 * structs to extended internal versions */
2104 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2105 ip_vs_copy_udest_compat(&udest, udest_compat);
2107 if (cmd == IP_VS_SO_SET_ZERO) {
2108 /* if no service address is set, zero counters in all */
2109 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2110 ret = ip_vs_zero_all();
2115 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2116 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2117 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
2118 usvc.protocol, NIPQUAD(usvc.addr.ip),
2119 ntohs(usvc.port), usvc.sched_name);
2124 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2125 if (usvc.fwmark == 0)
2126 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2127 &usvc.addr, usvc.port);
2129 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
/* every command except ADD requires an existing, matching service */
2131 if (cmd != IP_VS_SO_SET_ADD
2132 && (svc == NULL || svc->protocol != usvc.protocol)) {
2138 case IP_VS_SO_SET_ADD:
2142 ret = ip_vs_add_service(&usvc, &svc);
2144 case IP_VS_SO_SET_EDIT:
2145 ret = ip_vs_edit_service(svc, &usvc);
2147 case IP_VS_SO_SET_DEL:
2148 ret = ip_vs_del_service(svc);
2152 case IP_VS_SO_SET_ZERO:
2153 ret = ip_vs_zero_service(svc);
2155 case IP_VS_SO_SET_ADDDEST:
2156 ret = ip_vs_add_dest(svc, &udest);
2158 case IP_VS_SO_SET_EDITDEST:
2159 ret = ip_vs_edit_dest(svc, &udest);
2161 case IP_VS_SO_SET_DELDEST:
2162 ret = ip_vs_del_dest(svc, &udest);
/* drop the reference taken by the service lookup above */
2169 ip_vs_service_put(svc);
2172 mutex_unlock(&__ip_vs_mutex);
2174 /* decrease the module use count */
2175 ip_vs_use_count_dec();
/*
 * Snapshot counters into a userspace-layout stats struct under the
 * source's lock.  Copies only the bytes up to the embedded lock field,
 * which relies on the counters preceding 'lock' in struct ip_vs_stats.
 */
2182 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2184 spin_lock_bh(&src->lock);
2185 memcpy(dst, src, (char*)&src->lock - (char*)src);
2186 spin_unlock_bh(&src->lock);
/*
 * Fill a userspace service entry from a kernel service: identity,
 * scheduler name, flags, timeout (converted jiffies -> seconds),
 * netmask, destination count and a stats snapshot.
 */
2190 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2192 dst->protocol = src->protocol;
2193 dst->addr = src->addr.ip;
2194 dst->port = src->port;
2195 dst->fwmark = src->fwmark;
2196 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2197 dst->flags = src->flags;
2198 dst->timeout = src->timeout / HZ;
2199 dst->netmask = src->netmask;
2200 dst->num_dests = src->num_dests;
2201 ip_vs_copy_stats(&dst->stats, &src->stats);
/*
 * Copy up to get->num_services service entries to userspace, walking
 * the by-protocol table first and then the by-fwmark table.  Stops
 * early when the user-supplied count is reached or copy_to_user fails.
 */
2205 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2206 struct ip_vs_get_services __user *uptr)
2209 struct ip_vs_service *svc;
2210 struct ip_vs_service_entry entry;
2213 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2214 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2215 if (count >= get->num_services)
2217 memset(&entry, 0, sizeof(entry));
2218 ip_vs_copy_service(&entry, svc);
2219 if (copy_to_user(&uptr->entrytable[count],
2220 &entry, sizeof(entry))) {
/* second pass: services hashed by firewall mark */
2228 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2229 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2230 if (count >= get->num_services)
2232 memset(&entry, 0, sizeof(entry));
2233 ip_vs_copy_service(&entry, svc);
2234 if (copy_to_user(&uptr->entrytable[count],
2235 &entry, sizeof(entry))) {
/*
 * Look up the requested service (by fwmark, else by proto/addr/port,
 * AF_INET only) and copy up to get->num_dests destination entries,
 * including per-dest thresholds, counters and stats, to userspace.
 */
2247 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2248 struct ip_vs_get_dests __user *uptr)
2250 struct ip_vs_service *svc;
2251 union nf_inet_addr addr = { .ip = get->addr };
2255 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2257 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2262 struct ip_vs_dest *dest;
2263 struct ip_vs_dest_entry entry;
2265 list_for_each_entry(dest, &svc->destinations, n_list) {
2266 if (count >= get->num_dests)
2269 entry.addr = dest->addr.ip;
2270 entry.port = dest->port;
2271 entry.conn_flags = atomic_read(&dest->conn_flags);
2272 entry.weight = atomic_read(&dest->weight);
2273 entry.u_threshold = dest->u_threshold;
2274 entry.l_threshold = dest->l_threshold;
2275 entry.activeconns = atomic_read(&dest->activeconns);
2276 entry.inactconns = atomic_read(&dest->inactconns);
2277 entry.persistconns = atomic_read(&dest->persistconns);
2278 ip_vs_copy_stats(&entry.stats, &dest->stats);
2279 if (copy_to_user(&uptr->entrytable[count],
2280 &entry, sizeof(entry))) {
/* release the reference taken by the service lookup */
2286 ip_vs_service_put(svc);
/*
 * Report the current TCP/TCP-FIN/UDP timeouts to userspace, converting
 * from jiffies back to seconds (inverse of ip_vs_set_timeout()).
 */
2293 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2295 #ifdef CONFIG_IP_VS_PROTO_TCP
2297 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2298 u->tcp_fin_timeout =
2299 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2301 #ifdef CONFIG_IP_VS_PROTO_UDP
2303 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
/* Getsockopt counterparts of the SET_* macros: command index and
 * minimum argument length per IP_VS_SO_GET_* command. */
2308 #define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2309 #define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2310 #define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2311 #define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2312 #define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2313 #define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2314 #define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
/* Minimum userspace buffer length per IP_VS_SO_GET_* command; indexed
 * by GET_CMDID(), checked in do_ip_vs_get_ctl(). */
2316 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2317 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2318 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2319 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2320 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2321 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2322 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2323 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
/*
 * getsockopt entry point for the legacy IPVS socket-option interface.
 * Requires CAP_NET_ADMIN; validates *len against get_arglen[], copies
 * the request header in, and answers each IP_VS_SO_GET_* command under
 * __ip_vs_mutex.  Variable-size replies (SERVICES/DESTS) re-check *len
 * against the count embedded in the request before copying out.
 */
2327 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2329 unsigned char arg[128];
2332 if (!capable(CAP_NET_ADMIN))
2335 if (*len < get_arglen[GET_CMDID(cmd)]) {
2336 IP_VS_ERR("get_ctl: len %u < %u\n",
2337 *len, get_arglen[GET_CMDID(cmd)]);
2341 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2344 if (mutex_lock_interruptible(&__ip_vs_mutex))
2345 return -ERESTARTSYS;
2348 case IP_VS_SO_GET_VERSION:
2352 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2353 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2354 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2358 *len = strlen(buf)+1;
2362 case IP_VS_SO_GET_INFO:
2364 struct ip_vs_getinfo info;
2365 info.version = IP_VS_VERSION_CODE;
2366 info.size = IP_VS_CONN_TAB_SIZE;
2367 info.num_services = ip_vs_num_services;
2368 if (copy_to_user(user, &info, sizeof(info)) != 0)
2373 case IP_VS_SO_GET_SERVICES:
2375 struct ip_vs_get_services *get;
2378 get = (struct ip_vs_get_services *)arg;
/* user buffer must hold the header plus num_services entries */
2379 size = sizeof(*get) +
2380 sizeof(struct ip_vs_service_entry) * get->num_services;
2382 IP_VS_ERR("length: %u != %u\n", *len, size);
2386 ret = __ip_vs_get_service_entries(get, user);
2390 case IP_VS_SO_GET_SERVICE:
2392 struct ip_vs_service_entry *entry;
2393 struct ip_vs_service *svc;
2394 union nf_inet_addr addr;
2396 entry = (struct ip_vs_service_entry *)arg;
2397 addr.ip = entry->addr;
2399 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2401 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2402 &addr, entry->port);
2404 ip_vs_copy_service(entry, svc);
2405 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2407 ip_vs_service_put(svc);
2413 case IP_VS_SO_GET_DESTS:
2415 struct ip_vs_get_dests *get;
2418 get = (struct ip_vs_get_dests *)arg;
2419 size = sizeof(*get) +
2420 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2422 IP_VS_ERR("length: %u != %u\n", *len, size);
2426 ret = __ip_vs_get_dest_entries(get, user);
2430 case IP_VS_SO_GET_TIMEOUT:
2432 struct ip_vs_timeout_user t;
2434 __ip_vs_get_timeouts(&t);
2435 if (copy_to_user(user, &t, sizeof(t)) != 0)
2440 case IP_VS_SO_GET_DAEMON:
2442 struct ip_vs_daemon_user d[2];
/* d[0] = master daemon state, d[1] = backup daemon state */
2444 memset(&d, 0, sizeof(d));
2445 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2446 d[0].state = IP_VS_STATE_MASTER;
2447 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2448 d[0].syncid = ip_vs_master_syncid;
2450 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2451 d[1].state = IP_VS_STATE_BACKUP;
2452 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2453 d[1].syncid = ip_vs_backup_syncid;
2455 if (copy_to_user(user, &d, sizeof(d)) != 0)
2465 mutex_unlock(&__ip_vs_mutex);
/* Registration record hooking the IP_VS_SO_{SET,GET}_* sockopt ranges
 * into netfilter's sockopt dispatch. */
2470 static struct nf_sockopt_ops ip_vs_sockopts = {
2472 .set_optmin = IP_VS_BASE_CTL,
2473 .set_optmax = IP_VS_SO_SET_MAX+1,
2474 .set = do_ip_vs_set_ctl,
2475 .get_optmin = IP_VS_BASE_CTL,
2476 .get_optmax = IP_VS_SO_GET_MAX+1,
2477 .get = do_ip_vs_get_ctl,
2478 .owner = THIS_MODULE,
2482 * Generic Netlink interface
2485 /* IPVS genetlink family */
/* Family registration: dynamic id, named IPVS_GENL_NAME, top-level
 * attributes bounded by IPVS_CMD_MAX (policy: ip_vs_cmd_policy). */
2486 static struct genl_family ip_vs_genl_family = {
2487 .id = GENL_ID_GENERATE,
2489 .name = IPVS_GENL_NAME,
2490 .version = IPVS_GENL_VERSION,
2491 .maxattr = IPVS_CMD_MAX,
2494 /* Policy used for first-level command attributes */
2495 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2496 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2497 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2498 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2499 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2500 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2501 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2504 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2505 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2506 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
/* interface name must be a NUL-terminated string of bounded length */
2507 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2508 .len = IP_VS_IFNAME_MAXLEN },
2509 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2512 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2513 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2514 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2515 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
/* address sent as raw binary large enough for v4 or v6 */
2516 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2517 .len = sizeof(union nf_inet_addr) },
2518 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2519 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2520 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2521 .len = IP_VS_SCHEDNAME_MAXLEN },
2522 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2523 .len = sizeof(struct ip_vs_flags) },
2524 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2525 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2526 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2529 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2530 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2531 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2532 .len = sizeof(union nf_inet_addr) },
2533 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2534 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2535 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2536 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2537 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
/* read-only counters, present in dumps only */
2538 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2539 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2540 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2541 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
/*
 * Emit a nested stats attribute (container_type) holding a consistent
 * snapshot of 'stats', taken under the stats spinlock.  On nla failure
 * the lock is released and the partial nest is cancelled.
 */
2544 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2545 struct ip_vs_stats *stats)
2547 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2551 spin_lock_bh(&stats->lock);
/* NLA_PUT* macros jump to nla_put_failure on exhausted skb space */
2553 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2554 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2555 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2556 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2557 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2558 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2559 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2560 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2561 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2562 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2564 spin_unlock_bh(&stats->lock);
2566 nla_nest_end(skb, nl_stats);
/* error path: unlock before cancelling the nest */
2571 spin_unlock_bh(&stats->lock);
2572 nla_nest_cancel(skb, nl_stats);
/*
 * Emit a nested IPVS_CMD_ATTR_SERVICE attribute describing 'svc':
 * either fwmark or proto/addr/port identity, then scheduler, flags,
 * timeout (seconds), netmask and a stats nest.
 */
2576 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2577 struct ip_vs_service *svc)
2579 struct nlattr *nl_service;
2580 struct ip_vs_flags flags = { .flags = svc->flags,
2583 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2587 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2590 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2592 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2593 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2594 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2597 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2598 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2599 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2600 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2602 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2603 goto nla_put_failure;
2605 nla_nest_end(skb, nl_service);
2610 nla_nest_cancel(skb, nl_service);
/*
 * Emit one NLM_F_MULTI dump message (IPVS_CMD_NEW_SERVICE) for 'svc';
 * cancels the whole message if the service attributes don't fit.
 */
2614 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2615 struct ip_vs_service *svc,
2616 struct netlink_callback *cb)
2620 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2621 &ip_vs_genl_family, NLM_F_MULTI,
2622 IPVS_CMD_NEW_SERVICE);
2626 if (ip_vs_genl_fill_service(skb, svc) < 0)
2627 goto nla_put_failure;
2629 return genlmsg_end(skb, hdr);
2632 genlmsg_cancel(skb, hdr);
/*
 * genetlink dump callback for services: walk both hash tables under
 * __ip_vs_mutex, skipping the first cb->args[0] entries so the dump
 * resumes where the previous skb left off.
 */
2636 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2637 struct netlink_callback *cb)
2640 int start = cb->args[0];
2641 struct ip_vs_service *svc;
2643 mutex_lock(&__ip_vs_mutex);
2644 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2645 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2648 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2650 goto nla_put_failure;
2655 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2656 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2659 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2661 goto nla_put_failure;
2667 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_SERVICE attribute into 'usvc'.
 * Mandatory part: AF plus either fwmark or (proto, addr, port); only
 * AF_INET is accepted.  With full_entry != 0, also require and parse
 * scheduler name, flags, timeout and netmask, merging the new flags
 * into any existing service's flags via the supplied mask.
 */
2673 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2674 struct nlattr *nla, int full_entry)
2676 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2677 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2679 /* Parse mandatory identifying service fields first */
2681 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2684 nla_af = attrs[IPVS_SVC_ATTR_AF];
2685 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2686 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2687 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2688 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2690 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2693 usvc->af = nla_get_u16(nla_af);
2694 /* For now, only support IPv4 */
2695 if (nla_get_u16(nla_af) != AF_INET)
2696 return -EAFNOSUPPORT;
/* fwmark services carry no real protocol; TCP is a placeholder */
2699 usvc->protocol = IPPROTO_TCP;
2700 usvc->fwmark = nla_get_u32(nla_fwmark);
2702 usvc->protocol = nla_get_u16(nla_protocol);
2703 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2704 usvc->port = nla_get_u16(nla_port);
2708 /* If a full entry was requested, check for the additional fields */
2710 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2712 struct ip_vs_flags flags;
2713 struct ip_vs_service *svc;
2715 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2716 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2717 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2718 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2720 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2723 nla_memcpy(&flags, nla_flags, sizeof(flags));
2725 /* prefill flags from service if it already exists */
2727 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2729 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2730 &usvc->addr, usvc->port);
2732 usvc->flags = svc->flags;
2733 ip_vs_service_put(svc);
2737 /* set new flags from userland */
2738 usvc->flags = (usvc->flags & ~flags.mask) |
2739 (flags.flags & flags.mask);
/* sched_name points into the nlattr payload — not copied */
2740 usvc->sched_name = nla_data(nla_sched);
2741 usvc->timeout = nla_get_u32(nla_timeout);
2742 usvc->netmask = nla_get_u32(nla_netmask);
/*
 * Parse the identifying fields of a service attribute and look up the
 * matching kernel service (by fwmark, else by proto/addr/port).
 * Returns ERR_PTR on parse failure; the lookup takes a reference the
 * caller must drop with ip_vs_service_put().
 */
2748 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2750 struct ip_vs_service_user_kern usvc;
2753 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2755 return ERR_PTR(ret);
2758 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2760 return __ip_vs_service_get(usvc.af, usvc.protocol,
2761 &usvc.addr, usvc.port);
/*
 * Emit a nested IPVS_CMD_ATTR_DEST attribute describing one real
 * server: address/port, forwarding method (masked out of conn_flags),
 * weight, thresholds, live connection counters and a stats nest.
 */
2764 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2766 struct nlattr *nl_dest;
2768 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2772 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2773 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2775 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2776 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2777 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2778 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2779 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2780 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2781 atomic_read(&dest->activeconns));
2782 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2783 atomic_read(&dest->inactconns));
2784 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2785 atomic_read(&dest->persistconns));
2787 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2788 goto nla_put_failure;
2790 nla_nest_end(skb, nl_dest);
2795 nla_nest_cancel(skb, nl_dest);
/*
 * Emit one NLM_F_MULTI dump message for a destination; cancels the
 * message when the dest attributes don't fit in the skb.
 */
2799 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2800 struct netlink_callback *cb)
2804 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2805 &ip_vs_genl_family, NLM_F_MULTI,
2810 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2811 goto nla_put_failure;
2813 return genlmsg_end(skb, hdr);
2816 genlmsg_cancel(skb, hdr);
/*
 * genetlink dump callback for destinations: re-parse the service
 * identity from the original request (carried in cb->nlh), look the
 * service up, and dump its destination list, resuming after the first
 * cb->args[0] entries.  All under __ip_vs_mutex.
 */
2820 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2821 struct netlink_callback *cb)
2824 int start = cb->args[0];
2825 struct ip_vs_service *svc;
2826 struct ip_vs_dest *dest;
2827 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2829 mutex_lock(&__ip_vs_mutex);
2831 /* Try to find the service for which to dump destinations */
2832 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2833 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2836 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2837 if (IS_ERR(svc) || svc == NULL)
2840 /* Dump the destinations */
2841 list_for_each_entry(dest, &svc->destinations, n_list) {
2844 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2846 goto nla_put_failure;
/* drop the lookup reference before unlocking */
2852 ip_vs_service_put(svc);
2855 mutex_unlock(&__ip_vs_mutex);
/*
 * Parse a nested IPVS_CMD_ATTR_DEST attribute into 'udest'.
 * Mandatory part: address and port.  With full_entry != 0, also
 * require forwarding method (masked to IP_VS_CONN_F_FWD_MASK),
 * weight and the upper/lower connection thresholds.
 */
2860 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2861 struct nlattr *nla, int full_entry)
2863 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2864 struct nlattr *nla_addr, *nla_port;
2866 /* Parse mandatory identifying destination fields first */
2868 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2871 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2872 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2874 if (!(nla_addr && nla_port))
2877 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2878 udest->port = nla_get_u16(nla_port);
2880 /* If a full entry was requested, check for the additional fields */
2882 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2885 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2886 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2887 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2888 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2890 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2893 udest->conn_flags = nla_get_u32(nla_fwd)
2894 & IP_VS_CONN_F_FWD_MASK;
2895 udest->weight = nla_get_u32(nla_weight);
2896 udest->u_threshold = nla_get_u32(nla_u_thresh);
2897 udest->l_threshold = nla_get_u32(nla_l_thresh);
/*
 * Emit one nested IPVS_CMD_ATTR_DAEMON attribute describing a sync
 * daemon (state, multicast interface, sync id) into @skb.
 * Returns 0 on success; on a full skb the nest is cancelled (label elided).
 *
 * NOTE(review): @state and @syncid are declared __be32 but stored with
 * NLA_PUT_U32, and the callers pass host-order constants/ints — the
 * __be32 annotation looks wrong (sparse would warn); confirm against the
 * full tree before relying on byte order here.
 */
2903 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2904 const char *mcast_ifn, __be32 syncid)
2906 struct nlattr *nl_daemon;
2908 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2912 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2913 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2914 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2916 nla_nest_end(skb, nl_daemon);
/* nla_put_failure path: undo the partially-built nest. */
2921 nla_nest_cancel(skb, nl_daemon);
/*
 * Wrap one daemon record in a full NLM_F_MULTI genetlink message for a
 * dump: message header + nested daemon attribute.  Returns the value of
 * genlmsg_end() on success, negative on overflow (cancel path).
 */
2925 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2926 const char *mcast_ifn, __be32 syncid,
2927 struct netlink_callback *cb)
/* Reply is addressed to the requester's pid/seq taken from the dump cb. */
2930 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2931 &ip_vs_genl_family, NLM_F_MULTI,
2932 IPVS_CMD_NEW_DAEMON)
2936 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2937 goto nla_put_failure;
2939 return genlmsg_end(skb, hdr);
/* nla_put_failure: drop the half-built message from the skb. */
2942 genlmsg_cancel(skb, hdr);
/*
 * ->dumpit for IPVS_CMD_GET_DAEMON: report the master and/or backup sync
 * daemons if they are running.  cb->args[0]/args[1] record which of the
 * two has already been sent, so a resumed dump does not repeat them
 * (the increments happen on elided lines).
 */
2946 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2947 struct netlink_callback *cb)
/* The sync-daemon globals are protected by the same config mutex. */
2949 mutex_lock(&__ip_vs_mutex);
2950 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2951 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2952 ip_vs_master_mcast_ifn,
2953 ip_vs_master_syncid, cb) < 0)
2954 goto nla_put_failure;
2959 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2960 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2961 ip_vs_backup_mcast_ifn,
2962 ip_vs_backup_syncid, cb) < 0)
2963 goto nla_put_failure;
2969 mutex_unlock(&__ip_vs_mutex);
/*
 * IPVS_CMD_NEW_DAEMON handler: validate that all three daemon attributes
 * are present, then start a sync thread with them.  Returns the
 * start_sync_thread() result (error return for missing attrs is elided).
 */
2974 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2976 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2977 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2978 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2981 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2982 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2983 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
/*
 * IPVS_CMD_DEL_DAEMON handler: stop the sync thread identified by the
 * STATE attribute (master or backup).  Missing-attribute error return
 * is on an elided line.
 */
2986 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2988 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2991 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
/*
 * IPVS_CMD_SET_CONFIG handler: read the current protocol timeouts,
 * overwrite only those supplied by userspace, and apply the result.
 * Attributes not present leave the existing timeout untouched.
 */
2994 static int ip_vs_genl_set_config(struct nlattr **attrs)
2996 struct ip_vs_timeout_user t;
/* Start from the currently configured values so partial updates work. */
2998 __ip_vs_get_timeouts(&t);
3000 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3001 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3003 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3005 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3007 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3008 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3010 return ip_vs_set_timeout(&t);
/*
 * Single ->doit entry point for every mutating IPVS genetlink command
 * (service/dest add/edit/del, daemon start/stop, config, zero, flush).
 * Dispatches on info->genlhdr->cmd under __ip_vs_mutex.
 *
 * Flow: global commands (FLUSH, SET_CONFIG, *_DAEMON, global ZERO) are
 * handled and exit early (gotos elided); everything else first resolves
 * a service argument, then optionally a destination argument, then
 * switches on the command.
 */
3013 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3015 struct ip_vs_service *svc = NULL;
3016 struct ip_vs_service_user_kern usvc;
3017 struct ip_vs_dest_user_kern udest;
/* "full" = all add/edit fields required, not just the identifying ones. */
3019 int need_full_svc = 0, need_full_dest = 0;
3021 cmd = info->genlhdr->cmd;
/* One big lock serializes all IPVS configuration changes. */
3023 mutex_lock(&__ip_vs_mutex);
3025 if (cmd == IPVS_CMD_FLUSH) {
3026 ret = ip_vs_flush();
3028 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3029 ret = ip_vs_genl_set_config(info->attrs);
3031 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3032 cmd == IPVS_CMD_DEL_DAEMON) {
3034 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
/* Daemon commands carry their fields in a nested DAEMON attribute. */
3036 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3037 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3038 info->attrs[IPVS_CMD_ATTR_DAEMON],
3039 ip_vs_daemon_policy)) {
3044 if (cmd == IPVS_CMD_NEW_DAEMON)
3045 ret = ip_vs_genl_new_daemon(daemon_attrs);
3047 ret = ip_vs_genl_del_daemon(daemon_attrs);
/* ZERO without a service argument means "zero all services". */
3049 } else if (cmd == IPVS_CMD_ZERO &&
3050 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3051 ret = ip_vs_zero_all();
3055 /* All following commands require a service argument, so check if we
3056 * received a valid one. We need a full service specification when
3057 * adding / editing a service. Only identifying members otherwise. */
3058 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3061 ret = ip_vs_genl_parse_service(&usvc,
3062 info->attrs[IPVS_CMD_ATTR_SERVICE],
3067 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3068 if (usvc.fwmark == 0)
3069 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3070 &usvc.addr, usvc.port);
3072 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3074 /* Unless we're adding a new service, the service must already exist */
3075 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3080 /* Destination commands require a valid destination argument. For
3081 * adding / editing a destination, we need a full destination
3083 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3084 cmd == IPVS_CMD_DEL_DEST) {
3085 if (cmd != IPVS_CMD_DEL_DEST)
3088 ret = ip_vs_genl_parse_dest(&udest,
3089 info->attrs[IPVS_CMD_ATTR_DEST],
/* Final dispatch: svc (and udest where needed) are valid here. */
3096 case IPVS_CMD_NEW_SERVICE:
3098 ret = ip_vs_add_service(&usvc, &svc);
3102 case IPVS_CMD_SET_SERVICE:
3103 ret = ip_vs_edit_service(svc, &usvc);
3105 case IPVS_CMD_DEL_SERVICE:
3106 ret = ip_vs_del_service(svc);
3108 case IPVS_CMD_NEW_DEST:
3109 ret = ip_vs_add_dest(svc, &udest);
3111 case IPVS_CMD_SET_DEST:
3112 ret = ip_vs_edit_dest(svc, &udest);
3114 case IPVS_CMD_DEL_DEST:
3115 ret = ip_vs_del_dest(svc, &udest);
3118 ret = ip_vs_zero_service(svc);
/* Release the service reference taken by the lookup above. */
3126 ip_vs_service_put(svc);
3127 mutex_unlock(&__ip_vs_mutex);
/*
 * Single ->doit entry point for the non-dump GET commands
 * (GET_SERVICE, GET_CONFIG, GET_INFO): build a one-shot unicast reply.
 * Unknown commands are rejected before any allocation.
 */
3132 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3134 struct sk_buff *msg;
3136 int ret, cmd, reply_cmd;
3138 cmd = info->genlhdr->cmd;
/* Map each GET request to the command id used in the reply message. */
3140 if (cmd == IPVS_CMD_GET_SERVICE)
3141 reply_cmd = IPVS_CMD_NEW_SERVICE;
3142 else if (cmd == IPVS_CMD_GET_INFO)
3143 reply_cmd = IPVS_CMD_SET_INFO;
3144 else if (cmd == IPVS_CMD_GET_CONFIG)
3145 reply_cmd = IPVS_CMD_SET_CONFIG;
3147 IP_VS_ERR("unknown Generic Netlink command\n");
3151 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3155 mutex_lock(&__ip_vs_mutex);
3157 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3159 goto nla_put_failure;
3162 case IPVS_CMD_GET_SERVICE:
3164 struct ip_vs_service *svc;
/* Takes a reference on success; dropped right after filling the reply. */
3166 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3171 ret = ip_vs_genl_fill_service(msg, svc);
3172 ip_vs_service_put(svc);
3174 goto nla_put_failure;
3183 case IPVS_CMD_GET_CONFIG:
3185 struct ip_vs_timeout_user t;
3187 __ip_vs_get_timeouts(&t);
/* Only report timeouts for protocols compiled into this kernel. */
3188 #ifdef CONFIG_IP_VS_PROTO_TCP
3189 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3190 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3193 #ifdef CONFIG_IP_VS_PROTO_UDP
3194 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3200 case IPVS_CMD_GET_INFO:
3201 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3202 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3203 IP_VS_CONN_TAB_SIZE);
3207 genlmsg_end(msg, reply);
/* Unicast the finished reply back to the requesting socket. */
3208 ret = genlmsg_unicast(msg, info->snd_pid);
/* nla_put_failure: the reply skb overflowed while adding attributes. */
3212 IP_VS_ERR("not enough space in Netlink message\n");
3218 mutex_unlock(&__ip_vs_mutex);
/*
 * Generic Netlink operation table: every command requires CAP_NET_ADMIN
 * (GENL_ADMIN_PERM).  Mutating commands share ip_vs_genl_set_cmd; GETs
 * use ip_vs_genl_get_cmd and/or a dedicated dump callback.
 * NOTE(review): the GET_DAEMON entry visibly has no .policy — attribute
 * validation for its dump relies on the handler; confirm intentional.
 */
3224 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3226 .cmd = IPVS_CMD_NEW_SERVICE,
3227 .flags = GENL_ADMIN_PERM,
3228 .policy = ip_vs_cmd_policy,
3229 .doit = ip_vs_genl_set_cmd,
3232 .cmd = IPVS_CMD_SET_SERVICE,
3233 .flags = GENL_ADMIN_PERM,
3234 .policy = ip_vs_cmd_policy,
3235 .doit = ip_vs_genl_set_cmd,
3238 .cmd = IPVS_CMD_DEL_SERVICE,
3239 .flags = GENL_ADMIN_PERM,
3240 .policy = ip_vs_cmd_policy,
3241 .doit = ip_vs_genl_set_cmd,
3244 .cmd = IPVS_CMD_GET_SERVICE,
3245 .flags = GENL_ADMIN_PERM,
3246 .doit = ip_vs_genl_get_cmd,
3247 .dumpit = ip_vs_genl_dump_services,
3248 .policy = ip_vs_cmd_policy,
3251 .cmd = IPVS_CMD_NEW_DEST,
3252 .flags = GENL_ADMIN_PERM,
3253 .policy = ip_vs_cmd_policy,
3254 .doit = ip_vs_genl_set_cmd,
3257 .cmd = IPVS_CMD_SET_DEST,
3258 .flags = GENL_ADMIN_PERM,
3259 .policy = ip_vs_cmd_policy,
3260 .doit = ip_vs_genl_set_cmd,
3263 .cmd = IPVS_CMD_DEL_DEST,
3264 .flags = GENL_ADMIN_PERM,
3265 .policy = ip_vs_cmd_policy,
3266 .doit = ip_vs_genl_set_cmd,
3269 .cmd = IPVS_CMD_GET_DEST,
3270 .flags = GENL_ADMIN_PERM,
3271 .policy = ip_vs_cmd_policy,
3272 .dumpit = ip_vs_genl_dump_dests,
3275 .cmd = IPVS_CMD_NEW_DAEMON,
3276 .flags = GENL_ADMIN_PERM,
3277 .policy = ip_vs_cmd_policy,
3278 .doit = ip_vs_genl_set_cmd,
3281 .cmd = IPVS_CMD_DEL_DAEMON,
3282 .flags = GENL_ADMIN_PERM,
3283 .policy = ip_vs_cmd_policy,
3284 .doit = ip_vs_genl_set_cmd,
3287 .cmd = IPVS_CMD_GET_DAEMON,
3288 .flags = GENL_ADMIN_PERM,
3289 .dumpit = ip_vs_genl_dump_daemons,
3292 .cmd = IPVS_CMD_SET_CONFIG,
3293 .flags = GENL_ADMIN_PERM,
3294 .policy = ip_vs_cmd_policy,
3295 .doit = ip_vs_genl_set_cmd,
3298 .cmd = IPVS_CMD_GET_CONFIG,
3299 .flags = GENL_ADMIN_PERM,
3300 .doit = ip_vs_genl_get_cmd,
3303 .cmd = IPVS_CMD_GET_INFO,
3304 .flags = GENL_ADMIN_PERM,
3305 .doit = ip_vs_genl_get_cmd,
3308 .cmd = IPVS_CMD_ZERO,
3309 .flags = GENL_ADMIN_PERM,
3310 .policy = ip_vs_cmd_policy,
3311 .doit = ip_vs_genl_set_cmd,
3314 .cmd = IPVS_CMD_FLUSH,
3315 .flags = GENL_ADMIN_PERM,
3316 .doit = ip_vs_genl_set_cmd,
/*
 * Register the IPVS genetlink family, then each op in ip_vs_genl_ops.
 * On a failed op registration the family is unregistered again, which
 * also removes any ops registered so far (error gotos elided).
 */
3320 static int __init ip_vs_genl_register(void)
3324 ret = genl_register_family(&ip_vs_genl_family);
3328 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3329 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
/* Unwind path: tearing down the family cleans up registered ops too. */
3336 genl_unregister_family(&ip_vs_genl_family);
/* Tear down the IPVS genetlink family (ops go with it). */
3340 static void ip_vs_genl_unregister(void)
3342 genl_unregister_family(&ip_vs_genl_family);
3345 /* End of Generic Netlink interface definitions */
/*
 * Module init for the IPVS control plane: register the sockopt and
 * genetlink interfaces, create /proc entries and sysctls, initialize
 * the service/real-server hash tables, hook stats estimation and the
 * periodic defense timer.  The success return is on an elided line.
 */
3348 int __init ip_vs_control_init(void)
3355 ret = nf_register_sockopt(&ip_vs_sockopts);
3357 IP_VS_ERR("cannot register sockopt.\n");
3361 ret = ip_vs_genl_register();
3363 IP_VS_ERR("cannot register Generic Netlink interface.\n");
/* Undo the sockopt registration on genetlink failure. */
3364 nf_unregister_sockopt(&ip_vs_sockopts);
3368 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3369 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3371 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3373 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3374 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3375 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3376 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3378 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3379 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
/* Start estimating the global stats counters. */
3382 ip_vs_new_estimator(&ip_vs_stats);
3384 /* Hook the defense timer */
3385 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3392 void ip_vs_control_cleanup(void)
3395 ip_vs_trash_cleanup();
3396 cancel_rearming_delayed_work(&defense_work);
3397 cancel_work_sync(&defense_work.work);
3398 ip_vs_kill_estimator(&ip_vs_stats);
3399 unregister_sysctl_table(sysctl_header);
3400 proc_net_remove(&init_net, "ip_vs_stats");
3401 proc_net_remove(&init_net, "ip_vs");
3402 ip_vs_genl_unregister();
3403 nf_unregister_sockopt(&ip_vs_sockopts);