]> bbs.cooldavid.org Git - net-next-2.6.git/blame_incremental - net/netfilter/ipvs/ip_vs_ctl.c
net: use the macros defined for the members of flowi
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_ctl.c
... / ...
CommitLineData
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#define KMSG_COMPONENT "IPVS"
22#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
23
24#include <linux/module.h>
25#include <linux/init.h>
26#include <linux/types.h>
27#include <linux/capability.h>
28#include <linux/fs.h>
29#include <linux/sysctl.h>
30#include <linux/proc_fs.h>
31#include <linux/workqueue.h>
32#include <linux/swap.h>
33#include <linux/seq_file.h>
34#include <linux/slab.h>
35
36#include <linux/netfilter.h>
37#include <linux/netfilter_ipv4.h>
38#include <linux/mutex.h>
39
40#include <net/net_namespace.h>
41#include <net/ip.h>
42#ifdef CONFIG_IP_VS_IPV6
43#include <net/ipv6.h>
44#include <net/ip6_route.h>
45#endif
46#include <net/route.h>
47#include <net/sock.h>
48#include <net/genetlink.h>
49
50#include <asm/uaccess.h>
51
52#include <net/ip_vs.h>
53
54/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
55static DEFINE_MUTEX(__ip_vs_mutex);
56
57/* lock for service table */
58static DEFINE_RWLOCK(__ip_vs_svc_lock);
59
60/* lock for table with the real services */
61static DEFINE_RWLOCK(__ip_vs_rs_lock);
62
63/* lock for state and timeout tables */
64static DEFINE_SPINLOCK(ip_vs_securetcp_lock);
65
66/* lock for drop entry handling */
67static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
68
69/* lock for drop packet handling */
70static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
71
72/* 1/rate drop and drop-entry variables */
73int ip_vs_drop_rate = 0;
74int ip_vs_drop_counter = 0;
75static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
76
77/* number of virtual services */
78static int ip_vs_num_services = 0;
79
80/* sysctl variables */
81static int sysctl_ip_vs_drop_entry = 0;
82static int sysctl_ip_vs_drop_packet = 0;
83static int sysctl_ip_vs_secure_tcp = 0;
84static int sysctl_ip_vs_amemthresh = 1024;
85static int sysctl_ip_vs_am_droprate = 10;
86int sysctl_ip_vs_cache_bypass = 0;
87int sysctl_ip_vs_expire_nodest_conn = 0;
88int sysctl_ip_vs_expire_quiescent_template = 0;
89int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
90int sysctl_ip_vs_nat_icmp_send = 0;
91#ifdef CONFIG_IP_VS_NFCT
92int sysctl_ip_vs_conntrack;
93#endif
94int sysctl_ip_vs_snat_reroute = 1;
95
96
97#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; static storage, so it starts out as zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current value of the debug level variable. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
104#endif
105
106#ifdef CONFIG_IP_VS_IPV6
107/* Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way? */
108static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
109{
110 struct rt6_info *rt;
111 struct flowi fl = {
112 .oif = 0,
113 .fl6_dst = *addr,
114 .fl6_src = { .s6_addr32 = {0, 0, 0, 0} },
115 };
116
117 rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
118 if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
119 return 1;
120
121 return 0;
122}
123#endif
124/*
125 * update_defense_level is called from keventd and from sysctl,
126 * so it needs to protect itself from softirqs
127 */
128static void update_defense_level(void)
129{
130 struct sysinfo i;
131 static int old_secure_tcp = 0;
132 int availmem;
133 int nomem;
134 int to_change = -1;
135
136 /* we only count free and buffered memory (in pages) */
137 si_meminfo(&i);
138 availmem = i.freeram + i.bufferram;
139 /* however in linux 2.5 the i.bufferram is total page cache size,
140 we need adjust it */
141 /* si_swapinfo(&i); */
142 /* availmem = availmem - (i.totalswap - i.freeswap); */
143
144 nomem = (availmem < sysctl_ip_vs_amemthresh);
145
146 local_bh_disable();
147
148 /* drop_entry */
149 spin_lock(&__ip_vs_dropentry_lock);
150 switch (sysctl_ip_vs_drop_entry) {
151 case 0:
152 atomic_set(&ip_vs_dropentry, 0);
153 break;
154 case 1:
155 if (nomem) {
156 atomic_set(&ip_vs_dropentry, 1);
157 sysctl_ip_vs_drop_entry = 2;
158 } else {
159 atomic_set(&ip_vs_dropentry, 0);
160 }
161 break;
162 case 2:
163 if (nomem) {
164 atomic_set(&ip_vs_dropentry, 1);
165 } else {
166 atomic_set(&ip_vs_dropentry, 0);
167 sysctl_ip_vs_drop_entry = 1;
168 };
169 break;
170 case 3:
171 atomic_set(&ip_vs_dropentry, 1);
172 break;
173 }
174 spin_unlock(&__ip_vs_dropentry_lock);
175
176 /* drop_packet */
177 spin_lock(&__ip_vs_droppacket_lock);
178 switch (sysctl_ip_vs_drop_packet) {
179 case 0:
180 ip_vs_drop_rate = 0;
181 break;
182 case 1:
183 if (nomem) {
184 ip_vs_drop_rate = ip_vs_drop_counter
185 = sysctl_ip_vs_amemthresh /
186 (sysctl_ip_vs_amemthresh-availmem);
187 sysctl_ip_vs_drop_packet = 2;
188 } else {
189 ip_vs_drop_rate = 0;
190 }
191 break;
192 case 2:
193 if (nomem) {
194 ip_vs_drop_rate = ip_vs_drop_counter
195 = sysctl_ip_vs_amemthresh /
196 (sysctl_ip_vs_amemthresh-availmem);
197 } else {
198 ip_vs_drop_rate = 0;
199 sysctl_ip_vs_drop_packet = 1;
200 }
201 break;
202 case 3:
203 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
204 break;
205 }
206 spin_unlock(&__ip_vs_droppacket_lock);
207
208 /* secure_tcp */
209 spin_lock(&ip_vs_securetcp_lock);
210 switch (sysctl_ip_vs_secure_tcp) {
211 case 0:
212 if (old_secure_tcp >= 2)
213 to_change = 0;
214 break;
215 case 1:
216 if (nomem) {
217 if (old_secure_tcp < 2)
218 to_change = 1;
219 sysctl_ip_vs_secure_tcp = 2;
220 } else {
221 if (old_secure_tcp >= 2)
222 to_change = 0;
223 }
224 break;
225 case 2:
226 if (nomem) {
227 if (old_secure_tcp < 2)
228 to_change = 1;
229 } else {
230 if (old_secure_tcp >= 2)
231 to_change = 0;
232 sysctl_ip_vs_secure_tcp = 1;
233 }
234 break;
235 case 3:
236 if (old_secure_tcp < 2)
237 to_change = 1;
238 break;
239 }
240 old_secure_tcp = sysctl_ip_vs_secure_tcp;
241 if (to_change >= 0)
242 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
243 spin_unlock(&ip_vs_securetcp_lock);
244
245 local_bh_enable();
246}
247
248
249/*
250 * Timer for checking the defense
251 */
252#define DEFENSE_TIMER_PERIOD 1*HZ
253static void defense_work_handler(struct work_struct *work);
254static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
255
256static void defense_work_handler(struct work_struct *work)
257{
258 update_defense_level();
259 if (atomic_read(&ip_vs_dropentry))
260 ip_vs_random_dropentry();
261
262 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
263}
264
265int
266ip_vs_use_count_inc(void)
267{
268 return try_module_get(THIS_MODULE);
269}
270
271void
272ip_vs_use_count_dec(void)
273{
274 module_put(THIS_MODULE);
275}
276
277
278/*
279 * Hash table: for virtual service lookups
280 */
281#define IP_VS_SVC_TAB_BITS 8
282#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
283#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
284
285/* the service table hashed by <protocol, addr, port> */
286static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
287/* the service table hashed by fwmark */
288static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
289
290/*
291 * Hash table: for real service lookups
292 */
293#define IP_VS_RTAB_BITS 4
294#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
295#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
296
297static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
298
299/*
300 * Trash for destinations
301 */
302static LIST_HEAD(ip_vs_dest_trash);
303
304/*
305 * FTP & NULL virtual service counters
306 */
307static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
308static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
309
310
311/*
312 * Returns hash value for virtual service
313 */
314static __inline__ unsigned
315ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
316 __be16 port)
317{
318 register unsigned porth = ntohs(port);
319 __be32 addr_fold = addr->ip;
320
321#ifdef CONFIG_IP_VS_IPV6
322 if (af == AF_INET6)
323 addr_fold = addr->ip6[0]^addr->ip6[1]^
324 addr->ip6[2]^addr->ip6[3];
325#endif
326
327 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
328 & IP_VS_SVC_TAB_MASK;
329}
330
331/*
332 * Returns hash value of fwmark for virtual service lookup
333 */
334static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
335{
336 return fwmark & IP_VS_SVC_TAB_MASK;
337}
338
339/*
340 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
341 * or in the ip_vs_svc_fwm_table by fwmark.
342 * Should be called with locked tables.
343 */
344static int ip_vs_svc_hash(struct ip_vs_service *svc)
345{
346 unsigned hash;
347
348 if (svc->flags & IP_VS_SVC_F_HASHED) {
349 pr_err("%s(): request for already hashed, called from %pF\n",
350 __func__, __builtin_return_address(0));
351 return 0;
352 }
353
354 if (svc->fwmark == 0) {
355 /*
356 * Hash it by <protocol,addr,port> in ip_vs_svc_table
357 */
358 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
359 svc->port);
360 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
361 } else {
362 /*
363 * Hash it by fwmark in ip_vs_svc_fwm_table
364 */
365 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
366 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
367 }
368
369 svc->flags |= IP_VS_SVC_F_HASHED;
370 /* increase its refcnt because it is referenced by the svc table */
371 atomic_inc(&svc->refcnt);
372 return 1;
373}
374
375
376/*
377 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
378 * Should be called with locked tables.
379 */
380static int ip_vs_svc_unhash(struct ip_vs_service *svc)
381{
382 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
383 pr_err("%s(): request for unhash flagged, called from %pF\n",
384 __func__, __builtin_return_address(0));
385 return 0;
386 }
387
388 if (svc->fwmark == 0) {
389 /* Remove it from the ip_vs_svc_table table */
390 list_del(&svc->s_list);
391 } else {
392 /* Remove it from the ip_vs_svc_fwm_table table */
393 list_del(&svc->f_list);
394 }
395
396 svc->flags &= ~IP_VS_SVC_F_HASHED;
397 atomic_dec(&svc->refcnt);
398 return 1;
399}
400
401
402/*
403 * Get service by {proto,addr,port} in the service table.
404 */
405static inline struct ip_vs_service *
406__ip_vs_service_find(int af, __u16 protocol, const union nf_inet_addr *vaddr,
407 __be16 vport)
408{
409 unsigned hash;
410 struct ip_vs_service *svc;
411
412 /* Check for "full" addressed entries */
413 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
414
415 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
416 if ((svc->af == af)
417 && ip_vs_addr_equal(af, &svc->addr, vaddr)
418 && (svc->port == vport)
419 && (svc->protocol == protocol)) {
420 /* HIT */
421 return svc;
422 }
423 }
424
425 return NULL;
426}
427
428
429/*
430 * Get service by {fwmark} in the service table.
431 */
432static inline struct ip_vs_service *
433__ip_vs_svc_fwm_find(int af, __u32 fwmark)
434{
435 unsigned hash;
436 struct ip_vs_service *svc;
437
438 /* Check for fwmark addressed entries */
439 hash = ip_vs_svc_fwm_hashkey(fwmark);
440
441 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
442 if (svc->fwmark == fwmark && svc->af == af) {
443 /* HIT */
444 return svc;
445 }
446 }
447
448 return NULL;
449}
450
451struct ip_vs_service *
452ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
453 const union nf_inet_addr *vaddr, __be16 vport)
454{
455 struct ip_vs_service *svc;
456
457 read_lock(&__ip_vs_svc_lock);
458
459 /*
460 * Check the table hashed by fwmark first
461 */
462 if (fwmark && (svc = __ip_vs_svc_fwm_find(af, fwmark)))
463 goto out;
464
465 /*
466 * Check the table hashed by <protocol,addr,port>
467 * for "full" addressed entries
468 */
469 svc = __ip_vs_service_find(af, protocol, vaddr, vport);
470
471 if (svc == NULL
472 && protocol == IPPROTO_TCP
473 && atomic_read(&ip_vs_ftpsvc_counter)
474 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
475 /*
476 * Check if ftp service entry exists, the packet
477 * might belong to FTP data connections.
478 */
479 svc = __ip_vs_service_find(af, protocol, vaddr, FTPPORT);
480 }
481
482 if (svc == NULL
483 && atomic_read(&ip_vs_nullsvc_counter)) {
484 /*
485 * Check if the catch-all port (port zero) exists
486 */
487 svc = __ip_vs_service_find(af, protocol, vaddr, 0);
488 }
489
490 out:
491 if (svc)
492 atomic_inc(&svc->usecnt);
493 read_unlock(&__ip_vs_svc_lock);
494
495 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
496 fwmark, ip_vs_proto_name(protocol),
497 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
498 svc ? "hit" : "not hit");
499
500 return svc;
501}
502
503
504static inline void
505__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
506{
507 atomic_inc(&svc->refcnt);
508 dest->svc = svc;
509}
510
511static void
512__ip_vs_unbind_svc(struct ip_vs_dest *dest)
513{
514 struct ip_vs_service *svc = dest->svc;
515
516 dest->svc = NULL;
517 if (atomic_dec_and_test(&svc->refcnt)) {
518 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
519 svc->fwmark,
520 IP_VS_DBG_ADDR(svc->af, &svc->addr),
521 ntohs(svc->port), atomic_read(&svc->usecnt));
522 kfree(svc);
523 }
524}
525
526
527/*
528 * Returns hash value for real service
529 */
530static inline unsigned ip_vs_rs_hashkey(int af,
531 const union nf_inet_addr *addr,
532 __be16 port)
533{
534 register unsigned porth = ntohs(port);
535 __be32 addr_fold = addr->ip;
536
537#ifdef CONFIG_IP_VS_IPV6
538 if (af == AF_INET6)
539 addr_fold = addr->ip6[0]^addr->ip6[1]^
540 addr->ip6[2]^addr->ip6[3];
541#endif
542
543 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
544 & IP_VS_RTAB_MASK;
545}
546
547/*
548 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
549 * should be called with locked tables.
550 */
551static int ip_vs_rs_hash(struct ip_vs_dest *dest)
552{
553 unsigned hash;
554
555 if (!list_empty(&dest->d_list)) {
556 return 0;
557 }
558
559 /*
560 * Hash by proto,addr,port,
561 * which are the parameters of the real service.
562 */
563 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
564
565 list_add(&dest->d_list, &ip_vs_rtable[hash]);
566
567 return 1;
568}
569
570/*
571 * UNhashes ip_vs_dest from ip_vs_rtable.
572 * should be called with locked tables.
573 */
574static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
575{
576 /*
577 * Remove it from the ip_vs_rtable table.
578 */
579 if (!list_empty(&dest->d_list)) {
580 list_del(&dest->d_list);
581 INIT_LIST_HEAD(&dest->d_list);
582 }
583
584 return 1;
585}
586
587/*
588 * Lookup real service by <proto,addr,port> in the real service table.
589 */
590struct ip_vs_dest *
591ip_vs_lookup_real_service(int af, __u16 protocol,
592 const union nf_inet_addr *daddr,
593 __be16 dport)
594{
595 unsigned hash;
596 struct ip_vs_dest *dest;
597
598 /*
599 * Check for "full" addressed entries
600 * Return the first found entry
601 */
602 hash = ip_vs_rs_hashkey(af, daddr, dport);
603
604 read_lock(&__ip_vs_rs_lock);
605 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
606 if ((dest->af == af)
607 && ip_vs_addr_equal(af, &dest->addr, daddr)
608 && (dest->port == dport)
609 && ((dest->protocol == protocol) ||
610 dest->vfwmark)) {
611 /* HIT */
612 read_unlock(&__ip_vs_rs_lock);
613 return dest;
614 }
615 }
616 read_unlock(&__ip_vs_rs_lock);
617
618 return NULL;
619}
620
621/*
622 * Lookup destination by {addr,port} in the given service
623 */
624static struct ip_vs_dest *
625ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
626 __be16 dport)
627{
628 struct ip_vs_dest *dest;
629
630 /*
631 * Find the destination for the given service
632 */
633 list_for_each_entry(dest, &svc->destinations, n_list) {
634 if ((dest->af == svc->af)
635 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
636 && (dest->port == dport)) {
637 /* HIT */
638 return dest;
639 }
640 }
641
642 return NULL;
643}
644
645/*
646 * Find destination by {daddr,dport,vaddr,protocol}
647 * Cretaed to be used in ip_vs_process_message() in
648 * the backup synchronization daemon. It finds the
649 * destination to be bound to the received connection
650 * on the backup.
651 *
652 * ip_vs_lookup_real_service() looked promissing, but
653 * seems not working as expected.
654 */
655struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
656 __be16 dport,
657 const union nf_inet_addr *vaddr,
658 __be16 vport, __u16 protocol)
659{
660 struct ip_vs_dest *dest;
661 struct ip_vs_service *svc;
662
663 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
664 if (!svc)
665 return NULL;
666 dest = ip_vs_lookup_dest(svc, daddr, dport);
667 if (dest)
668 atomic_inc(&dest->refcnt);
669 ip_vs_service_put(svc);
670 return dest;
671}
672
673/*
674 * Lookup dest by {svc,addr,port} in the destination trash.
675 * The destination trash is used to hold the destinations that are removed
676 * from the service table but are still referenced by some conn entries.
677 * The reason to add the destination trash is when the dest is temporary
678 * down (either by administrator or by monitor program), the dest can be
679 * picked back from the trash, the remaining connections to the dest can
680 * continue, and the counting information of the dest is also useful for
681 * scheduling.
682 */
683static struct ip_vs_dest *
684ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
685 __be16 dport)
686{
687 struct ip_vs_dest *dest, *nxt;
688
689 /*
690 * Find the destination in trash
691 */
692 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
693 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
694 "dest->refcnt=%d\n",
695 dest->vfwmark,
696 IP_VS_DBG_ADDR(svc->af, &dest->addr),
697 ntohs(dest->port),
698 atomic_read(&dest->refcnt));
699 if (dest->af == svc->af &&
700 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
701 dest->port == dport &&
702 dest->vfwmark == svc->fwmark &&
703 dest->protocol == svc->protocol &&
704 (svc->fwmark ||
705 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
706 dest->vport == svc->port))) {
707 /* HIT */
708 return dest;
709 }
710
711 /*
712 * Try to purge the destination from trash if not referenced
713 */
714 if (atomic_read(&dest->refcnt) == 1) {
715 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
716 "from trash\n",
717 dest->vfwmark,
718 IP_VS_DBG_ADDR(svc->af, &dest->addr),
719 ntohs(dest->port));
720 list_del(&dest->n_list);
721 ip_vs_dst_reset(dest);
722 __ip_vs_unbind_svc(dest);
723 kfree(dest);
724 }
725 }
726
727 return NULL;
728}
729
730
731/*
732 * Clean up all the destinations in the trash
733 * Called by the ip_vs_control_cleanup()
734 *
735 * When the ip_vs_control_clearup is activated by ipvs module exit,
736 * the service tables must have been flushed and all the connections
737 * are expired, and the refcnt of each destination in the trash must
738 * be 1, so we simply release them here.
739 */
740static void ip_vs_trash_cleanup(void)
741{
742 struct ip_vs_dest *dest, *nxt;
743
744 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
745 list_del(&dest->n_list);
746 ip_vs_dst_reset(dest);
747 __ip_vs_unbind_svc(dest);
748 kfree(dest);
749 }
750}
751
752
753static void
754ip_vs_zero_stats(struct ip_vs_stats *stats)
755{
756 spin_lock_bh(&stats->lock);
757
758 memset(&stats->ustats, 0, sizeof(stats->ustats));
759 ip_vs_zero_estimator(stats);
760
761 spin_unlock_bh(&stats->lock);
762}
763
764/*
765 * Update a destination in the given service
766 */
767static void
768__ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
769 struct ip_vs_dest_user_kern *udest, int add)
770{
771 int conn_flags;
772
773 /* set the weight and the flags */
774 atomic_set(&dest->weight, udest->weight);
775 conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK;
776 conn_flags |= IP_VS_CONN_F_INACTIVE;
777
778 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
779 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) {
780 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
781 } else {
782 /*
783 * Put the real service in ip_vs_rtable if not present.
784 * For now only for NAT!
785 */
786 write_lock_bh(&__ip_vs_rs_lock);
787 ip_vs_rs_hash(dest);
788 write_unlock_bh(&__ip_vs_rs_lock);
789 }
790 atomic_set(&dest->conn_flags, conn_flags);
791
792 /* bind the service */
793 if (!dest->svc) {
794 __ip_vs_bind_svc(dest, svc);
795 } else {
796 if (dest->svc != svc) {
797 __ip_vs_unbind_svc(dest);
798 ip_vs_zero_stats(&dest->stats);
799 __ip_vs_bind_svc(dest, svc);
800 }
801 }
802
803 /* set the dest status flags */
804 dest->flags |= IP_VS_DEST_F_AVAILABLE;
805
806 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
807 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
808 dest->u_threshold = udest->u_threshold;
809 dest->l_threshold = udest->l_threshold;
810
811 spin_lock(&dest->dst_lock);
812 ip_vs_dst_reset(dest);
813 spin_unlock(&dest->dst_lock);
814
815 if (add)
816 ip_vs_new_estimator(&dest->stats);
817
818 write_lock_bh(&__ip_vs_svc_lock);
819
820 /* Wait until all other svc users go away */
821 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
822
823 if (add) {
824 list_add(&dest->n_list, &svc->destinations);
825 svc->num_dests++;
826 }
827
828 /* call the update_service, because server weight may be changed */
829 if (svc->scheduler->update_service)
830 svc->scheduler->update_service(svc);
831
832 write_unlock_bh(&__ip_vs_svc_lock);
833}
834
835
836/*
837 * Create a destination for the given service
838 */
839static int
840ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
841 struct ip_vs_dest **dest_p)
842{
843 struct ip_vs_dest *dest;
844 unsigned atype;
845
846 EnterFunction(2);
847
848#ifdef CONFIG_IP_VS_IPV6
849 if (svc->af == AF_INET6) {
850 atype = ipv6_addr_type(&udest->addr.in6);
851 if ((!(atype & IPV6_ADDR_UNICAST) ||
852 atype & IPV6_ADDR_LINKLOCAL) &&
853 !__ip_vs_addr_is_local_v6(&udest->addr.in6))
854 return -EINVAL;
855 } else
856#endif
857 {
858 atype = inet_addr_type(&init_net, udest->addr.ip);
859 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
860 return -EINVAL;
861 }
862
863 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_KERNEL);
864 if (dest == NULL) {
865 pr_err("%s(): no memory.\n", __func__);
866 return -ENOMEM;
867 }
868
869 dest->af = svc->af;
870 dest->protocol = svc->protocol;
871 dest->vaddr = svc->addr;
872 dest->vport = svc->port;
873 dest->vfwmark = svc->fwmark;
874 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
875 dest->port = udest->port;
876
877 atomic_set(&dest->activeconns, 0);
878 atomic_set(&dest->inactconns, 0);
879 atomic_set(&dest->persistconns, 0);
880 atomic_set(&dest->refcnt, 1);
881
882 INIT_LIST_HEAD(&dest->d_list);
883 spin_lock_init(&dest->dst_lock);
884 spin_lock_init(&dest->stats.lock);
885 __ip_vs_update_dest(svc, dest, udest, 1);
886
887 *dest_p = dest;
888
889 LeaveFunction(2);
890 return 0;
891}
892
893
894/*
895 * Add a destination into an existing service
896 */
897static int
898ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
899{
900 struct ip_vs_dest *dest;
901 union nf_inet_addr daddr;
902 __be16 dport = udest->port;
903 int ret;
904
905 EnterFunction(2);
906
907 if (udest->weight < 0) {
908 pr_err("%s(): server weight less than zero\n", __func__);
909 return -ERANGE;
910 }
911
912 if (udest->l_threshold > udest->u_threshold) {
913 pr_err("%s(): lower threshold is higher than upper threshold\n",
914 __func__);
915 return -ERANGE;
916 }
917
918 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
919
920 /*
921 * Check if the dest already exists in the list
922 */
923 dest = ip_vs_lookup_dest(svc, &daddr, dport);
924
925 if (dest != NULL) {
926 IP_VS_DBG(1, "%s(): dest already exists\n", __func__);
927 return -EEXIST;
928 }
929
930 /*
931 * Check if the dest already exists in the trash and
932 * is from the same service
933 */
934 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
935
936 if (dest != NULL) {
937 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
938 "dest->refcnt=%d, service %u/%s:%u\n",
939 IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
940 atomic_read(&dest->refcnt),
941 dest->vfwmark,
942 IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
943 ntohs(dest->vport));
944
945 /*
946 * Get the destination from the trash
947 */
948 list_del(&dest->n_list);
949
950 __ip_vs_update_dest(svc, dest, udest, 1);
951 ret = 0;
952 } else {
953 /*
954 * Allocate and initialize the dest structure
955 */
956 ret = ip_vs_new_dest(svc, udest, &dest);
957 }
958 LeaveFunction(2);
959
960 return ret;
961}
962
963
964/*
965 * Edit a destination in the given service
966 */
967static int
968ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
969{
970 struct ip_vs_dest *dest;
971 union nf_inet_addr daddr;
972 __be16 dport = udest->port;
973
974 EnterFunction(2);
975
976 if (udest->weight < 0) {
977 pr_err("%s(): server weight less than zero\n", __func__);
978 return -ERANGE;
979 }
980
981 if (udest->l_threshold > udest->u_threshold) {
982 pr_err("%s(): lower threshold is higher than upper threshold\n",
983 __func__);
984 return -ERANGE;
985 }
986
987 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
988
989 /*
990 * Lookup the destination list
991 */
992 dest = ip_vs_lookup_dest(svc, &daddr, dport);
993
994 if (dest == NULL) {
995 IP_VS_DBG(1, "%s(): dest doesn't exist\n", __func__);
996 return -ENOENT;
997 }
998
999 __ip_vs_update_dest(svc, dest, udest, 0);
1000 LeaveFunction(2);
1001
1002 return 0;
1003}
1004
1005
1006/*
1007 * Delete a destination (must be already unlinked from the service)
1008 */
1009static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1010{
1011 ip_vs_kill_estimator(&dest->stats);
1012
1013 /*
1014 * Remove it from the d-linked list with the real services.
1015 */
1016 write_lock_bh(&__ip_vs_rs_lock);
1017 ip_vs_rs_unhash(dest);
1018 write_unlock_bh(&__ip_vs_rs_lock);
1019
1020 /*
1021 * Decrease the refcnt of the dest, and free the dest
1022 * if nobody refers to it (refcnt=0). Otherwise, throw
1023 * the destination into the trash.
1024 */
1025 if (atomic_dec_and_test(&dest->refcnt)) {
1026 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u\n",
1027 dest->vfwmark,
1028 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1029 ntohs(dest->port));
1030 ip_vs_dst_reset(dest);
1031 /* simply decrease svc->refcnt here, let the caller check
1032 and release the service if nobody refers to it.
1033 Only user context can release destination and service,
1034 and only one user context can update virtual service at a
1035 time, so the operation here is OK */
1036 atomic_dec(&dest->svc->refcnt);
1037 kfree(dest);
1038 } else {
1039 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1040 "dest->refcnt=%d\n",
1041 IP_VS_DBG_ADDR(dest->af, &dest->addr),
1042 ntohs(dest->port),
1043 atomic_read(&dest->refcnt));
1044 list_add(&dest->n_list, &ip_vs_dest_trash);
1045 atomic_inc(&dest->refcnt);
1046 }
1047}
1048
1049
1050/*
1051 * Unlink a destination from the given service
1052 */
1053static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1054 struct ip_vs_dest *dest,
1055 int svcupd)
1056{
1057 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1058
1059 /*
1060 * Remove it from the d-linked destination list.
1061 */
1062 list_del(&dest->n_list);
1063 svc->num_dests--;
1064
1065 /*
1066 * Call the update_service function of its scheduler
1067 */
1068 if (svcupd && svc->scheduler->update_service)
1069 svc->scheduler->update_service(svc);
1070}
1071
1072
1073/*
1074 * Delete a destination server in the given service
1075 */
1076static int
1077ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1078{
1079 struct ip_vs_dest *dest;
1080 __be16 dport = udest->port;
1081
1082 EnterFunction(2);
1083
1084 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1085
1086 if (dest == NULL) {
1087 IP_VS_DBG(1, "%s(): destination not found!\n", __func__);
1088 return -ENOENT;
1089 }
1090
1091 write_lock_bh(&__ip_vs_svc_lock);
1092
1093 /*
1094 * Wait until all other svc users go away.
1095 */
1096 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1097
1098 /*
1099 * Unlink dest from the service
1100 */
1101 __ip_vs_unlink_dest(svc, dest, 1);
1102
1103 write_unlock_bh(&__ip_vs_svc_lock);
1104
1105 /*
1106 * Delete the destination
1107 */
1108 __ip_vs_del_dest(dest);
1109
1110 LeaveFunction(2);
1111
1112 return 0;
1113}
1114
1115
1116/*
1117 * Add a service into the service hash table
1118 */
1119static int
1120ip_vs_add_service(struct ip_vs_service_user_kern *u,
1121 struct ip_vs_service **svc_p)
1122{
1123 int ret = 0;
1124 struct ip_vs_scheduler *sched = NULL;
1125 struct ip_vs_pe *pe = NULL;
1126 struct ip_vs_service *svc = NULL;
1127
1128 /* increase the module use count */
1129 ip_vs_use_count_inc();
1130
1131 /* Lookup the scheduler by 'u->sched_name' */
1132 sched = ip_vs_scheduler_get(u->sched_name);
1133 if (sched == NULL) {
1134 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1135 ret = -ENOENT;
1136 goto out_err;
1137 }
1138
1139 if (u->pe_name && *u->pe_name) {
1140 pe = ip_vs_pe_get(u->pe_name);
1141 if (pe == NULL) {
1142 pr_info("persistence engine module ip_vs_pe_%s "
1143 "not found\n", u->pe_name);
1144 ret = -ENOENT;
1145 goto out_err;
1146 }
1147 }
1148
1149#ifdef CONFIG_IP_VS_IPV6
1150 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1151 ret = -EINVAL;
1152 goto out_err;
1153 }
1154#endif
1155
1156 svc = kzalloc(sizeof(struct ip_vs_service), GFP_KERNEL);
1157 if (svc == NULL) {
1158 IP_VS_DBG(1, "%s(): no memory\n", __func__);
1159 ret = -ENOMEM;
1160 goto out_err;
1161 }
1162
1163 /* I'm the first user of the service */
1164 atomic_set(&svc->usecnt, 0);
1165 atomic_set(&svc->refcnt, 0);
1166
1167 svc->af = u->af;
1168 svc->protocol = u->protocol;
1169 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1170 svc->port = u->port;
1171 svc->fwmark = u->fwmark;
1172 svc->flags = u->flags;
1173 svc->timeout = u->timeout * HZ;
1174 svc->netmask = u->netmask;
1175
1176 INIT_LIST_HEAD(&svc->destinations);
1177 rwlock_init(&svc->sched_lock);
1178 spin_lock_init(&svc->stats.lock);
1179
1180 /* Bind the scheduler */
1181 ret = ip_vs_bind_scheduler(svc, sched);
1182 if (ret)
1183 goto out_err;
1184 sched = NULL;
1185
1186 /* Bind the ct retriever */
1187 ip_vs_bind_pe(svc, pe);
1188 pe = NULL;
1189
1190 /* Update the virtual service counters */
1191 if (svc->port == FTPPORT)
1192 atomic_inc(&ip_vs_ftpsvc_counter);
1193 else if (svc->port == 0)
1194 atomic_inc(&ip_vs_nullsvc_counter);
1195
1196 ip_vs_new_estimator(&svc->stats);
1197
1198 /* Count only IPv4 services for old get/setsockopt interface */
1199 if (svc->af == AF_INET)
1200 ip_vs_num_services++;
1201
1202 /* Hash the service into the service table */
1203 write_lock_bh(&__ip_vs_svc_lock);
1204 ip_vs_svc_hash(svc);
1205 write_unlock_bh(&__ip_vs_svc_lock);
1206
1207 *svc_p = svc;
1208 return 0;
1209
1210 out_err:
1211 if (svc != NULL) {
1212 ip_vs_unbind_scheduler(svc);
1213 if (svc->inc) {
1214 local_bh_disable();
1215 ip_vs_app_inc_put(svc->inc);
1216 local_bh_enable();
1217 }
1218 kfree(svc);
1219 }
1220 ip_vs_scheduler_put(sched);
1221 ip_vs_pe_put(pe);
1222
1223 /* decrease the module use count */
1224 ip_vs_use_count_dec();
1225
1226 return ret;
1227}
1228
1229
1230/*
1231 * Edit a service and bind it with a new scheduler
1232 */
1233static int
1234ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1235{
1236 struct ip_vs_scheduler *sched, *old_sched;
1237 struct ip_vs_pe *pe = NULL, *old_pe = NULL;
1238 int ret = 0;
1239
1240 /*
1241 * Lookup the scheduler, by 'u->sched_name'
1242 */
1243 sched = ip_vs_scheduler_get(u->sched_name);
1244 if (sched == NULL) {
1245 pr_info("Scheduler module ip_vs_%s not found\n", u->sched_name);
1246 return -ENOENT;
1247 }
1248 old_sched = sched;
1249
1250 if (u->pe_name && *u->pe_name) {
1251 pe = ip_vs_pe_get(u->pe_name);
1252 if (pe == NULL) {
1253 pr_info("persistence engine module ip_vs_pe_%s "
1254 "not found\n", u->pe_name);
1255 ret = -ENOENT;
1256 goto out;
1257 }
1258 old_pe = pe;
1259 }
1260
1261#ifdef CONFIG_IP_VS_IPV6
1262 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) {
1263 ret = -EINVAL;
1264 goto out;
1265 }
1266#endif
1267
1268 write_lock_bh(&__ip_vs_svc_lock);
1269
1270 /*
1271 * Wait until all other svc users go away.
1272 */
1273 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1274
1275 /*
1276 * Set the flags and timeout value
1277 */
1278 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1279 svc->timeout = u->timeout * HZ;
1280 svc->netmask = u->netmask;
1281
1282 old_sched = svc->scheduler;
1283 if (sched != old_sched) {
1284 /*
1285 * Unbind the old scheduler
1286 */
1287 if ((ret = ip_vs_unbind_scheduler(svc))) {
1288 old_sched = sched;
1289 goto out_unlock;
1290 }
1291
1292 /*
1293 * Bind the new scheduler
1294 */
1295 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1296 /*
1297 * If ip_vs_bind_scheduler fails, restore the old
1298 * scheduler.
1299 * The main reason of failure is out of memory.
1300 *
1301 * The question is if the old scheduler can be
1302 * restored all the time. TODO: if it cannot be
1303 * restored some time, we must delete the service,
1304 * otherwise the system may crash.
1305 */
1306 ip_vs_bind_scheduler(svc, old_sched);
1307 old_sched = sched;
1308 goto out_unlock;
1309 }
1310 }
1311
1312 old_pe = svc->pe;
1313 if (pe != old_pe) {
1314 ip_vs_unbind_pe(svc);
1315 ip_vs_bind_pe(svc, pe);
1316 }
1317
1318 out_unlock:
1319 write_unlock_bh(&__ip_vs_svc_lock);
1320 out:
1321 ip_vs_scheduler_put(old_sched);
1322 ip_vs_pe_put(old_pe);
1323 return ret;
1324}
1325
1326
1327/*
1328 * Delete a service from the service list
1329 * - The service must be unlinked, unlocked and not referenced!
1330 * - We are called under _bh lock
1331 */
1332static void __ip_vs_del_service(struct ip_vs_service *svc)
1333{
1334 struct ip_vs_dest *dest, *nxt;
1335 struct ip_vs_scheduler *old_sched;
1336 struct ip_vs_pe *old_pe;
1337
1338 pr_info("%s: enter\n", __func__);
1339
1340 /* Count only IPv4 services for old get/setsockopt interface */
1341 if (svc->af == AF_INET)
1342 ip_vs_num_services--;
1343
1344 ip_vs_kill_estimator(&svc->stats);
1345
1346 /* Unbind scheduler */
1347 old_sched = svc->scheduler;
1348 ip_vs_unbind_scheduler(svc);
1349 ip_vs_scheduler_put(old_sched);
1350
1351 /* Unbind persistence engine */
1352 old_pe = svc->pe;
1353 ip_vs_unbind_pe(svc);
1354 ip_vs_pe_put(old_pe);
1355
1356 /* Unbind app inc */
1357 if (svc->inc) {
1358 ip_vs_app_inc_put(svc->inc);
1359 svc->inc = NULL;
1360 }
1361
1362 /*
1363 * Unlink the whole destination list
1364 */
1365 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1366 __ip_vs_unlink_dest(svc, dest, 0);
1367 __ip_vs_del_dest(dest);
1368 }
1369
1370 /*
1371 * Update the virtual service counters
1372 */
1373 if (svc->port == FTPPORT)
1374 atomic_dec(&ip_vs_ftpsvc_counter);
1375 else if (svc->port == 0)
1376 atomic_dec(&ip_vs_nullsvc_counter);
1377
1378 /*
1379 * Free the service if nobody refers to it
1380 */
1381 if (atomic_read(&svc->refcnt) == 0) {
1382 IP_VS_DBG_BUF(3, "Removing service %u/%s:%u usecnt=%d\n",
1383 svc->fwmark,
1384 IP_VS_DBG_ADDR(svc->af, &svc->addr),
1385 ntohs(svc->port), atomic_read(&svc->usecnt));
1386 kfree(svc);
1387 }
1388
1389 /* decrease the module use count */
1390 ip_vs_use_count_dec();
1391}
1392
1393/*
1394 * Unlink a service from list and try to delete it if its refcnt reached 0
1395 */
1396static void ip_vs_unlink_service(struct ip_vs_service *svc)
1397{
1398 /*
1399 * Unhash it from the service table
1400 */
1401 write_lock_bh(&__ip_vs_svc_lock);
1402
1403 ip_vs_svc_unhash(svc);
1404
1405 /*
1406 * Wait until all the svc users go away.
1407 */
1408 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1409
1410 __ip_vs_del_service(svc);
1411
1412 write_unlock_bh(&__ip_vs_svc_lock);
1413}
1414
1415/*
1416 * Delete a service from the service list
1417 */
1418static int ip_vs_del_service(struct ip_vs_service *svc)
1419{
1420 if (svc == NULL)
1421 return -EEXIST;
1422 ip_vs_unlink_service(svc);
1423
1424 return 0;
1425}
1426
1427
1428/*
1429 * Flush all the virtual services
1430 */
1431static int ip_vs_flush(void)
1432{
1433 int idx;
1434 struct ip_vs_service *svc, *nxt;
1435
1436 /*
1437 * Flush the service table hashed by <protocol,addr,port>
1438 */
1439 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1440 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1441 ip_vs_unlink_service(svc);
1442 }
1443 }
1444
1445 /*
1446 * Flush the service table hashed by fwmark
1447 */
1448 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1449 list_for_each_entry_safe(svc, nxt,
1450 &ip_vs_svc_fwm_table[idx], f_list) {
1451 ip_vs_unlink_service(svc);
1452 }
1453 }
1454
1455 return 0;
1456}
1457
1458
1459/*
1460 * Zero counters in a service or all services
1461 */
1462static int ip_vs_zero_service(struct ip_vs_service *svc)
1463{
1464 struct ip_vs_dest *dest;
1465
1466 write_lock_bh(&__ip_vs_svc_lock);
1467 list_for_each_entry(dest, &svc->destinations, n_list) {
1468 ip_vs_zero_stats(&dest->stats);
1469 }
1470 ip_vs_zero_stats(&svc->stats);
1471 write_unlock_bh(&__ip_vs_svc_lock);
1472 return 0;
1473}
1474
1475static int ip_vs_zero_all(void)
1476{
1477 int idx;
1478 struct ip_vs_service *svc;
1479
1480 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1481 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1482 ip_vs_zero_service(svc);
1483 }
1484 }
1485
1486 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1487 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1488 ip_vs_zero_service(svc);
1489 }
1490 }
1491
1492 ip_vs_zero_stats(&ip_vs_stats);
1493 return 0;
1494}
1495
1496
1497static int
1498proc_do_defense_mode(ctl_table *table, int write,
1499 void __user *buffer, size_t *lenp, loff_t *ppos)
1500{
1501 int *valp = table->data;
1502 int val = *valp;
1503 int rc;
1504
1505 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1506 if (write && (*valp != val)) {
1507 if ((*valp < 0) || (*valp > 3)) {
1508 /* Restore the correct value */
1509 *valp = val;
1510 } else {
1511 update_defense_level();
1512 }
1513 }
1514 return rc;
1515}
1516
1517
1518static int
1519proc_do_sync_threshold(ctl_table *table, int write,
1520 void __user *buffer, size_t *lenp, loff_t *ppos)
1521{
1522 int *valp = table->data;
1523 int val[2];
1524 int rc;
1525
1526 /* backup the value first */
1527 memcpy(val, valp, sizeof(val));
1528
1529 rc = proc_dointvec(table, write, buffer, lenp, ppos);
1530 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1531 /* Restore the correct value */
1532 memcpy(valp, val, sizeof(val));
1533 }
1534 return rc;
1535}
1536
1537
1538/*
1539 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1540 */
1541
1542static struct ctl_table vs_vars[] = {
1543 {
1544 .procname = "amemthresh",
1545 .data = &sysctl_ip_vs_amemthresh,
1546 .maxlen = sizeof(int),
1547 .mode = 0644,
1548 .proc_handler = proc_dointvec,
1549 },
1550#ifdef CONFIG_IP_VS_DEBUG
1551 {
1552 .procname = "debug_level",
1553 .data = &sysctl_ip_vs_debug_level,
1554 .maxlen = sizeof(int),
1555 .mode = 0644,
1556 .proc_handler = proc_dointvec,
1557 },
1558#endif
1559 {
1560 .procname = "am_droprate",
1561 .data = &sysctl_ip_vs_am_droprate,
1562 .maxlen = sizeof(int),
1563 .mode = 0644,
1564 .proc_handler = proc_dointvec,
1565 },
1566 {
1567 .procname = "drop_entry",
1568 .data = &sysctl_ip_vs_drop_entry,
1569 .maxlen = sizeof(int),
1570 .mode = 0644,
1571 .proc_handler = proc_do_defense_mode,
1572 },
1573 {
1574 .procname = "drop_packet",
1575 .data = &sysctl_ip_vs_drop_packet,
1576 .maxlen = sizeof(int),
1577 .mode = 0644,
1578 .proc_handler = proc_do_defense_mode,
1579 },
1580#ifdef CONFIG_IP_VS_NFCT
1581 {
1582 .procname = "conntrack",
1583 .data = &sysctl_ip_vs_conntrack,
1584 .maxlen = sizeof(int),
1585 .mode = 0644,
1586 .proc_handler = &proc_dointvec,
1587 },
1588#endif
1589 {
1590 .procname = "secure_tcp",
1591 .data = &sysctl_ip_vs_secure_tcp,
1592 .maxlen = sizeof(int),
1593 .mode = 0644,
1594 .proc_handler = proc_do_defense_mode,
1595 },
1596 {
1597 .procname = "snat_reroute",
1598 .data = &sysctl_ip_vs_snat_reroute,
1599 .maxlen = sizeof(int),
1600 .mode = 0644,
1601 .proc_handler = &proc_dointvec,
1602 },
1603#if 0
1604 {
1605 .procname = "timeout_established",
1606 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1607 .maxlen = sizeof(int),
1608 .mode = 0644,
1609 .proc_handler = proc_dointvec_jiffies,
1610 },
1611 {
1612 .procname = "timeout_synsent",
1613 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1614 .maxlen = sizeof(int),
1615 .mode = 0644,
1616 .proc_handler = proc_dointvec_jiffies,
1617 },
1618 {
1619 .procname = "timeout_synrecv",
1620 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
1623 .proc_handler = proc_dointvec_jiffies,
1624 },
1625 {
1626 .procname = "timeout_finwait",
1627 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1628 .maxlen = sizeof(int),
1629 .mode = 0644,
1630 .proc_handler = proc_dointvec_jiffies,
1631 },
1632 {
1633 .procname = "timeout_timewait",
1634 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1635 .maxlen = sizeof(int),
1636 .mode = 0644,
1637 .proc_handler = proc_dointvec_jiffies,
1638 },
1639 {
1640 .procname = "timeout_close",
1641 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1642 .maxlen = sizeof(int),
1643 .mode = 0644,
1644 .proc_handler = proc_dointvec_jiffies,
1645 },
1646 {
1647 .procname = "timeout_closewait",
1648 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1649 .maxlen = sizeof(int),
1650 .mode = 0644,
1651 .proc_handler = proc_dointvec_jiffies,
1652 },
1653 {
1654 .procname = "timeout_lastack",
1655 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1656 .maxlen = sizeof(int),
1657 .mode = 0644,
1658 .proc_handler = proc_dointvec_jiffies,
1659 },
1660 {
1661 .procname = "timeout_listen",
1662 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1663 .maxlen = sizeof(int),
1664 .mode = 0644,
1665 .proc_handler = proc_dointvec_jiffies,
1666 },
1667 {
1668 .procname = "timeout_synack",
1669 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1670 .maxlen = sizeof(int),
1671 .mode = 0644,
1672 .proc_handler = proc_dointvec_jiffies,
1673 },
1674 {
1675 .procname = "timeout_udp",
1676 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1677 .maxlen = sizeof(int),
1678 .mode = 0644,
1679 .proc_handler = proc_dointvec_jiffies,
1680 },
1681 {
1682 .procname = "timeout_icmp",
1683 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1684 .maxlen = sizeof(int),
1685 .mode = 0644,
1686 .proc_handler = proc_dointvec_jiffies,
1687 },
1688#endif
1689 {
1690 .procname = "cache_bypass",
1691 .data = &sysctl_ip_vs_cache_bypass,
1692 .maxlen = sizeof(int),
1693 .mode = 0644,
1694 .proc_handler = proc_dointvec,
1695 },
1696 {
1697 .procname = "expire_nodest_conn",
1698 .data = &sysctl_ip_vs_expire_nodest_conn,
1699 .maxlen = sizeof(int),
1700 .mode = 0644,
1701 .proc_handler = proc_dointvec,
1702 },
1703 {
1704 .procname = "expire_quiescent_template",
1705 .data = &sysctl_ip_vs_expire_quiescent_template,
1706 .maxlen = sizeof(int),
1707 .mode = 0644,
1708 .proc_handler = proc_dointvec,
1709 },
1710 {
1711 .procname = "sync_threshold",
1712 .data = &sysctl_ip_vs_sync_threshold,
1713 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1714 .mode = 0644,
1715 .proc_handler = proc_do_sync_threshold,
1716 },
1717 {
1718 .procname = "nat_icmp_send",
1719 .data = &sysctl_ip_vs_nat_icmp_send,
1720 .maxlen = sizeof(int),
1721 .mode = 0644,
1722 .proc_handler = proc_dointvec,
1723 },
1724 { }
1725};
1726
1727const struct ctl_path net_vs_ctl_path[] = {
1728 { .procname = "net", },
1729 { .procname = "ipv4", },
1730 { .procname = "vs", },
1731 { }
1732};
1733EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1734
1735static struct ctl_table_header * sysctl_header;
1736
1737#ifdef CONFIG_PROC_FS
1738
/* Cursor of the service-listing seq_file, stored in seq->private */
struct ip_vs_iter {
	struct list_head	*table;		/* hash table being walked */
	int			bucket;		/* current bucket in 'table' */
};
1743
1744/*
1745 * Write the contents of the VS rule table to a PROCfs file.
1746 * (It is kept just for backward compatibility)
1747 */
1748static inline const char *ip_vs_fwd_name(unsigned flags)
1749{
1750 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1751 case IP_VS_CONN_F_LOCALNODE:
1752 return "Local";
1753 case IP_VS_CONN_F_TUNNEL:
1754 return "Tunnel";
1755 case IP_VS_CONN_F_DROUTE:
1756 return "Route";
1757 default:
1758 return "Masq";
1759 }
1760}
1761
1762
1763/* Get the Nth entry in the two lists */
1764static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1765{
1766 struct ip_vs_iter *iter = seq->private;
1767 int idx;
1768 struct ip_vs_service *svc;
1769
1770 /* look in hash by protocol */
1771 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1772 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1773 if (pos-- == 0){
1774 iter->table = ip_vs_svc_table;
1775 iter->bucket = idx;
1776 return svc;
1777 }
1778 }
1779 }
1780
1781 /* keep looking in fwmark */
1782 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1783 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1784 if (pos-- == 0) {
1785 iter->table = ip_vs_svc_fwm_table;
1786 iter->bucket = idx;
1787 return svc;
1788 }
1789 }
1790 }
1791
1792 return NULL;
1793}
1794
1795static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1796__acquires(__ip_vs_svc_lock)
1797{
1798
1799 read_lock_bh(&__ip_vs_svc_lock);
1800 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1801}
1802
1803
1804static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1805{
1806 struct list_head *e;
1807 struct ip_vs_iter *iter;
1808 struct ip_vs_service *svc;
1809
1810 ++*pos;
1811 if (v == SEQ_START_TOKEN)
1812 return ip_vs_info_array(seq,0);
1813
1814 svc = v;
1815 iter = seq->private;
1816
1817 if (iter->table == ip_vs_svc_table) {
1818 /* next service in table hashed by protocol */
1819 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1820 return list_entry(e, struct ip_vs_service, s_list);
1821
1822
1823 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1824 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1825 s_list) {
1826 return svc;
1827 }
1828 }
1829
1830 iter->table = ip_vs_svc_fwm_table;
1831 iter->bucket = -1;
1832 goto scan_fwmark;
1833 }
1834
1835 /* next service in hashed by fwmark */
1836 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1837 return list_entry(e, struct ip_vs_service, f_list);
1838
1839 scan_fwmark:
1840 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1841 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1842 f_list)
1843 return svc;
1844 }
1845
1846 return NULL;
1847}
1848
1849static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1850__releases(__ip_vs_svc_lock)
1851{
1852 read_unlock_bh(&__ip_vs_svc_lock);
1853}
1854
1855
1856static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1857{
1858 if (v == SEQ_START_TOKEN) {
1859 seq_printf(seq,
1860 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1861 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
1862 seq_puts(seq,
1863 "Prot LocalAddress:Port Scheduler Flags\n");
1864 seq_puts(seq,
1865 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1866 } else {
1867 const struct ip_vs_service *svc = v;
1868 const struct ip_vs_iter *iter = seq->private;
1869 const struct ip_vs_dest *dest;
1870
1871 if (iter->table == ip_vs_svc_table) {
1872#ifdef CONFIG_IP_VS_IPV6
1873 if (svc->af == AF_INET6)
1874 seq_printf(seq, "%s [%pI6]:%04X %s ",
1875 ip_vs_proto_name(svc->protocol),
1876 &svc->addr.in6,
1877 ntohs(svc->port),
1878 svc->scheduler->name);
1879 else
1880#endif
1881 seq_printf(seq, "%s %08X:%04X %s %s ",
1882 ip_vs_proto_name(svc->protocol),
1883 ntohl(svc->addr.ip),
1884 ntohs(svc->port),
1885 svc->scheduler->name,
1886 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1887 } else {
1888 seq_printf(seq, "FWM %08X %s %s",
1889 svc->fwmark, svc->scheduler->name,
1890 (svc->flags & IP_VS_SVC_F_ONEPACKET)?"ops ":"");
1891 }
1892
1893 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1894 seq_printf(seq, "persistent %d %08X\n",
1895 svc->timeout,
1896 ntohl(svc->netmask));
1897 else
1898 seq_putc(seq, '\n');
1899
1900 list_for_each_entry(dest, &svc->destinations, n_list) {
1901#ifdef CONFIG_IP_VS_IPV6
1902 if (dest->af == AF_INET6)
1903 seq_printf(seq,
1904 " -> [%pI6]:%04X"
1905 " %-7s %-6d %-10d %-10d\n",
1906 &dest->addr.in6,
1907 ntohs(dest->port),
1908 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1909 atomic_read(&dest->weight),
1910 atomic_read(&dest->activeconns),
1911 atomic_read(&dest->inactconns));
1912 else
1913#endif
1914 seq_printf(seq,
1915 " -> %08X:%04X "
1916 "%-7s %-6d %-10d %-10d\n",
1917 ntohl(dest->addr.ip),
1918 ntohs(dest->port),
1919 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1920 atomic_read(&dest->weight),
1921 atomic_read(&dest->activeconns),
1922 atomic_read(&dest->inactconns));
1923
1924 }
1925 }
1926 return 0;
1927}
1928
1929static const struct seq_operations ip_vs_info_seq_ops = {
1930 .start = ip_vs_info_seq_start,
1931 .next = ip_vs_info_seq_next,
1932 .stop = ip_vs_info_seq_stop,
1933 .show = ip_vs_info_seq_show,
1934};
1935
1936static int ip_vs_info_open(struct inode *inode, struct file *file)
1937{
1938 return seq_open_private(file, &ip_vs_info_seq_ops,
1939 sizeof(struct ip_vs_iter));
1940}
1941
1942static const struct file_operations ip_vs_info_fops = {
1943 .owner = THIS_MODULE,
1944 .open = ip_vs_info_open,
1945 .read = seq_read,
1946 .llseek = seq_lseek,
1947 .release = seq_release_private,
1948};
1949
1950#endif
1951
1952struct ip_vs_stats ip_vs_stats = {
1953 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1954};
1955
1956#ifdef CONFIG_PROC_FS
1957static int ip_vs_stats_show(struct seq_file *seq, void *v)
1958{
1959
1960/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1961 seq_puts(seq,
1962 " Total Incoming Outgoing Incoming Outgoing\n");
1963 seq_printf(seq,
1964 " Conns Packets Packets Bytes Bytes\n");
1965
1966 spin_lock_bh(&ip_vs_stats.lock);
1967 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.ustats.conns,
1968 ip_vs_stats.ustats.inpkts, ip_vs_stats.ustats.outpkts,
1969 (unsigned long long) ip_vs_stats.ustats.inbytes,
1970 (unsigned long long) ip_vs_stats.ustats.outbytes);
1971
1972/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1973 seq_puts(seq,
1974 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1975 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1976 ip_vs_stats.ustats.cps,
1977 ip_vs_stats.ustats.inpps,
1978 ip_vs_stats.ustats.outpps,
1979 ip_vs_stats.ustats.inbps,
1980 ip_vs_stats.ustats.outbps);
1981 spin_unlock_bh(&ip_vs_stats.lock);
1982
1983 return 0;
1984}
1985
1986static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1987{
1988 return single_open(file, ip_vs_stats_show, NULL);
1989}
1990
1991static const struct file_operations ip_vs_stats_fops = {
1992 .owner = THIS_MODULE,
1993 .open = ip_vs_stats_seq_open,
1994 .read = seq_read,
1995 .llseek = seq_lseek,
1996 .release = single_release,
1997};
1998
1999#endif
2000
2001/*
2002 * Set timeout values for tcp tcpfin udp in the timeout_table.
2003 */
2004static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2005{
2006 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2007 u->tcp_timeout,
2008 u->tcp_fin_timeout,
2009 u->udp_timeout);
2010
2011#ifdef CONFIG_IP_VS_PROTO_TCP
2012 if (u->tcp_timeout) {
2013 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2014 = u->tcp_timeout * HZ;
2015 }
2016
2017 if (u->tcp_fin_timeout) {
2018 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2019 = u->tcp_fin_timeout * HZ;
2020 }
2021#endif
2022
2023#ifdef CONFIG_IP_VS_PROTO_UDP
2024 if (u->udp_timeout) {
2025 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2026 = u->udp_timeout * HZ;
2027 }
2028#endif
2029 return 0;
2030}
2031
2032
2033#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2034#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
2035#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
2036 sizeof(struct ip_vs_dest_user))
2037#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2038#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
2039#define MAX_ARG_LEN SVCDEST_ARG_LEN
2040
2041static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2042 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
2043 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
2044 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
2045 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
2046 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
2047 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
2048 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
2049 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
2050 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
2051 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
2052 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
2053};
2054
2055static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2056 struct ip_vs_service_user *usvc_compat)
2057{
2058 memset(usvc, 0, sizeof(*usvc));
2059
2060 usvc->af = AF_INET;
2061 usvc->protocol = usvc_compat->protocol;
2062 usvc->addr.ip = usvc_compat->addr;
2063 usvc->port = usvc_compat->port;
2064 usvc->fwmark = usvc_compat->fwmark;
2065
2066 /* Deep copy of sched_name is not needed here */
2067 usvc->sched_name = usvc_compat->sched_name;
2068
2069 usvc->flags = usvc_compat->flags;
2070 usvc->timeout = usvc_compat->timeout;
2071 usvc->netmask = usvc_compat->netmask;
2072}
2073
2074static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2075 struct ip_vs_dest_user *udest_compat)
2076{
2077 memset(udest, 0, sizeof(*udest));
2078
2079 udest->addr.ip = udest_compat->addr;
2080 udest->port = udest_compat->port;
2081 udest->conn_flags = udest_compat->conn_flags;
2082 udest->weight = udest_compat->weight;
2083 udest->u_threshold = udest_compat->u_threshold;
2084 udest->l_threshold = udest_compat->l_threshold;
2085}
2086
2087static int
2088do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2089{
2090 int ret;
2091 unsigned char arg[MAX_ARG_LEN];
2092 struct ip_vs_service_user *usvc_compat;
2093 struct ip_vs_service_user_kern usvc;
2094 struct ip_vs_service *svc;
2095 struct ip_vs_dest_user *udest_compat;
2096 struct ip_vs_dest_user_kern udest;
2097
2098 if (!capable(CAP_NET_ADMIN))
2099 return -EPERM;
2100
2101 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_SET_MAX)
2102 return -EINVAL;
2103 if (len < 0 || len > MAX_ARG_LEN)
2104 return -EINVAL;
2105 if (len != set_arglen[SET_CMDID(cmd)]) {
2106 pr_err("set_ctl: len %u != %u\n",
2107 len, set_arglen[SET_CMDID(cmd)]);
2108 return -EINVAL;
2109 }
2110
2111 if (copy_from_user(arg, user, len) != 0)
2112 return -EFAULT;
2113
2114 /* increase the module use count */
2115 ip_vs_use_count_inc();
2116
2117 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2118 ret = -ERESTARTSYS;
2119 goto out_dec;
2120 }
2121
2122 if (cmd == IP_VS_SO_SET_FLUSH) {
2123 /* Flush the virtual service */
2124 ret = ip_vs_flush();
2125 goto out_unlock;
2126 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2127 /* Set timeout values for (tcp tcpfin udp) */
2128 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2129 goto out_unlock;
2130 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2131 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2132 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2133 goto out_unlock;
2134 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2135 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2136 ret = stop_sync_thread(dm->state);
2137 goto out_unlock;
2138 }
2139
2140 usvc_compat = (struct ip_vs_service_user *)arg;
2141 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2142
2143 /* We only use the new structs internally, so copy userspace compat
2144 * structs to extended internal versions */
2145 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2146 ip_vs_copy_udest_compat(&udest, udest_compat);
2147
2148 if (cmd == IP_VS_SO_SET_ZERO) {
2149 /* if no service address is set, zero counters in all */
2150 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2151 ret = ip_vs_zero_all();
2152 goto out_unlock;
2153 }
2154 }
2155
2156 /* Check for valid protocol: TCP or UDP or SCTP, even for fwmark!=0 */
2157 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP &&
2158 usvc.protocol != IPPROTO_SCTP) {
2159 pr_err("set_ctl: invalid protocol: %d %pI4:%d %s\n",
2160 usvc.protocol, &usvc.addr.ip,
2161 ntohs(usvc.port), usvc.sched_name);
2162 ret = -EFAULT;
2163 goto out_unlock;
2164 }
2165
2166 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2167 if (usvc.fwmark == 0)
2168 svc = __ip_vs_service_find(usvc.af, usvc.protocol,
2169 &usvc.addr, usvc.port);
2170 else
2171 svc = __ip_vs_svc_fwm_find(usvc.af, usvc.fwmark);
2172
2173 if (cmd != IP_VS_SO_SET_ADD
2174 && (svc == NULL || svc->protocol != usvc.protocol)) {
2175 ret = -ESRCH;
2176 goto out_unlock;
2177 }
2178
2179 switch (cmd) {
2180 case IP_VS_SO_SET_ADD:
2181 if (svc != NULL)
2182 ret = -EEXIST;
2183 else
2184 ret = ip_vs_add_service(&usvc, &svc);
2185 break;
2186 case IP_VS_SO_SET_EDIT:
2187 ret = ip_vs_edit_service(svc, &usvc);
2188 break;
2189 case IP_VS_SO_SET_DEL:
2190 ret = ip_vs_del_service(svc);
2191 if (!ret)
2192 goto out_unlock;
2193 break;
2194 case IP_VS_SO_SET_ZERO:
2195 ret = ip_vs_zero_service(svc);
2196 break;
2197 case IP_VS_SO_SET_ADDDEST:
2198 ret = ip_vs_add_dest(svc, &udest);
2199 break;
2200 case IP_VS_SO_SET_EDITDEST:
2201 ret = ip_vs_edit_dest(svc, &udest);
2202 break;
2203 case IP_VS_SO_SET_DELDEST:
2204 ret = ip_vs_del_dest(svc, &udest);
2205 break;
2206 default:
2207 ret = -EINVAL;
2208 }
2209
2210 out_unlock:
2211 mutex_unlock(&__ip_vs_mutex);
2212 out_dec:
2213 /* decrease the module use count */
2214 ip_vs_use_count_dec();
2215
2216 return ret;
2217}
2218
2219
2220static void
2221ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2222{
2223 spin_lock_bh(&src->lock);
2224 memcpy(dst, &src->ustats, sizeof(*dst));
2225 spin_unlock_bh(&src->lock);
2226}
2227
2228static void
2229ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2230{
2231 dst->protocol = src->protocol;
2232 dst->addr = src->addr.ip;
2233 dst->port = src->port;
2234 dst->fwmark = src->fwmark;
2235 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2236 dst->flags = src->flags;
2237 dst->timeout = src->timeout / HZ;
2238 dst->netmask = src->netmask;
2239 dst->num_dests = src->num_dests;
2240 ip_vs_copy_stats(&dst->stats, &src->stats);
2241}
2242
2243static inline int
2244__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2245 struct ip_vs_get_services __user *uptr)
2246{
2247 int idx, count=0;
2248 struct ip_vs_service *svc;
2249 struct ip_vs_service_entry entry;
2250 int ret = 0;
2251
2252 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2253 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2254 /* Only expose IPv4 entries to old interface */
2255 if (svc->af != AF_INET)
2256 continue;
2257
2258 if (count >= get->num_services)
2259 goto out;
2260 memset(&entry, 0, sizeof(entry));
2261 ip_vs_copy_service(&entry, svc);
2262 if (copy_to_user(&uptr->entrytable[count],
2263 &entry, sizeof(entry))) {
2264 ret = -EFAULT;
2265 goto out;
2266 }
2267 count++;
2268 }
2269 }
2270
2271 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2272 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2273 /* Only expose IPv4 entries to old interface */
2274 if (svc->af != AF_INET)
2275 continue;
2276
2277 if (count >= get->num_services)
2278 goto out;
2279 memset(&entry, 0, sizeof(entry));
2280 ip_vs_copy_service(&entry, svc);
2281 if (copy_to_user(&uptr->entrytable[count],
2282 &entry, sizeof(entry))) {
2283 ret = -EFAULT;
2284 goto out;
2285 }
2286 count++;
2287 }
2288 }
2289 out:
2290 return ret;
2291}
2292
2293static inline int
2294__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2295 struct ip_vs_get_dests __user *uptr)
2296{
2297 struct ip_vs_service *svc;
2298 union nf_inet_addr addr = { .ip = get->addr };
2299 int ret = 0;
2300
2301 if (get->fwmark)
2302 svc = __ip_vs_svc_fwm_find(AF_INET, get->fwmark);
2303 else
2304 svc = __ip_vs_service_find(AF_INET, get->protocol, &addr,
2305 get->port);
2306
2307 if (svc) {
2308 int count = 0;
2309 struct ip_vs_dest *dest;
2310 struct ip_vs_dest_entry entry;
2311
2312 list_for_each_entry(dest, &svc->destinations, n_list) {
2313 if (count >= get->num_dests)
2314 break;
2315
2316 entry.addr = dest->addr.ip;
2317 entry.port = dest->port;
2318 entry.conn_flags = atomic_read(&dest->conn_flags);
2319 entry.weight = atomic_read(&dest->weight);
2320 entry.u_threshold = dest->u_threshold;
2321 entry.l_threshold = dest->l_threshold;
2322 entry.activeconns = atomic_read(&dest->activeconns);
2323 entry.inactconns = atomic_read(&dest->inactconns);
2324 entry.persistconns = atomic_read(&dest->persistconns);
2325 ip_vs_copy_stats(&entry.stats, &dest->stats);
2326 if (copy_to_user(&uptr->entrytable[count],
2327 &entry, sizeof(entry))) {
2328 ret = -EFAULT;
2329 break;
2330 }
2331 count++;
2332 }
2333 } else
2334 ret = -ESRCH;
2335 return ret;
2336}
2337
2338static inline void
2339__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2340{
2341#ifdef CONFIG_IP_VS_PROTO_TCP
2342 u->tcp_timeout =
2343 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2344 u->tcp_fin_timeout =
2345 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2346#endif
2347#ifdef CONFIG_IP_VS_PROTO_UDP
2348 u->udp_timeout =
2349 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2350#endif
2351}
2352
2353
2354#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2355#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2356#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2357#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2358#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2359#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2360#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2361
2362static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2363 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2364 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2365 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2366 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2367 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2368 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2369 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2370};
2371
2372static int
2373do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2374{
2375 unsigned char arg[128];
2376 int ret = 0;
2377 unsigned int copylen;
2378
2379 if (!capable(CAP_NET_ADMIN))
2380 return -EPERM;
2381
2382 if (cmd < IP_VS_BASE_CTL || cmd > IP_VS_SO_GET_MAX)
2383 return -EINVAL;
2384
2385 if (*len < get_arglen[GET_CMDID(cmd)]) {
2386 pr_err("get_ctl: len %u < %u\n",
2387 *len, get_arglen[GET_CMDID(cmd)]);
2388 return -EINVAL;
2389 }
2390
2391 copylen = get_arglen[GET_CMDID(cmd)];
2392 if (copylen > 128)
2393 return -EINVAL;
2394
2395 if (copy_from_user(arg, user, copylen) != 0)
2396 return -EFAULT;
2397
2398 if (mutex_lock_interruptible(&__ip_vs_mutex))
2399 return -ERESTARTSYS;
2400
2401 switch (cmd) {
2402 case IP_VS_SO_GET_VERSION:
2403 {
2404 char buf[64];
2405
2406 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2407 NVERSION(IP_VS_VERSION_CODE), ip_vs_conn_tab_size);
2408 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2409 ret = -EFAULT;
2410 goto out;
2411 }
2412 *len = strlen(buf)+1;
2413 }
2414 break;
2415
2416 case IP_VS_SO_GET_INFO:
2417 {
2418 struct ip_vs_getinfo info;
2419 info.version = IP_VS_VERSION_CODE;
2420 info.size = ip_vs_conn_tab_size;
2421 info.num_services = ip_vs_num_services;
2422 if (copy_to_user(user, &info, sizeof(info)) != 0)
2423 ret = -EFAULT;
2424 }
2425 break;
2426
2427 case IP_VS_SO_GET_SERVICES:
2428 {
2429 struct ip_vs_get_services *get;
2430 int size;
2431
2432 get = (struct ip_vs_get_services *)arg;
2433 size = sizeof(*get) +
2434 sizeof(struct ip_vs_service_entry) * get->num_services;
2435 if (*len != size) {
2436 pr_err("length: %u != %u\n", *len, size);
2437 ret = -EINVAL;
2438 goto out;
2439 }
2440 ret = __ip_vs_get_service_entries(get, user);
2441 }
2442 break;
2443
2444 case IP_VS_SO_GET_SERVICE:
2445 {
2446 struct ip_vs_service_entry *entry;
2447 struct ip_vs_service *svc;
2448 union nf_inet_addr addr;
2449
2450 entry = (struct ip_vs_service_entry *)arg;
2451 addr.ip = entry->addr;
2452 if (entry->fwmark)
2453 svc = __ip_vs_svc_fwm_find(AF_INET, entry->fwmark);
2454 else
2455 svc = __ip_vs_service_find(AF_INET, entry->protocol,
2456 &addr, entry->port);
2457 if (svc) {
2458 ip_vs_copy_service(entry, svc);
2459 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2460 ret = -EFAULT;
2461 } else
2462 ret = -ESRCH;
2463 }
2464 break;
2465
2466 case IP_VS_SO_GET_DESTS:
2467 {
2468 struct ip_vs_get_dests *get;
2469 int size;
2470
2471 get = (struct ip_vs_get_dests *)arg;
2472 size = sizeof(*get) +
2473 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2474 if (*len != size) {
2475 pr_err("length: %u != %u\n", *len, size);
2476 ret = -EINVAL;
2477 goto out;
2478 }
2479 ret = __ip_vs_get_dest_entries(get, user);
2480 }
2481 break;
2482
2483 case IP_VS_SO_GET_TIMEOUT:
2484 {
2485 struct ip_vs_timeout_user t;
2486
2487 __ip_vs_get_timeouts(&t);
2488 if (copy_to_user(user, &t, sizeof(t)) != 0)
2489 ret = -EFAULT;
2490 }
2491 break;
2492
2493 case IP_VS_SO_GET_DAEMON:
2494 {
2495 struct ip_vs_daemon_user d[2];
2496
2497 memset(&d, 0, sizeof(d));
2498 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2499 d[0].state = IP_VS_STATE_MASTER;
2500 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2501 d[0].syncid = ip_vs_master_syncid;
2502 }
2503 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2504 d[1].state = IP_VS_STATE_BACKUP;
2505 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2506 d[1].syncid = ip_vs_backup_syncid;
2507 }
2508 if (copy_to_user(user, &d, sizeof(d)) != 0)
2509 ret = -EFAULT;
2510 }
2511 break;
2512
2513 default:
2514 ret = -EINVAL;
2515 }
2516
2517 out:
2518 mutex_unlock(&__ip_vs_mutex);
2519 return ret;
2520}
2521
2522
2523static struct nf_sockopt_ops ip_vs_sockopts = {
2524 .pf = PF_INET,
2525 .set_optmin = IP_VS_BASE_CTL,
2526 .set_optmax = IP_VS_SO_SET_MAX+1,
2527 .set = do_ip_vs_set_ctl,
2528 .get_optmin = IP_VS_BASE_CTL,
2529 .get_optmax = IP_VS_SO_GET_MAX+1,
2530 .get = do_ip_vs_get_ctl,
2531 .owner = THIS_MODULE,
2532};
2533
2534/*
2535 * Generic Netlink interface
2536 */
2537
2538/* IPVS genetlink family */
2539static struct genl_family ip_vs_genl_family = {
2540 .id = GENL_ID_GENERATE,
2541 .hdrsize = 0,
2542 .name = IPVS_GENL_NAME,
2543 .version = IPVS_GENL_VERSION,
2544 .maxattr = IPVS_CMD_MAX,
2545};
2546
2547/* Policy used for first-level command attributes */
2548static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2549 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2550 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2551 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2552 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2553 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2554 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2555};
2556
2557/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2558static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2559 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2560 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2561 .len = IP_VS_IFNAME_MAXLEN },
2562 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2563};
2564
2565/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2566static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2567 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2568 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2569 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2570 .len = sizeof(union nf_inet_addr) },
2571 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2572 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2573 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2574 .len = IP_VS_SCHEDNAME_MAXLEN },
2575 [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING,
2576 .len = IP_VS_PENAME_MAXLEN },
2577 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2578 .len = sizeof(struct ip_vs_flags) },
2579 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2580 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2581 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2582};
2583
2584/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2585static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2586 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2587 .len = sizeof(union nf_inet_addr) },
2588 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2589 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2590 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2591 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2592 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2593 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2594 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2595 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2596 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2597};
2598
2599static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2600 struct ip_vs_stats *stats)
2601{
2602 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2603 if (!nl_stats)
2604 return -EMSGSIZE;
2605
2606 spin_lock_bh(&stats->lock);
2607
2608 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
2609 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
2610 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
2611 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
2612 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
2613 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
2614 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
2615 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
2616 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
2617 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
2618
2619 spin_unlock_bh(&stats->lock);
2620
2621 nla_nest_end(skb, nl_stats);
2622
2623 return 0;
2624
2625nla_put_failure:
2626 spin_unlock_bh(&stats->lock);
2627 nla_nest_cancel(skb, nl_stats);
2628 return -EMSGSIZE;
2629}
2630
2631static int ip_vs_genl_fill_service(struct sk_buff *skb,
2632 struct ip_vs_service *svc)
2633{
2634 struct nlattr *nl_service;
2635 struct ip_vs_flags flags = { .flags = svc->flags,
2636 .mask = ~0 };
2637
2638 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2639 if (!nl_service)
2640 return -EMSGSIZE;
2641
2642 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2643
2644 if (svc->fwmark) {
2645 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2646 } else {
2647 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2648 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2649 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2650 }
2651
2652 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2653 if (svc->pe)
2654 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name);
2655 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2656 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2657 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2658
2659 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2660 goto nla_put_failure;
2661
2662 nla_nest_end(skb, nl_service);
2663
2664 return 0;
2665
2666nla_put_failure:
2667 nla_nest_cancel(skb, nl_service);
2668 return -EMSGSIZE;
2669}
2670
2671static int ip_vs_genl_dump_service(struct sk_buff *skb,
2672 struct ip_vs_service *svc,
2673 struct netlink_callback *cb)
2674{
2675 void *hdr;
2676
2677 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2678 &ip_vs_genl_family, NLM_F_MULTI,
2679 IPVS_CMD_NEW_SERVICE);
2680 if (!hdr)
2681 return -EMSGSIZE;
2682
2683 if (ip_vs_genl_fill_service(skb, svc) < 0)
2684 goto nla_put_failure;
2685
2686 return genlmsg_end(skb, hdr);
2687
2688nla_put_failure:
2689 genlmsg_cancel(skb, hdr);
2690 return -EMSGSIZE;
2691}
2692
2693static int ip_vs_genl_dump_services(struct sk_buff *skb,
2694 struct netlink_callback *cb)
2695{
2696 int idx = 0, i;
2697 int start = cb->args[0];
2698 struct ip_vs_service *svc;
2699
2700 mutex_lock(&__ip_vs_mutex);
2701 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2702 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2703 if (++idx <= start)
2704 continue;
2705 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2706 idx--;
2707 goto nla_put_failure;
2708 }
2709 }
2710 }
2711
2712 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2713 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2714 if (++idx <= start)
2715 continue;
2716 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2717 idx--;
2718 goto nla_put_failure;
2719 }
2720 }
2721 }
2722
2723nla_put_failure:
2724 mutex_unlock(&__ip_vs_mutex);
2725 cb->args[0] = idx;
2726
2727 return skb->len;
2728}
2729
2730static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2731 struct nlattr *nla, int full_entry,
2732 struct ip_vs_service **ret_svc)
2733{
2734 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2735 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2736 struct ip_vs_service *svc;
2737
2738 /* Parse mandatory identifying service fields first */
2739 if (nla == NULL ||
2740 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2741 return -EINVAL;
2742
2743 nla_af = attrs[IPVS_SVC_ATTR_AF];
2744 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2745 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2746 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2747 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2748
2749 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2750 return -EINVAL;
2751
2752 memset(usvc, 0, sizeof(*usvc));
2753
2754 usvc->af = nla_get_u16(nla_af);
2755#ifdef CONFIG_IP_VS_IPV6
2756 if (usvc->af != AF_INET && usvc->af != AF_INET6)
2757#else
2758 if (usvc->af != AF_INET)
2759#endif
2760 return -EAFNOSUPPORT;
2761
2762 if (nla_fwmark) {
2763 usvc->protocol = IPPROTO_TCP;
2764 usvc->fwmark = nla_get_u32(nla_fwmark);
2765 } else {
2766 usvc->protocol = nla_get_u16(nla_protocol);
2767 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2768 usvc->port = nla_get_u16(nla_port);
2769 usvc->fwmark = 0;
2770 }
2771
2772 if (usvc->fwmark)
2773 svc = __ip_vs_svc_fwm_find(usvc->af, usvc->fwmark);
2774 else
2775 svc = __ip_vs_service_find(usvc->af, usvc->protocol,
2776 &usvc->addr, usvc->port);
2777 *ret_svc = svc;
2778
2779 /* If a full entry was requested, check for the additional fields */
2780 if (full_entry) {
2781 struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout,
2782 *nla_netmask;
2783 struct ip_vs_flags flags;
2784
2785 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2786 nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME];
2787 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2788 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2789 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2790
2791 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2792 return -EINVAL;
2793
2794 nla_memcpy(&flags, nla_flags, sizeof(flags));
2795
2796 /* prefill flags from service if it already exists */
2797 if (svc)
2798 usvc->flags = svc->flags;
2799
2800 /* set new flags from userland */
2801 usvc->flags = (usvc->flags & ~flags.mask) |
2802 (flags.flags & flags.mask);
2803 usvc->sched_name = nla_data(nla_sched);
2804 usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL;
2805 usvc->timeout = nla_get_u32(nla_timeout);
2806 usvc->netmask = nla_get_u32(nla_netmask);
2807 }
2808
2809 return 0;
2810}
2811
2812static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2813{
2814 struct ip_vs_service_user_kern usvc;
2815 struct ip_vs_service *svc;
2816 int ret;
2817
2818 ret = ip_vs_genl_parse_service(&usvc, nla, 0, &svc);
2819 return ret ? ERR_PTR(ret) : svc;
2820}
2821
2822static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2823{
2824 struct nlattr *nl_dest;
2825
2826 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2827 if (!nl_dest)
2828 return -EMSGSIZE;
2829
2830 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2831 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2832
2833 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2834 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2835 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2836 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2837 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2838 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2839 atomic_read(&dest->activeconns));
2840 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2841 atomic_read(&dest->inactconns));
2842 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2843 atomic_read(&dest->persistconns));
2844
2845 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2846 goto nla_put_failure;
2847
2848 nla_nest_end(skb, nl_dest);
2849
2850 return 0;
2851
2852nla_put_failure:
2853 nla_nest_cancel(skb, nl_dest);
2854 return -EMSGSIZE;
2855}
2856
2857static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2858 struct netlink_callback *cb)
2859{
2860 void *hdr;
2861
2862 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2863 &ip_vs_genl_family, NLM_F_MULTI,
2864 IPVS_CMD_NEW_DEST);
2865 if (!hdr)
2866 return -EMSGSIZE;
2867
2868 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2869 goto nla_put_failure;
2870
2871 return genlmsg_end(skb, hdr);
2872
2873nla_put_failure:
2874 genlmsg_cancel(skb, hdr);
2875 return -EMSGSIZE;
2876}
2877
2878static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2879 struct netlink_callback *cb)
2880{
2881 int idx = 0;
2882 int start = cb->args[0];
2883 struct ip_vs_service *svc;
2884 struct ip_vs_dest *dest;
2885 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2886
2887 mutex_lock(&__ip_vs_mutex);
2888
2889 /* Try to find the service for which to dump destinations */
2890 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2891 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2892 goto out_err;
2893
2894 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2895 if (IS_ERR(svc) || svc == NULL)
2896 goto out_err;
2897
2898 /* Dump the destinations */
2899 list_for_each_entry(dest, &svc->destinations, n_list) {
2900 if (++idx <= start)
2901 continue;
2902 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2903 idx--;
2904 goto nla_put_failure;
2905 }
2906 }
2907
2908nla_put_failure:
2909 cb->args[0] = idx;
2910
2911out_err:
2912 mutex_unlock(&__ip_vs_mutex);
2913
2914 return skb->len;
2915}
2916
2917static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2918 struct nlattr *nla, int full_entry)
2919{
2920 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2921 struct nlattr *nla_addr, *nla_port;
2922
2923 /* Parse mandatory identifying destination fields first */
2924 if (nla == NULL ||
2925 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2926 return -EINVAL;
2927
2928 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2929 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2930
2931 if (!(nla_addr && nla_port))
2932 return -EINVAL;
2933
2934 memset(udest, 0, sizeof(*udest));
2935
2936 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2937 udest->port = nla_get_u16(nla_port);
2938
2939 /* If a full entry was requested, check for the additional fields */
2940 if (full_entry) {
2941 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2942 *nla_l_thresh;
2943
2944 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2945 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2946 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2947 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2948
2949 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2950 return -EINVAL;
2951
2952 udest->conn_flags = nla_get_u32(nla_fwd)
2953 & IP_VS_CONN_F_FWD_MASK;
2954 udest->weight = nla_get_u32(nla_weight);
2955 udest->u_threshold = nla_get_u32(nla_u_thresh);
2956 udest->l_threshold = nla_get_u32(nla_l_thresh);
2957 }
2958
2959 return 0;
2960}
2961
2962static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2963 const char *mcast_ifn, __be32 syncid)
2964{
2965 struct nlattr *nl_daemon;
2966
2967 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2968 if (!nl_daemon)
2969 return -EMSGSIZE;
2970
2971 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2972 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2973 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2974
2975 nla_nest_end(skb, nl_daemon);
2976
2977 return 0;
2978
2979nla_put_failure:
2980 nla_nest_cancel(skb, nl_daemon);
2981 return -EMSGSIZE;
2982}
2983
2984static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2985 const char *mcast_ifn, __be32 syncid,
2986 struct netlink_callback *cb)
2987{
2988 void *hdr;
2989 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2990 &ip_vs_genl_family, NLM_F_MULTI,
2991 IPVS_CMD_NEW_DAEMON);
2992 if (!hdr)
2993 return -EMSGSIZE;
2994
2995 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2996 goto nla_put_failure;
2997
2998 return genlmsg_end(skb, hdr);
2999
3000nla_put_failure:
3001 genlmsg_cancel(skb, hdr);
3002 return -EMSGSIZE;
3003}
3004
3005static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
3006 struct netlink_callback *cb)
3007{
3008 mutex_lock(&__ip_vs_mutex);
3009 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
3010 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
3011 ip_vs_master_mcast_ifn,
3012 ip_vs_master_syncid, cb) < 0)
3013 goto nla_put_failure;
3014
3015 cb->args[0] = 1;
3016 }
3017
3018 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3019 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3020 ip_vs_backup_mcast_ifn,
3021 ip_vs_backup_syncid, cb) < 0)
3022 goto nla_put_failure;
3023
3024 cb->args[1] = 1;
3025 }
3026
3027nla_put_failure:
3028 mutex_unlock(&__ip_vs_mutex);
3029
3030 return skb->len;
3031}
3032
3033static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3034{
3035 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3036 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3037 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3038 return -EINVAL;
3039
3040 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3041 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3042 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3043}
3044
3045static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3046{
3047 if (!attrs[IPVS_DAEMON_ATTR_STATE])
3048 return -EINVAL;
3049
3050 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3051}
3052
3053static int ip_vs_genl_set_config(struct nlattr **attrs)
3054{
3055 struct ip_vs_timeout_user t;
3056
3057 __ip_vs_get_timeouts(&t);
3058
3059 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3060 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3061
3062 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3063 t.tcp_fin_timeout =
3064 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3065
3066 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3067 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3068
3069 return ip_vs_set_timeout(&t);
3070}
3071
3072static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3073{
3074 struct ip_vs_service *svc = NULL;
3075 struct ip_vs_service_user_kern usvc;
3076 struct ip_vs_dest_user_kern udest;
3077 int ret = 0, cmd;
3078 int need_full_svc = 0, need_full_dest = 0;
3079
3080 cmd = info->genlhdr->cmd;
3081
3082 mutex_lock(&__ip_vs_mutex);
3083
3084 if (cmd == IPVS_CMD_FLUSH) {
3085 ret = ip_vs_flush();
3086 goto out;
3087 } else if (cmd == IPVS_CMD_SET_CONFIG) {
3088 ret = ip_vs_genl_set_config(info->attrs);
3089 goto out;
3090 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3091 cmd == IPVS_CMD_DEL_DAEMON) {
3092
3093 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3094
3095 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3096 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3097 info->attrs[IPVS_CMD_ATTR_DAEMON],
3098 ip_vs_daemon_policy)) {
3099 ret = -EINVAL;
3100 goto out;
3101 }
3102
3103 if (cmd == IPVS_CMD_NEW_DAEMON)
3104 ret = ip_vs_genl_new_daemon(daemon_attrs);
3105 else
3106 ret = ip_vs_genl_del_daemon(daemon_attrs);
3107 goto out;
3108 } else if (cmd == IPVS_CMD_ZERO &&
3109 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3110 ret = ip_vs_zero_all();
3111 goto out;
3112 }
3113
3114 /* All following commands require a service argument, so check if we
3115 * received a valid one. We need a full service specification when
3116 * adding / editing a service. Only identifying members otherwise. */
3117 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3118 need_full_svc = 1;
3119
3120 ret = ip_vs_genl_parse_service(&usvc,
3121 info->attrs[IPVS_CMD_ATTR_SERVICE],
3122 need_full_svc, &svc);
3123 if (ret)
3124 goto out;
3125
3126 /* Unless we're adding a new service, the service must already exist */
3127 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3128 ret = -ESRCH;
3129 goto out;
3130 }
3131
3132 /* Destination commands require a valid destination argument. For
3133 * adding / editing a destination, we need a full destination
3134 * specification. */
3135 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3136 cmd == IPVS_CMD_DEL_DEST) {
3137 if (cmd != IPVS_CMD_DEL_DEST)
3138 need_full_dest = 1;
3139
3140 ret = ip_vs_genl_parse_dest(&udest,
3141 info->attrs[IPVS_CMD_ATTR_DEST],
3142 need_full_dest);
3143 if (ret)
3144 goto out;
3145 }
3146
3147 switch (cmd) {
3148 case IPVS_CMD_NEW_SERVICE:
3149 if (svc == NULL)
3150 ret = ip_vs_add_service(&usvc, &svc);
3151 else
3152 ret = -EEXIST;
3153 break;
3154 case IPVS_CMD_SET_SERVICE:
3155 ret = ip_vs_edit_service(svc, &usvc);
3156 break;
3157 case IPVS_CMD_DEL_SERVICE:
3158 ret = ip_vs_del_service(svc);
3159 /* do not use svc, it can be freed */
3160 break;
3161 case IPVS_CMD_NEW_DEST:
3162 ret = ip_vs_add_dest(svc, &udest);
3163 break;
3164 case IPVS_CMD_SET_DEST:
3165 ret = ip_vs_edit_dest(svc, &udest);
3166 break;
3167 case IPVS_CMD_DEL_DEST:
3168 ret = ip_vs_del_dest(svc, &udest);
3169 break;
3170 case IPVS_CMD_ZERO:
3171 ret = ip_vs_zero_service(svc);
3172 break;
3173 default:
3174 ret = -EINVAL;
3175 }
3176
3177out:
3178 mutex_unlock(&__ip_vs_mutex);
3179
3180 return ret;
3181}
3182
3183static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3184{
3185 struct sk_buff *msg;
3186 void *reply;
3187 int ret, cmd, reply_cmd;
3188
3189 cmd = info->genlhdr->cmd;
3190
3191 if (cmd == IPVS_CMD_GET_SERVICE)
3192 reply_cmd = IPVS_CMD_NEW_SERVICE;
3193 else if (cmd == IPVS_CMD_GET_INFO)
3194 reply_cmd = IPVS_CMD_SET_INFO;
3195 else if (cmd == IPVS_CMD_GET_CONFIG)
3196 reply_cmd = IPVS_CMD_SET_CONFIG;
3197 else {
3198 pr_err("unknown Generic Netlink command\n");
3199 return -EINVAL;
3200 }
3201
3202 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3203 if (!msg)
3204 return -ENOMEM;
3205
3206 mutex_lock(&__ip_vs_mutex);
3207
3208 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3209 if (reply == NULL)
3210 goto nla_put_failure;
3211
3212 switch (cmd) {
3213 case IPVS_CMD_GET_SERVICE:
3214 {
3215 struct ip_vs_service *svc;
3216
3217 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3218 if (IS_ERR(svc)) {
3219 ret = PTR_ERR(svc);
3220 goto out_err;
3221 } else if (svc) {
3222 ret = ip_vs_genl_fill_service(msg, svc);
3223 if (ret)
3224 goto nla_put_failure;
3225 } else {
3226 ret = -ESRCH;
3227 goto out_err;
3228 }
3229
3230 break;
3231 }
3232
3233 case IPVS_CMD_GET_CONFIG:
3234 {
3235 struct ip_vs_timeout_user t;
3236
3237 __ip_vs_get_timeouts(&t);
3238#ifdef CONFIG_IP_VS_PROTO_TCP
3239 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3240 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3241 t.tcp_fin_timeout);
3242#endif
3243#ifdef CONFIG_IP_VS_PROTO_UDP
3244 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3245#endif
3246
3247 break;
3248 }
3249
3250 case IPVS_CMD_GET_INFO:
3251 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3252 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3253 ip_vs_conn_tab_size);
3254 break;
3255 }
3256
3257 genlmsg_end(msg, reply);
3258 ret = genlmsg_reply(msg, info);
3259 goto out;
3260
3261nla_put_failure:
3262 pr_err("not enough space in Netlink message\n");
3263 ret = -EMSGSIZE;
3264
3265out_err:
3266 nlmsg_free(msg);
3267out:
3268 mutex_unlock(&__ip_vs_mutex);
3269
3270 return ret;
3271}
3272
3273
3274static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3275 {
3276 .cmd = IPVS_CMD_NEW_SERVICE,
3277 .flags = GENL_ADMIN_PERM,
3278 .policy = ip_vs_cmd_policy,
3279 .doit = ip_vs_genl_set_cmd,
3280 },
3281 {
3282 .cmd = IPVS_CMD_SET_SERVICE,
3283 .flags = GENL_ADMIN_PERM,
3284 .policy = ip_vs_cmd_policy,
3285 .doit = ip_vs_genl_set_cmd,
3286 },
3287 {
3288 .cmd = IPVS_CMD_DEL_SERVICE,
3289 .flags = GENL_ADMIN_PERM,
3290 .policy = ip_vs_cmd_policy,
3291 .doit = ip_vs_genl_set_cmd,
3292 },
3293 {
3294 .cmd = IPVS_CMD_GET_SERVICE,
3295 .flags = GENL_ADMIN_PERM,
3296 .doit = ip_vs_genl_get_cmd,
3297 .dumpit = ip_vs_genl_dump_services,
3298 .policy = ip_vs_cmd_policy,
3299 },
3300 {
3301 .cmd = IPVS_CMD_NEW_DEST,
3302 .flags = GENL_ADMIN_PERM,
3303 .policy = ip_vs_cmd_policy,
3304 .doit = ip_vs_genl_set_cmd,
3305 },
3306 {
3307 .cmd = IPVS_CMD_SET_DEST,
3308 .flags = GENL_ADMIN_PERM,
3309 .policy = ip_vs_cmd_policy,
3310 .doit = ip_vs_genl_set_cmd,
3311 },
3312 {
3313 .cmd = IPVS_CMD_DEL_DEST,
3314 .flags = GENL_ADMIN_PERM,
3315 .policy = ip_vs_cmd_policy,
3316 .doit = ip_vs_genl_set_cmd,
3317 },
3318 {
3319 .cmd = IPVS_CMD_GET_DEST,
3320 .flags = GENL_ADMIN_PERM,
3321 .policy = ip_vs_cmd_policy,
3322 .dumpit = ip_vs_genl_dump_dests,
3323 },
3324 {
3325 .cmd = IPVS_CMD_NEW_DAEMON,
3326 .flags = GENL_ADMIN_PERM,
3327 .policy = ip_vs_cmd_policy,
3328 .doit = ip_vs_genl_set_cmd,
3329 },
3330 {
3331 .cmd = IPVS_CMD_DEL_DAEMON,
3332 .flags = GENL_ADMIN_PERM,
3333 .policy = ip_vs_cmd_policy,
3334 .doit = ip_vs_genl_set_cmd,
3335 },
3336 {
3337 .cmd = IPVS_CMD_GET_DAEMON,
3338 .flags = GENL_ADMIN_PERM,
3339 .dumpit = ip_vs_genl_dump_daemons,
3340 },
3341 {
3342 .cmd = IPVS_CMD_SET_CONFIG,
3343 .flags = GENL_ADMIN_PERM,
3344 .policy = ip_vs_cmd_policy,
3345 .doit = ip_vs_genl_set_cmd,
3346 },
3347 {
3348 .cmd = IPVS_CMD_GET_CONFIG,
3349 .flags = GENL_ADMIN_PERM,
3350 .doit = ip_vs_genl_get_cmd,
3351 },
3352 {
3353 .cmd = IPVS_CMD_GET_INFO,
3354 .flags = GENL_ADMIN_PERM,
3355 .doit = ip_vs_genl_get_cmd,
3356 },
3357 {
3358 .cmd = IPVS_CMD_ZERO,
3359 .flags = GENL_ADMIN_PERM,
3360 .policy = ip_vs_cmd_policy,
3361 .doit = ip_vs_genl_set_cmd,
3362 },
3363 {
3364 .cmd = IPVS_CMD_FLUSH,
3365 .flags = GENL_ADMIN_PERM,
3366 .doit = ip_vs_genl_set_cmd,
3367 },
3368};
3369
3370static int __init ip_vs_genl_register(void)
3371{
3372 return genl_register_family_with_ops(&ip_vs_genl_family,
3373 ip_vs_genl_ops, ARRAY_SIZE(ip_vs_genl_ops));
3374}
3375
3376static void ip_vs_genl_unregister(void)
3377{
3378 genl_unregister_family(&ip_vs_genl_family);
3379}
3380
3381/* End of Generic Netlink interface definitions */
3382
3383
3384int __init ip_vs_control_init(void)
3385{
3386 int ret;
3387 int idx;
3388
3389 EnterFunction(2);
3390
3391 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3392 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3393 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3394 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3395 }
3396 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3397 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3398 }
3399 smp_wmb();
3400
3401 ret = nf_register_sockopt(&ip_vs_sockopts);
3402 if (ret) {
3403 pr_err("cannot register sockopt.\n");
3404 return ret;
3405 }
3406
3407 ret = ip_vs_genl_register();
3408 if (ret) {
3409 pr_err("cannot register Generic Netlink interface.\n");
3410 nf_unregister_sockopt(&ip_vs_sockopts);
3411 return ret;
3412 }
3413
3414 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3415 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3416
3417 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3418
3419 ip_vs_new_estimator(&ip_vs_stats);
3420
3421 /* Hook the defense timer */
3422 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3423
3424 LeaveFunction(2);
3425 return 0;
3426}
3427
3428
3429void ip_vs_control_cleanup(void)
3430{
3431 EnterFunction(2);
3432 ip_vs_trash_cleanup();
3433 cancel_rearming_delayed_work(&defense_work);
3434 cancel_work_sync(&defense_work.work);
3435 ip_vs_kill_estimator(&ip_vs_stats);
3436 unregister_sysctl_table(sysctl_header);
3437 proc_net_remove(&init_net, "ip_vs_stats");
3438 proc_net_remove(&init_net, "ip_vs");
3439 ip_vs_genl_unregister();
3440 nf_unregister_sockopt(&ip_vs_sockopts);
3441 LeaveFunction(2);
3442}