]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Convert __ip_vs_svc_get() and __ip_vs_fwm_get()
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4 39#include <net/sock.h>
9a812198 40#include <net/genetlink.h>
1da177e4
LT
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
46/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 47static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
48
49/* lock for service table */
50static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52/* lock for table with the real services */
53static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55/* lock for state and timeout tables */
56static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58/* lock for drop entry handling */
59static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61/* lock for drop packet handling */
62static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* Debug verbosity; only present when IPVS debugging is configured. */
static int sysctl_ip_vs_debug_level;

/*
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
93
94/*
af9debd4
JA
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
1da177e4
LT
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
af9debd4
JA
116 local_bh_disable();
117
1da177e4
LT
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
214
215 local_bh_enable();
1da177e4
LT
216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 225
c4028958 226static void defense_work_handler(struct work_struct *work)
1da177e4
LT
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
b18610de
JV
285ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
286 __be16 port)
1da177e4
LT
287{
288 register unsigned porth = ntohs(port);
b18610de 289 __be32 addr_fold = addr->ip;
1da177e4 290
b18610de
JV
291#ifdef CONFIG_IP_VS_IPV6
292 if (af == AF_INET6)
293 addr_fold = addr->ip6[0]^addr->ip6[1]^
294 addr->ip6[2]^addr->ip6[3];
295#endif
296
297 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
298 & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Returns hash value of fwmark for virtual service lookup
303 */
304static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
305{
306 return fwmark & IP_VS_SVC_TAB_MASK;
307}
308
309/*
310 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
311 * or in the ip_vs_svc_fwm_table by fwmark.
312 * Should be called with locked tables.
313 */
314static int ip_vs_svc_hash(struct ip_vs_service *svc)
315{
316 unsigned hash;
317
318 if (svc->flags & IP_VS_SVC_F_HASHED) {
319 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
320 "called from %p\n", __builtin_return_address(0));
321 return 0;
322 }
323
324 if (svc->fwmark == 0) {
325 /*
326 * Hash it by <protocol,addr,port> in ip_vs_svc_table
327 */
b18610de 328 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 329 svc->port);
1da177e4
LT
330 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
331 } else {
332 /*
333 * Hash it by fwmark in ip_vs_svc_fwm_table
334 */
335 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
336 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
337 }
338
339 svc->flags |= IP_VS_SVC_F_HASHED;
340 /* increase its refcnt because it is referenced by the svc table */
341 atomic_inc(&svc->refcnt);
342 return 1;
343}
344
345
346/*
347 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
348 * Should be called with locked tables.
349 */
350static int ip_vs_svc_unhash(struct ip_vs_service *svc)
351{
352 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
353 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
354 "called from %p\n", __builtin_return_address(0));
355 return 0;
356 }
357
358 if (svc->fwmark == 0) {
359 /* Remove it from the ip_vs_svc_table table */
360 list_del(&svc->s_list);
361 } else {
362 /* Remove it from the ip_vs_svc_fwm_table table */
363 list_del(&svc->f_list);
364 }
365
366 svc->flags &= ~IP_VS_SVC_F_HASHED;
367 atomic_dec(&svc->refcnt);
368 return 1;
369}
370
371
372/*
373 * Get service by {proto,addr,port} in the service table.
374 */
b18610de
JV
375static inline struct ip_vs_service *
376__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
377 __be16 vport)
1da177e4
LT
378{
379 unsigned hash;
380 struct ip_vs_service *svc;
381
382 /* Check for "full" addressed entries */
b18610de 383 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
384
385 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
386 if ((svc->af == af)
387 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
388 && (svc->port == vport)
389 && (svc->protocol == protocol)) {
390 /* HIT */
391 atomic_inc(&svc->usecnt);
392 return svc;
393 }
394 }
395
396 return NULL;
397}
398
399
400/*
401 * Get service by {fwmark} in the service table.
402 */
b18610de
JV
403static inline struct ip_vs_service *
404__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
405{
406 unsigned hash;
407 struct ip_vs_service *svc;
408
409 /* Check for fwmark addressed entries */
410 hash = ip_vs_svc_fwm_hashkey(fwmark);
411
412 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 413 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423struct ip_vs_service *
014d730d 424ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport)
1da177e4
LT
425{
426 struct ip_vs_service *svc;
b18610de 427 union nf_inet_addr _vaddr = { .ip = vaddr };
1da177e4
LT
428 read_lock(&__ip_vs_svc_lock);
429
430 /*
431 * Check the table hashed by fwmark first
432 */
b18610de 433 if (fwmark && (svc = __ip_vs_svc_fwm_get(AF_INET, fwmark)))
1da177e4
LT
434 goto out;
435
436 /*
437 * Check the table hashed by <protocol,addr,port>
438 * for "full" addressed entries
439 */
b18610de 440 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, vport);
1da177e4
LT
441
442 if (svc == NULL
443 && protocol == IPPROTO_TCP
444 && atomic_read(&ip_vs_ftpsvc_counter)
445 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
446 /*
447 * Check if ftp service entry exists, the packet
448 * might belong to FTP data connections.
449 */
b18610de 450 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, FTPPORT);
1da177e4
LT
451 }
452
453 if (svc == NULL
454 && atomic_read(&ip_vs_nullsvc_counter)) {
455 /*
456 * Check if the catch-all port (port zero) exists
457 */
b18610de 458 svc = __ip_vs_service_get(AF_INET, protocol, &_vaddr, 0);
1da177e4
LT
459 }
460
461 out:
462 read_unlock(&__ip_vs_svc_lock);
463
4b5bdf5c 464 IP_VS_DBG(9, "lookup service: fwm %u %s %u.%u.%u.%u:%u %s\n",
1da177e4
LT
465 fwmark, ip_vs_proto_name(protocol),
466 NIPQUAD(vaddr), ntohs(vport),
467 svc?"hit":"not hit");
468
469 return svc;
470}
471
472
473static inline void
474__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
475{
476 atomic_inc(&svc->refcnt);
477 dest->svc = svc;
478}
479
480static inline void
481__ip_vs_unbind_svc(struct ip_vs_dest *dest)
482{
483 struct ip_vs_service *svc = dest->svc;
484
485 dest->svc = NULL;
486 if (atomic_dec_and_test(&svc->refcnt))
487 kfree(svc);
488}
489
490
491/*
492 * Returns hash value for real service
493 */
014d730d 494static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
1da177e4
LT
495{
496 register unsigned porth = ntohs(port);
497
498 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
499 & IP_VS_RTAB_MASK;
500}
501
502/*
503 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
504 * should be called with locked tables.
505 */
506static int ip_vs_rs_hash(struct ip_vs_dest *dest)
507{
508 unsigned hash;
509
510 if (!list_empty(&dest->d_list)) {
511 return 0;
512 }
513
514 /*
515 * Hash by proto,addr,port,
516 * which are the parameters of the real service.
517 */
e7ade46a 518 hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
1da177e4
LT
519 list_add(&dest->d_list, &ip_vs_rtable[hash]);
520
521 return 1;
522}
523
524/*
525 * UNhashes ip_vs_dest from ip_vs_rtable.
526 * should be called with locked tables.
527 */
528static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
529{
530 /*
531 * Remove it from the ip_vs_rtable table.
532 */
533 if (!list_empty(&dest->d_list)) {
534 list_del(&dest->d_list);
535 INIT_LIST_HEAD(&dest->d_list);
536 }
537
538 return 1;
539}
540
541/*
542 * Lookup real service by <proto,addr,port> in the real service table.
543 */
544struct ip_vs_dest *
014d730d 545ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
1da177e4
LT
546{
547 unsigned hash;
548 struct ip_vs_dest *dest;
549
550 /*
551 * Check for "full" addressed entries
552 * Return the first found entry
553 */
554 hash = ip_vs_rs_hashkey(daddr, dport);
555
556 read_lock(&__ip_vs_rs_lock);
557 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
e7ade46a 558 if ((dest->addr.ip == daddr)
1da177e4
LT
559 && (dest->port == dport)
560 && ((dest->protocol == protocol) ||
561 dest->vfwmark)) {
562 /* HIT */
563 read_unlock(&__ip_vs_rs_lock);
564 return dest;
565 }
566 }
567 read_unlock(&__ip_vs_rs_lock);
568
569 return NULL;
570}
571
572/*
573 * Lookup destination by {addr,port} in the given service
574 */
575static struct ip_vs_dest *
014d730d 576ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
577{
578 struct ip_vs_dest *dest;
579
580 /*
581 * Find the destination for the given service
582 */
583 list_for_each_entry(dest, &svc->destinations, n_list) {
e7ade46a 584 if ((dest->addr.ip == daddr) && (dest->port == dport)) {
1da177e4
LT
585 /* HIT */
586 return dest;
587 }
588 }
589
590 return NULL;
591}
592
1e356f9c
RB
593/*
594 * Find destination by {daddr,dport,vaddr,protocol}
595 * Cretaed to be used in ip_vs_process_message() in
596 * the backup synchronization daemon. It finds the
597 * destination to be bound to the received connection
598 * on the backup.
599 *
600 * ip_vs_lookup_real_service() looked promissing, but
601 * seems not working as expected.
602 */
603struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
604 __be32 vaddr, __be16 vport, __u16 protocol)
605{
606 struct ip_vs_dest *dest;
607 struct ip_vs_service *svc;
608
609 svc = ip_vs_service_get(0, protocol, vaddr, vport);
610 if (!svc)
611 return NULL;
612 dest = ip_vs_lookup_dest(svc, daddr, dport);
613 if (dest)
614 atomic_inc(&dest->refcnt);
615 ip_vs_service_put(svc);
616 return dest;
617}
1da177e4
LT
618
619/*
620 * Lookup dest by {svc,addr,port} in the destination trash.
621 * The destination trash is used to hold the destinations that are removed
622 * from the service table but are still referenced by some conn entries.
623 * The reason to add the destination trash is when the dest is temporary
624 * down (either by administrator or by monitor program), the dest can be
625 * picked back from the trash, the remaining connections to the dest can
626 * continue, and the counting information of the dest is also useful for
627 * scheduling.
628 */
629static struct ip_vs_dest *
014d730d 630ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
631{
632 struct ip_vs_dest *dest, *nxt;
633
634 /*
635 * Find the destination in trash
636 */
637 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
638 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
4b5bdf5c 639 "dest->refcnt=%d\n",
1da177e4 640 dest->vfwmark,
e7ade46a 641 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4 642 atomic_read(&dest->refcnt));
e7ade46a 643 if (dest->addr.ip == daddr &&
1da177e4
LT
644 dest->port == dport &&
645 dest->vfwmark == svc->fwmark &&
646 dest->protocol == svc->protocol &&
647 (svc->fwmark ||
e7ade46a 648 (dest->vaddr.ip == svc->addr.ip &&
1da177e4
LT
649 dest->vport == svc->port))) {
650 /* HIT */
651 return dest;
652 }
653
654 /*
655 * Try to purge the destination from trash if not referenced
656 */
657 if (atomic_read(&dest->refcnt) == 1) {
658 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
659 "from trash\n",
660 dest->vfwmark,
e7ade46a 661 NIPQUAD(dest->addr.ip), ntohs(dest->port));
1da177e4
LT
662 list_del(&dest->n_list);
663 ip_vs_dst_reset(dest);
664 __ip_vs_unbind_svc(dest);
665 kfree(dest);
666 }
667 }
668
669 return NULL;
670}
671
672
673/*
674 * Clean up all the destinations in the trash
675 * Called by the ip_vs_control_cleanup()
676 *
677 * When the ip_vs_control_clearup is activated by ipvs module exit,
678 * the service tables must have been flushed and all the connections
679 * are expired, and the refcnt of each destination in the trash must
680 * be 1, so we simply release them here.
681 */
682static void ip_vs_trash_cleanup(void)
683{
684 struct ip_vs_dest *dest, *nxt;
685
686 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
687 list_del(&dest->n_list);
688 ip_vs_dst_reset(dest);
689 __ip_vs_unbind_svc(dest);
690 kfree(dest);
691 }
692}
693
694
695static void
696ip_vs_zero_stats(struct ip_vs_stats *stats)
697{
698 spin_lock_bh(&stats->lock);
e93615d0
SH
699
700 stats->conns = 0;
701 stats->inpkts = 0;
702 stats->outpkts = 0;
703 stats->inbytes = 0;
704 stats->outbytes = 0;
705
706 stats->cps = 0;
707 stats->inpps = 0;
708 stats->outpps = 0;
709 stats->inbps = 0;
710 stats->outbps = 0;
711
1da177e4 712 ip_vs_zero_estimator(stats);
e93615d0 713
3a14a313 714 spin_unlock_bh(&stats->lock);
1da177e4
LT
715}
716
717/*
718 * Update a destination in the given service
719 */
720static void
721__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 722 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
723{
724 int conn_flags;
725
726 /* set the weight and the flags */
727 atomic_set(&dest->weight, udest->weight);
728 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
729
730 /* check if local node and update the flags */
c860c6b1 731 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
1da177e4
LT
732 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
733 | IP_VS_CONN_F_LOCALNODE;
734 }
735
736 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
737 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
738 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
739 } else {
740 /*
741 * Put the real service in ip_vs_rtable if not present.
742 * For now only for NAT!
743 */
744 write_lock_bh(&__ip_vs_rs_lock);
745 ip_vs_rs_hash(dest);
746 write_unlock_bh(&__ip_vs_rs_lock);
747 }
748 atomic_set(&dest->conn_flags, conn_flags);
749
750 /* bind the service */
751 if (!dest->svc) {
752 __ip_vs_bind_svc(dest, svc);
753 } else {
754 if (dest->svc != svc) {
755 __ip_vs_unbind_svc(dest);
756 ip_vs_zero_stats(&dest->stats);
757 __ip_vs_bind_svc(dest, svc);
758 }
759 }
760
761 /* set the dest status flags */
762 dest->flags |= IP_VS_DEST_F_AVAILABLE;
763
764 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
765 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
766 dest->u_threshold = udest->u_threshold;
767 dest->l_threshold = udest->l_threshold;
768}
769
770
771/*
772 * Create a destination for the given service
773 */
774static int
c860c6b1 775ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
776 struct ip_vs_dest **dest_p)
777{
778 struct ip_vs_dest *dest;
779 unsigned atype;
780
781 EnterFunction(2);
782
c860c6b1 783 atype = inet_addr_type(&init_net, udest->addr.ip);
1da177e4
LT
784 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
785 return -EINVAL;
786
0da974f4 787 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
788 if (dest == NULL) {
789 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
790 return -ENOMEM;
791 }
1da177e4 792
c860c6b1 793 dest->af = svc->af;
1da177e4 794 dest->protocol = svc->protocol;
c860c6b1 795 dest->vaddr = svc->addr;
1da177e4
LT
796 dest->vport = svc->port;
797 dest->vfwmark = svc->fwmark;
c860c6b1 798 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
799 dest->port = udest->port;
800
801 atomic_set(&dest->activeconns, 0);
802 atomic_set(&dest->inactconns, 0);
803 atomic_set(&dest->persistconns, 0);
804 atomic_set(&dest->refcnt, 0);
805
806 INIT_LIST_HEAD(&dest->d_list);
807 spin_lock_init(&dest->dst_lock);
808 spin_lock_init(&dest->stats.lock);
809 __ip_vs_update_dest(svc, dest, udest);
810 ip_vs_new_estimator(&dest->stats);
811
812 *dest_p = dest;
813
814 LeaveFunction(2);
815 return 0;
816}
817
818
819/*
820 * Add a destination into an existing service
821 */
822static int
c860c6b1 823ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
824{
825 struct ip_vs_dest *dest;
c860c6b1 826 union nf_inet_addr daddr;
014d730d 827 __be16 dport = udest->port;
1da177e4
LT
828 int ret;
829
830 EnterFunction(2);
831
832 if (udest->weight < 0) {
833 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
834 return -ERANGE;
835 }
836
837 if (udest->l_threshold > udest->u_threshold) {
838 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
839 "upper threshold\n");
840 return -ERANGE;
841 }
842
c860c6b1
JV
843 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
844
1da177e4
LT
845 /*
846 * Check if the dest already exists in the list
847 */
c860c6b1 848 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
1da177e4
LT
849 if (dest != NULL) {
850 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
851 return -EEXIST;
852 }
853
854 /*
855 * Check if the dest already exists in the trash and
856 * is from the same service
857 */
c860c6b1 858 dest = ip_vs_trash_get_dest(svc, daddr.ip, dport);
1da177e4
LT
859 if (dest != NULL) {
860 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 861 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
862 NIPQUAD(daddr), ntohs(dport),
863 atomic_read(&dest->refcnt),
864 dest->vfwmark,
e7ade46a 865 NIPQUAD(dest->vaddr.ip),
1da177e4
LT
866 ntohs(dest->vport));
867 __ip_vs_update_dest(svc, dest, udest);
868
869 /*
870 * Get the destination from the trash
871 */
872 list_del(&dest->n_list);
873
874 ip_vs_new_estimator(&dest->stats);
875
876 write_lock_bh(&__ip_vs_svc_lock);
877
878 /*
879 * Wait until all other svc users go away.
880 */
881 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
882
883 list_add(&dest->n_list, &svc->destinations);
884 svc->num_dests++;
885
886 /* call the update_service function of its scheduler */
82dfb6f3
SW
887 if (svc->scheduler->update_service)
888 svc->scheduler->update_service(svc);
1da177e4
LT
889
890 write_unlock_bh(&__ip_vs_svc_lock);
891 return 0;
892 }
893
894 /*
895 * Allocate and initialize the dest structure
896 */
897 ret = ip_vs_new_dest(svc, udest, &dest);
898 if (ret) {
899 return ret;
900 }
901
902 /*
903 * Add the dest entry into the list
904 */
905 atomic_inc(&dest->refcnt);
906
907 write_lock_bh(&__ip_vs_svc_lock);
908
909 /*
910 * Wait until all other svc users go away.
911 */
912 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
913
914 list_add(&dest->n_list, &svc->destinations);
915 svc->num_dests++;
916
917 /* call the update_service function of its scheduler */
82dfb6f3
SW
918 if (svc->scheduler->update_service)
919 svc->scheduler->update_service(svc);
1da177e4
LT
920
921 write_unlock_bh(&__ip_vs_svc_lock);
922
923 LeaveFunction(2);
924
925 return 0;
926}
927
928
929/*
930 * Edit a destination in the given service
931 */
932static int
c860c6b1 933ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
934{
935 struct ip_vs_dest *dest;
c860c6b1 936 union nf_inet_addr daddr;
014d730d 937 __be16 dport = udest->port;
1da177e4
LT
938
939 EnterFunction(2);
940
941 if (udest->weight < 0) {
942 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
943 return -ERANGE;
944 }
945
946 if (udest->l_threshold > udest->u_threshold) {
947 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
948 "upper threshold\n");
949 return -ERANGE;
950 }
951
c860c6b1
JV
952 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
953
1da177e4
LT
954 /*
955 * Lookup the destination list
956 */
c860c6b1 957 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
1da177e4
LT
958 if (dest == NULL) {
959 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
960 return -ENOENT;
961 }
962
963 __ip_vs_update_dest(svc, dest, udest);
964
965 write_lock_bh(&__ip_vs_svc_lock);
966
967 /* Wait until all other svc users go away */
cae7ca3d 968 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
969
970 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
971 if (svc->scheduler->update_service)
972 svc->scheduler->update_service(svc);
1da177e4
LT
973
974 write_unlock_bh(&__ip_vs_svc_lock);
975
976 LeaveFunction(2);
977
978 return 0;
979}
980
981
982/*
983 * Delete a destination (must be already unlinked from the service)
984 */
985static void __ip_vs_del_dest(struct ip_vs_dest *dest)
986{
987 ip_vs_kill_estimator(&dest->stats);
988
989 /*
990 * Remove it from the d-linked list with the real services.
991 */
992 write_lock_bh(&__ip_vs_rs_lock);
993 ip_vs_rs_unhash(dest);
994 write_unlock_bh(&__ip_vs_rs_lock);
995
996 /*
997 * Decrease the refcnt of the dest, and free the dest
998 * if nobody refers to it (refcnt=0). Otherwise, throw
999 * the destination into the trash.
1000 */
1001 if (atomic_dec_and_test(&dest->refcnt)) {
1002 ip_vs_dst_reset(dest);
1003 /* simply decrease svc->refcnt here, let the caller check
1004 and release the service if nobody refers to it.
1005 Only user context can release destination and service,
1006 and only one user context can update virtual service at a
1007 time, so the operation here is OK */
1008 atomic_dec(&dest->svc->refcnt);
1009 kfree(dest);
1010 } else {
4b5bdf5c
RN
1011 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1012 "dest->refcnt=%d\n",
e7ade46a 1013 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1014 atomic_read(&dest->refcnt));
1015 list_add(&dest->n_list, &ip_vs_dest_trash);
1016 atomic_inc(&dest->refcnt);
1017 }
1018}
1019
1020
1021/*
1022 * Unlink a destination from the given service
1023 */
1024static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1025 struct ip_vs_dest *dest,
1026 int svcupd)
1027{
1028 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1029
1030 /*
1031 * Remove it from the d-linked destination list.
1032 */
1033 list_del(&dest->n_list);
1034 svc->num_dests--;
82dfb6f3
SW
1035
1036 /*
1037 * Call the update_service function of its scheduler
1038 */
1039 if (svcupd && svc->scheduler->update_service)
1040 svc->scheduler->update_service(svc);
1da177e4
LT
1041}
1042
1043
1044/*
1045 * Delete a destination server in the given service
1046 */
1047static int
c860c6b1 1048ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1049{
1050 struct ip_vs_dest *dest;
014d730d 1051 __be16 dport = udest->port;
1da177e4
LT
1052
1053 EnterFunction(2);
1054
c860c6b1
JV
1055 dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport);
1056
1da177e4
LT
1057 if (dest == NULL) {
1058 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1059 return -ENOENT;
1060 }
1061
1062 write_lock_bh(&__ip_vs_svc_lock);
1063
1064 /*
1065 * Wait until all other svc users go away.
1066 */
1067 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1068
1069 /*
1070 * Unlink dest from the service
1071 */
1072 __ip_vs_unlink_dest(svc, dest, 1);
1073
1074 write_unlock_bh(&__ip_vs_svc_lock);
1075
1076 /*
1077 * Delete the destination
1078 */
1079 __ip_vs_del_dest(dest);
1080
1081 LeaveFunction(2);
1082
1083 return 0;
1084}
1085
1086
1087/*
1088 * Add a service into the service hash table
1089 */
1090static int
c860c6b1
JV
1091ip_vs_add_service(struct ip_vs_service_user_kern *u,
1092 struct ip_vs_service **svc_p)
1da177e4
LT
1093{
1094 int ret = 0;
1095 struct ip_vs_scheduler *sched = NULL;
1096 struct ip_vs_service *svc = NULL;
1097
1098 /* increase the module use count */
1099 ip_vs_use_count_inc();
1100
1101 /* Lookup the scheduler by 'u->sched_name' */
1102 sched = ip_vs_scheduler_get(u->sched_name);
1103 if (sched == NULL) {
1104 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1105 u->sched_name);
1106 ret = -ENOENT;
1107 goto out_mod_dec;
1108 }
1109
0da974f4 1110 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1111 if (svc == NULL) {
1112 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1113 ret = -ENOMEM;
1114 goto out_err;
1115 }
1da177e4
LT
1116
1117 /* I'm the first user of the service */
1118 atomic_set(&svc->usecnt, 1);
1119 atomic_set(&svc->refcnt, 0);
1120
c860c6b1 1121 svc->af = u->af;
1da177e4 1122 svc->protocol = u->protocol;
c860c6b1 1123 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1124 svc->port = u->port;
1125 svc->fwmark = u->fwmark;
1126 svc->flags = u->flags;
1127 svc->timeout = u->timeout * HZ;
1128 svc->netmask = u->netmask;
1129
1130 INIT_LIST_HEAD(&svc->destinations);
1131 rwlock_init(&svc->sched_lock);
1132 spin_lock_init(&svc->stats.lock);
1133
1134 /* Bind the scheduler */
1135 ret = ip_vs_bind_scheduler(svc, sched);
1136 if (ret)
1137 goto out_err;
1138 sched = NULL;
1139
1140 /* Update the virtual service counters */
1141 if (svc->port == FTPPORT)
1142 atomic_inc(&ip_vs_ftpsvc_counter);
1143 else if (svc->port == 0)
1144 atomic_inc(&ip_vs_nullsvc_counter);
1145
1146 ip_vs_new_estimator(&svc->stats);
1147 ip_vs_num_services++;
1148
1149 /* Hash the service into the service table */
1150 write_lock_bh(&__ip_vs_svc_lock);
1151 ip_vs_svc_hash(svc);
1152 write_unlock_bh(&__ip_vs_svc_lock);
1153
1154 *svc_p = svc;
1155 return 0;
1156
1157 out_err:
1158 if (svc != NULL) {
1159 if (svc->scheduler)
1160 ip_vs_unbind_scheduler(svc);
1161 if (svc->inc) {
1162 local_bh_disable();
1163 ip_vs_app_inc_put(svc->inc);
1164 local_bh_enable();
1165 }
1166 kfree(svc);
1167 }
1168 ip_vs_scheduler_put(sched);
1169
1170 out_mod_dec:
1171 /* decrease the module use count */
1172 ip_vs_use_count_dec();
1173
1174 return ret;
1175}
1176
1177
1178/*
1179 * Edit a service and bind it with a new scheduler
1180 */
1181static int
c860c6b1 1182ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1183{
1184 struct ip_vs_scheduler *sched, *old_sched;
1185 int ret = 0;
1186
1187 /*
1188 * Lookup the scheduler, by 'u->sched_name'
1189 */
1190 sched = ip_vs_scheduler_get(u->sched_name);
1191 if (sched == NULL) {
1192 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1193 u->sched_name);
1194 return -ENOENT;
1195 }
1196 old_sched = sched;
1197
1198 write_lock_bh(&__ip_vs_svc_lock);
1199
1200 /*
1201 * Wait until all other svc users go away.
1202 */
1203 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1204
1205 /*
1206 * Set the flags and timeout value
1207 */
1208 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1209 svc->timeout = u->timeout * HZ;
1210 svc->netmask = u->netmask;
1211
1212 old_sched = svc->scheduler;
1213 if (sched != old_sched) {
1214 /*
1215 * Unbind the old scheduler
1216 */
1217 if ((ret = ip_vs_unbind_scheduler(svc))) {
1218 old_sched = sched;
1219 goto out;
1220 }
1221
1222 /*
1223 * Bind the new scheduler
1224 */
1225 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1226 /*
1227 * If ip_vs_bind_scheduler fails, restore the old
1228 * scheduler.
1229 * The main reason of failure is out of memory.
1230 *
1231 * The question is if the old scheduler can be
1232 * restored all the time. TODO: if it cannot be
1233 * restored some time, we must delete the service,
1234 * otherwise the system may crash.
1235 */
1236 ip_vs_bind_scheduler(svc, old_sched);
1237 old_sched = sched;
1238 goto out;
1239 }
1240 }
1241
1242 out:
1243 write_unlock_bh(&__ip_vs_svc_lock);
1244
1245 if (old_sched)
1246 ip_vs_scheduler_put(old_sched);
1247
1248 return ret;
1249}
1250
1251
1252/*
1253 * Delete a service from the service list
1254 * - The service must be unlinked, unlocked and not referenced!
1255 * - We are called under _bh lock
1256 */
1257static void __ip_vs_del_service(struct ip_vs_service *svc)
1258{
1259 struct ip_vs_dest *dest, *nxt;
1260 struct ip_vs_scheduler *old_sched;
1261
1262 ip_vs_num_services--;
1263 ip_vs_kill_estimator(&svc->stats);
1264
1265 /* Unbind scheduler */
1266 old_sched = svc->scheduler;
1267 ip_vs_unbind_scheduler(svc);
1268 if (old_sched)
1269 ip_vs_scheduler_put(old_sched);
1270
1271 /* Unbind app inc */
1272 if (svc->inc) {
1273 ip_vs_app_inc_put(svc->inc);
1274 svc->inc = NULL;
1275 }
1276
1277 /*
1278 * Unlink the whole destination list
1279 */
1280 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1281 __ip_vs_unlink_dest(svc, dest, 0);
1282 __ip_vs_del_dest(dest);
1283 }
1284
1285 /*
1286 * Update the virtual service counters
1287 */
1288 if (svc->port == FTPPORT)
1289 atomic_dec(&ip_vs_ftpsvc_counter);
1290 else if (svc->port == 0)
1291 atomic_dec(&ip_vs_nullsvc_counter);
1292
1293 /*
1294 * Free the service if nobody refers to it
1295 */
1296 if (atomic_read(&svc->refcnt) == 0)
1297 kfree(svc);
1298
1299 /* decrease the module use count */
1300 ip_vs_use_count_dec();
1301}
1302
1303/*
1304 * Delete a service from the service list
1305 */
1306static int ip_vs_del_service(struct ip_vs_service *svc)
1307{
1308 if (svc == NULL)
1309 return -EEXIST;
1310
1311 /*
1312 * Unhash it from the service table
1313 */
1314 write_lock_bh(&__ip_vs_svc_lock);
1315
1316 ip_vs_svc_unhash(svc);
1317
1318 /*
1319 * Wait until all the svc users go away.
1320 */
1321 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1322
1323 __ip_vs_del_service(svc);
1324
1325 write_unlock_bh(&__ip_vs_svc_lock);
1326
1327 return 0;
1328}
1329
1330
1331/*
1332 * Flush all the virtual services
1333 */
1334static int ip_vs_flush(void)
1335{
1336 int idx;
1337 struct ip_vs_service *svc, *nxt;
1338
1339 /*
1340 * Flush the service table hashed by <protocol,addr,port>
1341 */
1342 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1343 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1344 write_lock_bh(&__ip_vs_svc_lock);
1345 ip_vs_svc_unhash(svc);
1346 /*
1347 * Wait until all the svc users go away.
1348 */
1349 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1350 __ip_vs_del_service(svc);
1351 write_unlock_bh(&__ip_vs_svc_lock);
1352 }
1353 }
1354
1355 /*
1356 * Flush the service table hashed by fwmark
1357 */
1358 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1359 list_for_each_entry_safe(svc, nxt,
1360 &ip_vs_svc_fwm_table[idx], f_list) {
1361 write_lock_bh(&__ip_vs_svc_lock);
1362 ip_vs_svc_unhash(svc);
1363 /*
1364 * Wait until all the svc users go away.
1365 */
1366 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1367 __ip_vs_del_service(svc);
1368 write_unlock_bh(&__ip_vs_svc_lock);
1369 }
1370 }
1371
1372 return 0;
1373}
1374
1375
1376/*
1377 * Zero counters in a service or all services
1378 */
1379static int ip_vs_zero_service(struct ip_vs_service *svc)
1380{
1381 struct ip_vs_dest *dest;
1382
1383 write_lock_bh(&__ip_vs_svc_lock);
1384 list_for_each_entry(dest, &svc->destinations, n_list) {
1385 ip_vs_zero_stats(&dest->stats);
1386 }
1387 ip_vs_zero_stats(&svc->stats);
1388 write_unlock_bh(&__ip_vs_svc_lock);
1389 return 0;
1390}
1391
1392static int ip_vs_zero_all(void)
1393{
1394 int idx;
1395 struct ip_vs_service *svc;
1396
1397 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1398 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1399 ip_vs_zero_service(svc);
1400 }
1401 }
1402
1403 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1404 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1405 ip_vs_zero_service(svc);
1406 }
1407 }
1408
1409 ip_vs_zero_stats(&ip_vs_stats);
1410 return 0;
1411}
1412
1413
1414static int
1415proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1416 void __user *buffer, size_t *lenp, loff_t *ppos)
1417{
1418 int *valp = table->data;
1419 int val = *valp;
1420 int rc;
1421
1422 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1423 if (write && (*valp != val)) {
1424 if ((*valp < 0) || (*valp > 3)) {
1425 /* Restore the correct value */
1426 *valp = val;
1427 } else {
1da177e4 1428 update_defense_level();
1da177e4
LT
1429 }
1430 }
1431 return rc;
1432}
1433
1434
1435static int
1436proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1437 void __user *buffer, size_t *lenp, loff_t *ppos)
1438{
1439 int *valp = table->data;
1440 int val[2];
1441 int rc;
1442
1443 /* backup the value first */
1444 memcpy(val, valp, sizeof(val));
1445
1446 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1447 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1448 /* Restore the correct value */
1449 memcpy(valp, val, sizeof(val));
1450 }
1451 return rc;
1452}
1453
1454
1455/*
1456 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1457 */
1458
1459static struct ctl_table vs_vars[] = {
1460 {
1da177e4
LT
1461 .procname = "amemthresh",
1462 .data = &sysctl_ip_vs_amemthresh,
1463 .maxlen = sizeof(int),
1464 .mode = 0644,
1465 .proc_handler = &proc_dointvec,
1466 },
1467#ifdef CONFIG_IP_VS_DEBUG
1468 {
1da177e4
LT
1469 .procname = "debug_level",
1470 .data = &sysctl_ip_vs_debug_level,
1471 .maxlen = sizeof(int),
1472 .mode = 0644,
1473 .proc_handler = &proc_dointvec,
1474 },
1475#endif
1476 {
1da177e4
LT
1477 .procname = "am_droprate",
1478 .data = &sysctl_ip_vs_am_droprate,
1479 .maxlen = sizeof(int),
1480 .mode = 0644,
1481 .proc_handler = &proc_dointvec,
1482 },
1483 {
1da177e4
LT
1484 .procname = "drop_entry",
1485 .data = &sysctl_ip_vs_drop_entry,
1486 .maxlen = sizeof(int),
1487 .mode = 0644,
1488 .proc_handler = &proc_do_defense_mode,
1489 },
1490 {
1da177e4
LT
1491 .procname = "drop_packet",
1492 .data = &sysctl_ip_vs_drop_packet,
1493 .maxlen = sizeof(int),
1494 .mode = 0644,
1495 .proc_handler = &proc_do_defense_mode,
1496 },
1497 {
1da177e4
LT
1498 .procname = "secure_tcp",
1499 .data = &sysctl_ip_vs_secure_tcp,
1500 .maxlen = sizeof(int),
1501 .mode = 0644,
1502 .proc_handler = &proc_do_defense_mode,
1503 },
1504#if 0
1505 {
1da177e4
LT
1506 .procname = "timeout_established",
1507 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1508 .maxlen = sizeof(int),
1509 .mode = 0644,
1510 .proc_handler = &proc_dointvec_jiffies,
1511 },
1512 {
1da177e4
LT
1513 .procname = "timeout_synsent",
1514 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1515 .maxlen = sizeof(int),
1516 .mode = 0644,
1517 .proc_handler = &proc_dointvec_jiffies,
1518 },
1519 {
1da177e4
LT
1520 .procname = "timeout_synrecv",
1521 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1522 .maxlen = sizeof(int),
1523 .mode = 0644,
1524 .proc_handler = &proc_dointvec_jiffies,
1525 },
1526 {
1da177e4
LT
1527 .procname = "timeout_finwait",
1528 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1529 .maxlen = sizeof(int),
1530 .mode = 0644,
1531 .proc_handler = &proc_dointvec_jiffies,
1532 },
1533 {
1da177e4
LT
1534 .procname = "timeout_timewait",
1535 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1536 .maxlen = sizeof(int),
1537 .mode = 0644,
1538 .proc_handler = &proc_dointvec_jiffies,
1539 },
1540 {
1da177e4
LT
1541 .procname = "timeout_close",
1542 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1543 .maxlen = sizeof(int),
1544 .mode = 0644,
1545 .proc_handler = &proc_dointvec_jiffies,
1546 },
1547 {
1da177e4
LT
1548 .procname = "timeout_closewait",
1549 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1550 .maxlen = sizeof(int),
1551 .mode = 0644,
1552 .proc_handler = &proc_dointvec_jiffies,
1553 },
1554 {
1da177e4
LT
1555 .procname = "timeout_lastack",
1556 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1557 .maxlen = sizeof(int),
1558 .mode = 0644,
1559 .proc_handler = &proc_dointvec_jiffies,
1560 },
1561 {
1da177e4
LT
1562 .procname = "timeout_listen",
1563 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1564 .maxlen = sizeof(int),
1565 .mode = 0644,
1566 .proc_handler = &proc_dointvec_jiffies,
1567 },
1568 {
1da177e4
LT
1569 .procname = "timeout_synack",
1570 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1571 .maxlen = sizeof(int),
1572 .mode = 0644,
1573 .proc_handler = &proc_dointvec_jiffies,
1574 },
1575 {
1da177e4
LT
1576 .procname = "timeout_udp",
1577 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1578 .maxlen = sizeof(int),
1579 .mode = 0644,
1580 .proc_handler = &proc_dointvec_jiffies,
1581 },
1582 {
1da177e4
LT
1583 .procname = "timeout_icmp",
1584 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1585 .maxlen = sizeof(int),
1586 .mode = 0644,
1587 .proc_handler = &proc_dointvec_jiffies,
1588 },
1589#endif
1590 {
1da177e4
LT
1591 .procname = "cache_bypass",
1592 .data = &sysctl_ip_vs_cache_bypass,
1593 .maxlen = sizeof(int),
1594 .mode = 0644,
1595 .proc_handler = &proc_dointvec,
1596 },
1597 {
1da177e4
LT
1598 .procname = "expire_nodest_conn",
1599 .data = &sysctl_ip_vs_expire_nodest_conn,
1600 .maxlen = sizeof(int),
1601 .mode = 0644,
1602 .proc_handler = &proc_dointvec,
1603 },
1604 {
1da177e4
LT
1605 .procname = "expire_quiescent_template",
1606 .data = &sysctl_ip_vs_expire_quiescent_template,
1607 .maxlen = sizeof(int),
1608 .mode = 0644,
1609 .proc_handler = &proc_dointvec,
1610 },
1611 {
1da177e4
LT
1612 .procname = "sync_threshold",
1613 .data = &sysctl_ip_vs_sync_threshold,
1614 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1615 .mode = 0644,
1616 .proc_handler = &proc_do_sync_threshold,
1617 },
1618 {
1da177e4
LT
1619 .procname = "nat_icmp_send",
1620 .data = &sysctl_ip_vs_nat_icmp_send,
1621 .maxlen = sizeof(int),
1622 .mode = 0644,
1623 .proc_handler = &proc_dointvec,
1624 },
1625 { .ctl_name = 0 }
1626};
1627
5587da55 1628const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1629 { .procname = "net", .ctl_name = CTL_NET, },
1630 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1631 { .procname = "vs", },
1632 { }
1da177e4 1633};
90754f8e 1634EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1635
1636static struct ctl_table_header * sysctl_header;
1637
1638#ifdef CONFIG_PROC_FS
1639
/*
 *	Per-open seq_file cursor for the service listing: which of the two
 *	service hash tables is being walked, and the current bucket in it.
 */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the bucket holding the current entry */
};
1644
1645/*
1646 * Write the contents of the VS rule table to a PROCfs file.
1647 * (It is kept just for backward compatibility)
1648 */
1649static inline const char *ip_vs_fwd_name(unsigned flags)
1650{
1651 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1652 case IP_VS_CONN_F_LOCALNODE:
1653 return "Local";
1654 case IP_VS_CONN_F_TUNNEL:
1655 return "Tunnel";
1656 case IP_VS_CONN_F_DROUTE:
1657 return "Route";
1658 default:
1659 return "Masq";
1660 }
1661}
1662
1663
1664/* Get the Nth entry in the two lists */
1665static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1666{
1667 struct ip_vs_iter *iter = seq->private;
1668 int idx;
1669 struct ip_vs_service *svc;
1670
1671 /* look in hash by protocol */
1672 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1673 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1674 if (pos-- == 0){
1675 iter->table = ip_vs_svc_table;
1676 iter->bucket = idx;
1677 return svc;
1678 }
1679 }
1680 }
1681
1682 /* keep looking in fwmark */
1683 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1684 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1685 if (pos-- == 0) {
1686 iter->table = ip_vs_svc_fwm_table;
1687 iter->bucket = idx;
1688 return svc;
1689 }
1690 }
1691 }
1692
1693 return NULL;
1694}
1695
1696static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1697{
1698
1699 read_lock_bh(&__ip_vs_svc_lock);
1700 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1701}
1702
1703
1704static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1705{
1706 struct list_head *e;
1707 struct ip_vs_iter *iter;
1708 struct ip_vs_service *svc;
1709
1710 ++*pos;
1711 if (v == SEQ_START_TOKEN)
1712 return ip_vs_info_array(seq,0);
1713
1714 svc = v;
1715 iter = seq->private;
1716
1717 if (iter->table == ip_vs_svc_table) {
1718 /* next service in table hashed by protocol */
1719 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1720 return list_entry(e, struct ip_vs_service, s_list);
1721
1722
1723 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1724 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1725 s_list) {
1726 return svc;
1727 }
1728 }
1729
1730 iter->table = ip_vs_svc_fwm_table;
1731 iter->bucket = -1;
1732 goto scan_fwmark;
1733 }
1734
1735 /* next service in hashed by fwmark */
1736 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1737 return list_entry(e, struct ip_vs_service, f_list);
1738
1739 scan_fwmark:
1740 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1741 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1742 f_list)
1743 return svc;
1744 }
1745
1746 return NULL;
1747}
1748
1749static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1750{
1751 read_unlock_bh(&__ip_vs_svc_lock);
1752}
1753
1754
1755static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1756{
1757 if (v == SEQ_START_TOKEN) {
1758 seq_printf(seq,
1759 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1760 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1761 seq_puts(seq,
1762 "Prot LocalAddress:Port Scheduler Flags\n");
1763 seq_puts(seq,
1764 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1765 } else {
1766 const struct ip_vs_service *svc = v;
1767 const struct ip_vs_iter *iter = seq->private;
1768 const struct ip_vs_dest *dest;
1769
1770 if (iter->table == ip_vs_svc_table)
1771 seq_printf(seq, "%s %08X:%04X %s ",
1772 ip_vs_proto_name(svc->protocol),
e7ade46a 1773 ntohl(svc->addr.ip),
1da177e4
LT
1774 ntohs(svc->port),
1775 svc->scheduler->name);
1776 else
1777 seq_printf(seq, "FWM %08X %s ",
1778 svc->fwmark, svc->scheduler->name);
1779
1780 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1781 seq_printf(seq, "persistent %d %08X\n",
1782 svc->timeout,
1783 ntohl(svc->netmask));
1784 else
1785 seq_putc(seq, '\n');
1786
1787 list_for_each_entry(dest, &svc->destinations, n_list) {
1788 seq_printf(seq,
1789 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
e7ade46a 1790 ntohl(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1791 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1792 atomic_read(&dest->weight),
1793 atomic_read(&dest->activeconns),
1794 atomic_read(&dest->inactconns));
1795 }
1796 }
1797 return 0;
1798}
1799
56b3d975 1800static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1801 .start = ip_vs_info_seq_start,
1802 .next = ip_vs_info_seq_next,
1803 .stop = ip_vs_info_seq_stop,
1804 .show = ip_vs_info_seq_show,
1805};
1806
1807static int ip_vs_info_open(struct inode *inode, struct file *file)
1808{
cf7732e4
PE
1809 return seq_open_private(file, &ip_vs_info_seq_ops,
1810 sizeof(struct ip_vs_iter));
1da177e4
LT
1811}
1812
9a32144e 1813static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1814 .owner = THIS_MODULE,
1815 .open = ip_vs_info_open,
1816 .read = seq_read,
1817 .llseek = seq_lseek,
1818 .release = seq_release_private,
1819};
1820
1821#endif
1822
519e49e8
SW
1823struct ip_vs_stats ip_vs_stats = {
1824 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1825};
1da177e4
LT
1826
1827#ifdef CONFIG_PROC_FS
1828static int ip_vs_stats_show(struct seq_file *seq, void *v)
1829{
1830
1831/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1832 seq_puts(seq,
1833 " Total Incoming Outgoing Incoming Outgoing\n");
1834 seq_printf(seq,
1835 " Conns Packets Packets Bytes Bytes\n");
1836
1837 spin_lock_bh(&ip_vs_stats.lock);
1838 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1839 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1840 (unsigned long long) ip_vs_stats.inbytes,
1841 (unsigned long long) ip_vs_stats.outbytes);
1842
1843/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1844 seq_puts(seq,
1845 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1846 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1847 ip_vs_stats.cps,
1848 ip_vs_stats.inpps,
1849 ip_vs_stats.outpps,
1850 ip_vs_stats.inbps,
1851 ip_vs_stats.outbps);
1852 spin_unlock_bh(&ip_vs_stats.lock);
1853
1854 return 0;
1855}
1856
1857static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1858{
1859 return single_open(file, ip_vs_stats_show, NULL);
1860}
1861
9a32144e 1862static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1863 .owner = THIS_MODULE,
1864 .open = ip_vs_stats_seq_open,
1865 .read = seq_read,
1866 .llseek = seq_lseek,
1867 .release = single_release,
1868};
1869
1870#endif
1871
1872/*
1873 * Set timeout values for tcp tcpfin udp in the timeout_table.
1874 */
1875static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1876{
1877 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1878 u->tcp_timeout,
1879 u->tcp_fin_timeout,
1880 u->udp_timeout);
1881
1882#ifdef CONFIG_IP_VS_PROTO_TCP
1883 if (u->tcp_timeout) {
1884 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1885 = u->tcp_timeout * HZ;
1886 }
1887
1888 if (u->tcp_fin_timeout) {
1889 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1890 = u->tcp_fin_timeout * HZ;
1891 }
1892#endif
1893
1894#ifdef CONFIG_IP_VS_PROTO_UDP
1895 if (u->udp_timeout) {
1896 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1897 = u->udp_timeout * HZ;
1898 }
1899#endif
1900 return 0;
1901}
1902
1903
1904#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1905#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1906#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1907 sizeof(struct ip_vs_dest_user))
1908#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1909#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1910#define MAX_ARG_LEN SVCDEST_ARG_LEN
1911
9b5b5cff 1912static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1913 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1914 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1915 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1916 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1917 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1918 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1919 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1920 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1921 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1922 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1923 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1924};
1925
c860c6b1
JV
1926static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1927 struct ip_vs_service_user *usvc_compat)
1928{
1929 usvc->af = AF_INET;
1930 usvc->protocol = usvc_compat->protocol;
1931 usvc->addr.ip = usvc_compat->addr;
1932 usvc->port = usvc_compat->port;
1933 usvc->fwmark = usvc_compat->fwmark;
1934
1935 /* Deep copy of sched_name is not needed here */
1936 usvc->sched_name = usvc_compat->sched_name;
1937
1938 usvc->flags = usvc_compat->flags;
1939 usvc->timeout = usvc_compat->timeout;
1940 usvc->netmask = usvc_compat->netmask;
1941}
1942
1943static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1944 struct ip_vs_dest_user *udest_compat)
1945{
1946 udest->addr.ip = udest_compat->addr;
1947 udest->port = udest_compat->port;
1948 udest->conn_flags = udest_compat->conn_flags;
1949 udest->weight = udest_compat->weight;
1950 udest->u_threshold = udest_compat->u_threshold;
1951 udest->l_threshold = udest_compat->l_threshold;
1952}
1953
1da177e4
LT
1954static int
1955do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1956{
1957 int ret;
1958 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
1959 struct ip_vs_service_user *usvc_compat;
1960 struct ip_vs_service_user_kern usvc;
1da177e4 1961 struct ip_vs_service *svc;
c860c6b1
JV
1962 struct ip_vs_dest_user *udest_compat;
1963 struct ip_vs_dest_user_kern udest;
1da177e4
LT
1964
1965 if (!capable(CAP_NET_ADMIN))
1966 return -EPERM;
1967
1968 if (len != set_arglen[SET_CMDID(cmd)]) {
1969 IP_VS_ERR("set_ctl: len %u != %u\n",
1970 len, set_arglen[SET_CMDID(cmd)]);
1971 return -EINVAL;
1972 }
1973
1974 if (copy_from_user(arg, user, len) != 0)
1975 return -EFAULT;
1976
1977 /* increase the module use count */
1978 ip_vs_use_count_inc();
1979
14cc3e2b 1980 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
1981 ret = -ERESTARTSYS;
1982 goto out_dec;
1983 }
1984
1985 if (cmd == IP_VS_SO_SET_FLUSH) {
1986 /* Flush the virtual service */
1987 ret = ip_vs_flush();
1988 goto out_unlock;
1989 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1990 /* Set timeout values for (tcp tcpfin udp) */
1991 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1992 goto out_unlock;
1993 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1994 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1995 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1996 goto out_unlock;
1997 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
1998 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1999 ret = stop_sync_thread(dm->state);
2000 goto out_unlock;
2001 }
2002
c860c6b1
JV
2003 usvc_compat = (struct ip_vs_service_user *)arg;
2004 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2005
2006 /* We only use the new structs internally, so copy userspace compat
2007 * structs to extended internal versions */
2008 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2009 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2010
2011 if (cmd == IP_VS_SO_SET_ZERO) {
2012 /* if no service address is set, zero counters in all */
c860c6b1 2013 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2014 ret = ip_vs_zero_all();
2015 goto out_unlock;
2016 }
2017 }
2018
2019 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2020 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2021 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2022 usvc.protocol, NIPQUAD(usvc.addr.ip),
2023 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2024 ret = -EFAULT;
2025 goto out_unlock;
2026 }
2027
2028 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2029 if (usvc.fwmark == 0)
b18610de
JV
2030 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2031 &usvc.addr, usvc.port);
1da177e4 2032 else
b18610de 2033 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2034
2035 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2036 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2037 ret = -ESRCH;
2038 goto out_unlock;
2039 }
2040
2041 switch (cmd) {
2042 case IP_VS_SO_SET_ADD:
2043 if (svc != NULL)
2044 ret = -EEXIST;
2045 else
c860c6b1 2046 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2047 break;
2048 case IP_VS_SO_SET_EDIT:
c860c6b1 2049 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2050 break;
2051 case IP_VS_SO_SET_DEL:
2052 ret = ip_vs_del_service(svc);
2053 if (!ret)
2054 goto out_unlock;
2055 break;
2056 case IP_VS_SO_SET_ZERO:
2057 ret = ip_vs_zero_service(svc);
2058 break;
2059 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2060 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2061 break;
2062 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2063 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2064 break;
2065 case IP_VS_SO_SET_DELDEST:
c860c6b1 2066 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2067 break;
2068 default:
2069 ret = -EINVAL;
2070 }
2071
2072 if (svc)
2073 ip_vs_service_put(svc);
2074
2075 out_unlock:
14cc3e2b 2076 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2077 out_dec:
2078 /* decrease the module use count */
2079 ip_vs_use_count_dec();
2080
2081 return ret;
2082}
2083
2084
2085static void
2086ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2087{
2088 spin_lock_bh(&src->lock);
2089 memcpy(dst, src, (char*)&src->lock - (char*)src);
2090 spin_unlock_bh(&src->lock);
2091}
2092
2093static void
2094ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2095{
2096 dst->protocol = src->protocol;
e7ade46a 2097 dst->addr = src->addr.ip;
1da177e4
LT
2098 dst->port = src->port;
2099 dst->fwmark = src->fwmark;
4da62fc7 2100 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2101 dst->flags = src->flags;
2102 dst->timeout = src->timeout / HZ;
2103 dst->netmask = src->netmask;
2104 dst->num_dests = src->num_dests;
2105 ip_vs_copy_stats(&dst->stats, &src->stats);
2106}
2107
2108static inline int
2109__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2110 struct ip_vs_get_services __user *uptr)
2111{
2112 int idx, count=0;
2113 struct ip_vs_service *svc;
2114 struct ip_vs_service_entry entry;
2115 int ret = 0;
2116
2117 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2118 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2119 if (count >= get->num_services)
2120 goto out;
4da62fc7 2121 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2122 ip_vs_copy_service(&entry, svc);
2123 if (copy_to_user(&uptr->entrytable[count],
2124 &entry, sizeof(entry))) {
2125 ret = -EFAULT;
2126 goto out;
2127 }
2128 count++;
2129 }
2130 }
2131
2132 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2133 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2134 if (count >= get->num_services)
2135 goto out;
4da62fc7 2136 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2137 ip_vs_copy_service(&entry, svc);
2138 if (copy_to_user(&uptr->entrytable[count],
2139 &entry, sizeof(entry))) {
2140 ret = -EFAULT;
2141 goto out;
2142 }
2143 count++;
2144 }
2145 }
2146 out:
2147 return ret;
2148}
2149
2150static inline int
2151__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2152 struct ip_vs_get_dests __user *uptr)
2153{
2154 struct ip_vs_service *svc;
b18610de 2155 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2156 int ret = 0;
2157
2158 if (get->fwmark)
b18610de 2159 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2160 else
b18610de
JV
2161 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2162 get->port);
2163
1da177e4
LT
2164 if (svc) {
2165 int count = 0;
2166 struct ip_vs_dest *dest;
2167 struct ip_vs_dest_entry entry;
2168
2169 list_for_each_entry(dest, &svc->destinations, n_list) {
2170 if (count >= get->num_dests)
2171 break;
2172
e7ade46a 2173 entry.addr = dest->addr.ip;
1da177e4
LT
2174 entry.port = dest->port;
2175 entry.conn_flags = atomic_read(&dest->conn_flags);
2176 entry.weight = atomic_read(&dest->weight);
2177 entry.u_threshold = dest->u_threshold;
2178 entry.l_threshold = dest->l_threshold;
2179 entry.activeconns = atomic_read(&dest->activeconns);
2180 entry.inactconns = atomic_read(&dest->inactconns);
2181 entry.persistconns = atomic_read(&dest->persistconns);
2182 ip_vs_copy_stats(&entry.stats, &dest->stats);
2183 if (copy_to_user(&uptr->entrytable[count],
2184 &entry, sizeof(entry))) {
2185 ret = -EFAULT;
2186 break;
2187 }
2188 count++;
2189 }
2190 ip_vs_service_put(svc);
2191 } else
2192 ret = -ESRCH;
2193 return ret;
2194}
2195
2196static inline void
2197__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2198{
2199#ifdef CONFIG_IP_VS_PROTO_TCP
2200 u->tcp_timeout =
2201 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2202 u->tcp_fin_timeout =
2203 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2204#endif
2205#ifdef CONFIG_IP_VS_PROTO_UDP
2206 u->udp_timeout =
2207 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2208#endif
2209}
2210
2211
2212#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2213#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2214#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2215#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2216#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2217#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2218#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2219
9b5b5cff 2220static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2221 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2222 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2223 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2224 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2225 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2226 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2227 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2228};
2229
2230static int
2231do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2232{
2233 unsigned char arg[128];
2234 int ret = 0;
2235
2236 if (!capable(CAP_NET_ADMIN))
2237 return -EPERM;
2238
2239 if (*len < get_arglen[GET_CMDID(cmd)]) {
2240 IP_VS_ERR("get_ctl: len %u < %u\n",
2241 *len, get_arglen[GET_CMDID(cmd)]);
2242 return -EINVAL;
2243 }
2244
2245 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2246 return -EFAULT;
2247
14cc3e2b 2248 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2249 return -ERESTARTSYS;
2250
2251 switch (cmd) {
2252 case IP_VS_SO_GET_VERSION:
2253 {
2254 char buf[64];
2255
2256 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2257 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2258 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2259 ret = -EFAULT;
2260 goto out;
2261 }
2262 *len = strlen(buf)+1;
2263 }
2264 break;
2265
2266 case IP_VS_SO_GET_INFO:
2267 {
2268 struct ip_vs_getinfo info;
2269 info.version = IP_VS_VERSION_CODE;
2270 info.size = IP_VS_CONN_TAB_SIZE;
2271 info.num_services = ip_vs_num_services;
2272 if (copy_to_user(user, &info, sizeof(info)) != 0)
2273 ret = -EFAULT;
2274 }
2275 break;
2276
2277 case IP_VS_SO_GET_SERVICES:
2278 {
2279 struct ip_vs_get_services *get;
2280 int size;
2281
2282 get = (struct ip_vs_get_services *)arg;
2283 size = sizeof(*get) +
2284 sizeof(struct ip_vs_service_entry) * get->num_services;
2285 if (*len != size) {
2286 IP_VS_ERR("length: %u != %u\n", *len, size);
2287 ret = -EINVAL;
2288 goto out;
2289 }
2290 ret = __ip_vs_get_service_entries(get, user);
2291 }
2292 break;
2293
2294 case IP_VS_SO_GET_SERVICE:
2295 {
2296 struct ip_vs_service_entry *entry;
2297 struct ip_vs_service *svc;
b18610de 2298 union nf_inet_addr addr;
1da177e4
LT
2299
2300 entry = (struct ip_vs_service_entry *)arg;
b18610de 2301 addr.ip = entry->addr;
1da177e4 2302 if (entry->fwmark)
b18610de 2303 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2304 else
b18610de
JV
2305 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2306 &addr, entry->port);
1da177e4
LT
2307 if (svc) {
2308 ip_vs_copy_service(entry, svc);
2309 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2310 ret = -EFAULT;
2311 ip_vs_service_put(svc);
2312 } else
2313 ret = -ESRCH;
2314 }
2315 break;
2316
2317 case IP_VS_SO_GET_DESTS:
2318 {
2319 struct ip_vs_get_dests *get;
2320 int size;
2321
2322 get = (struct ip_vs_get_dests *)arg;
2323 size = sizeof(*get) +
2324 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2325 if (*len != size) {
2326 IP_VS_ERR("length: %u != %u\n", *len, size);
2327 ret = -EINVAL;
2328 goto out;
2329 }
2330 ret = __ip_vs_get_dest_entries(get, user);
2331 }
2332 break;
2333
2334 case IP_VS_SO_GET_TIMEOUT:
2335 {
2336 struct ip_vs_timeout_user t;
2337
2338 __ip_vs_get_timeouts(&t);
2339 if (copy_to_user(user, &t, sizeof(t)) != 0)
2340 ret = -EFAULT;
2341 }
2342 break;
2343
2344 case IP_VS_SO_GET_DAEMON:
2345 {
2346 struct ip_vs_daemon_user d[2];
2347
2348 memset(&d, 0, sizeof(d));
2349 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2350 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2351 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2352 d[0].syncid = ip_vs_master_syncid;
2353 }
2354 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2355 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2356 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2357 d[1].syncid = ip_vs_backup_syncid;
2358 }
2359 if (copy_to_user(user, &d, sizeof(d)) != 0)
2360 ret = -EFAULT;
2361 }
2362 break;
2363
2364 default:
2365 ret = -EINVAL;
2366 }
2367
2368 out:
14cc3e2b 2369 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2370 return ret;
2371}
2372
2373
2374static struct nf_sockopt_ops ip_vs_sockopts = {
2375 .pf = PF_INET,
2376 .set_optmin = IP_VS_BASE_CTL,
2377 .set_optmax = IP_VS_SO_SET_MAX+1,
2378 .set = do_ip_vs_set_ctl,
2379 .get_optmin = IP_VS_BASE_CTL,
2380 .get_optmax = IP_VS_SO_GET_MAX+1,
2381 .get = do_ip_vs_get_ctl,
16fcec35 2382 .owner = THIS_MODULE,
1da177e4
LT
2383};
2384
9a812198
JV
2385/*
2386 * Generic Netlink interface
2387 */
2388
2389/* IPVS genetlink family */
2390static struct genl_family ip_vs_genl_family = {
2391 .id = GENL_ID_GENERATE,
2392 .hdrsize = 0,
2393 .name = IPVS_GENL_NAME,
2394 .version = IPVS_GENL_VERSION,
2395 .maxattr = IPVS_CMD_MAX,
2396};
2397
2398/* Policy used for first-level command attributes */
2399static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2400 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2401 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2402 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2403 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2404 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2405 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2406};
2407
2408/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2409static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2410 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2411 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2412 .len = IP_VS_IFNAME_MAXLEN },
2413 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2414};
2415
2416/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2417static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2418 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2419 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2420 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2421 .len = sizeof(union nf_inet_addr) },
2422 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2423 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2424 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2425 .len = IP_VS_SCHEDNAME_MAXLEN },
2426 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2427 .len = sizeof(struct ip_vs_flags) },
2428 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2429 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2430 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2431};
2432
2433/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2434static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2435 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2436 .len = sizeof(union nf_inet_addr) },
2437 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2438 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2439 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2440 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2441 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2442 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2443 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2444 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2445 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2446};
2447
2448static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2449 struct ip_vs_stats *stats)
2450{
2451 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2452 if (!nl_stats)
2453 return -EMSGSIZE;
2454
2455 spin_lock_bh(&stats->lock);
2456
2457 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2458 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2459 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2460 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2461 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2462 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2463 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2464 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2465 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2466 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2467
2468 spin_unlock_bh(&stats->lock);
2469
2470 nla_nest_end(skb, nl_stats);
2471
2472 return 0;
2473
2474nla_put_failure:
2475 spin_unlock_bh(&stats->lock);
2476 nla_nest_cancel(skb, nl_stats);
2477 return -EMSGSIZE;
2478}
2479
2480static int ip_vs_genl_fill_service(struct sk_buff *skb,
2481 struct ip_vs_service *svc)
2482{
2483 struct nlattr *nl_service;
2484 struct ip_vs_flags flags = { .flags = svc->flags,
2485 .mask = ~0 };
2486
2487 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2488 if (!nl_service)
2489 return -EMSGSIZE;
2490
2491 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2492
2493 if (svc->fwmark) {
2494 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2495 } else {
2496 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2497 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2498 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2499 }
2500
2501 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2502 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2503 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2504 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2505
2506 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2507 goto nla_put_failure;
2508
2509 nla_nest_end(skb, nl_service);
2510
2511 return 0;
2512
2513nla_put_failure:
2514 nla_nest_cancel(skb, nl_service);
2515 return -EMSGSIZE;
2516}
2517
2518static int ip_vs_genl_dump_service(struct sk_buff *skb,
2519 struct ip_vs_service *svc,
2520 struct netlink_callback *cb)
2521{
2522 void *hdr;
2523
2524 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2525 &ip_vs_genl_family, NLM_F_MULTI,
2526 IPVS_CMD_NEW_SERVICE);
2527 if (!hdr)
2528 return -EMSGSIZE;
2529
2530 if (ip_vs_genl_fill_service(skb, svc) < 0)
2531 goto nla_put_failure;
2532
2533 return genlmsg_end(skb, hdr);
2534
2535nla_put_failure:
2536 genlmsg_cancel(skb, hdr);
2537 return -EMSGSIZE;
2538}
2539
2540static int ip_vs_genl_dump_services(struct sk_buff *skb,
2541 struct netlink_callback *cb)
2542{
2543 int idx = 0, i;
2544 int start = cb->args[0];
2545 struct ip_vs_service *svc;
2546
2547 mutex_lock(&__ip_vs_mutex);
2548 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2549 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2550 if (++idx <= start)
2551 continue;
2552 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2553 idx--;
2554 goto nla_put_failure;
2555 }
2556 }
2557 }
2558
2559 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2560 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2561 if (++idx <= start)
2562 continue;
2563 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2564 idx--;
2565 goto nla_put_failure;
2566 }
2567 }
2568 }
2569
2570nla_put_failure:
2571 mutex_unlock(&__ip_vs_mutex);
2572 cb->args[0] = idx;
2573
2574 return skb->len;
2575}
2576
c860c6b1 2577static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2578 struct nlattr *nla, int full_entry)
2579{
2580 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2581 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2582
2583 /* Parse mandatory identifying service fields first */
2584 if (nla == NULL ||
2585 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2586 return -EINVAL;
2587
2588 nla_af = attrs[IPVS_SVC_ATTR_AF];
2589 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2590 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2591 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2592 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2593
2594 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2595 return -EINVAL;
2596
c860c6b1 2597 usvc->af = nla_get_u16(nla_af);
9a812198
JV
2598 /* For now, only support IPv4 */
2599 if (nla_get_u16(nla_af) != AF_INET)
2600 return -EAFNOSUPPORT;
2601
2602 if (nla_fwmark) {
2603 usvc->protocol = IPPROTO_TCP;
2604 usvc->fwmark = nla_get_u32(nla_fwmark);
2605 } else {
2606 usvc->protocol = nla_get_u16(nla_protocol);
2607 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2608 usvc->port = nla_get_u16(nla_port);
2609 usvc->fwmark = 0;
2610 }
2611
2612 /* If a full entry was requested, check for the additional fields */
2613 if (full_entry) {
2614 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2615 *nla_netmask;
2616 struct ip_vs_flags flags;
2617 struct ip_vs_service *svc;
2618
2619 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2620 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2621 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2622 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2623
2624 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2625 return -EINVAL;
2626
2627 nla_memcpy(&flags, nla_flags, sizeof(flags));
2628
2629 /* prefill flags from service if it already exists */
2630 if (usvc->fwmark)
b18610de 2631 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2632 else
b18610de
JV
2633 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2634 &usvc->addr, usvc->port);
9a812198
JV
2635 if (svc) {
2636 usvc->flags = svc->flags;
2637 ip_vs_service_put(svc);
2638 } else
2639 usvc->flags = 0;
2640
2641 /* set new flags from userland */
2642 usvc->flags = (usvc->flags & ~flags.mask) |
2643 (flags.flags & flags.mask);
c860c6b1 2644 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2645 usvc->timeout = nla_get_u32(nla_timeout);
2646 usvc->netmask = nla_get_u32(nla_netmask);
2647 }
2648
2649 return 0;
2650}
2651
2652static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2653{
c860c6b1 2654 struct ip_vs_service_user_kern usvc;
9a812198
JV
2655 int ret;
2656
2657 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2658 if (ret)
2659 return ERR_PTR(ret);
2660
2661 if (usvc.fwmark)
b18610de 2662 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2663 else
b18610de
JV
2664 return __ip_vs_service_get(usvc.af, usvc.protocol,
2665 &usvc.addr, usvc.port);
9a812198
JV
2666}
2667
2668static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2669{
2670 struct nlattr *nl_dest;
2671
2672 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2673 if (!nl_dest)
2674 return -EMSGSIZE;
2675
2676 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2677 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2678
2679 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2680 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2681 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2682 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2683 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2684 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2685 atomic_read(&dest->activeconns));
2686 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2687 atomic_read(&dest->inactconns));
2688 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2689 atomic_read(&dest->persistconns));
2690
2691 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2692 goto nla_put_failure;
2693
2694 nla_nest_end(skb, nl_dest);
2695
2696 return 0;
2697
2698nla_put_failure:
2699 nla_nest_cancel(skb, nl_dest);
2700 return -EMSGSIZE;
2701}
2702
2703static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2704 struct netlink_callback *cb)
2705{
2706 void *hdr;
2707
2708 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2709 &ip_vs_genl_family, NLM_F_MULTI,
2710 IPVS_CMD_NEW_DEST);
2711 if (!hdr)
2712 return -EMSGSIZE;
2713
2714 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2715 goto nla_put_failure;
2716
2717 return genlmsg_end(skb, hdr);
2718
2719nla_put_failure:
2720 genlmsg_cancel(skb, hdr);
2721 return -EMSGSIZE;
2722}
2723
2724static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2725 struct netlink_callback *cb)
2726{
2727 int idx = 0;
2728 int start = cb->args[0];
2729 struct ip_vs_service *svc;
2730 struct ip_vs_dest *dest;
2731 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2732
2733 mutex_lock(&__ip_vs_mutex);
2734
2735 /* Try to find the service for which to dump destinations */
2736 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2737 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2738 goto out_err;
2739
2740 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2741 if (IS_ERR(svc) || svc == NULL)
2742 goto out_err;
2743
2744 /* Dump the destinations */
2745 list_for_each_entry(dest, &svc->destinations, n_list) {
2746 if (++idx <= start)
2747 continue;
2748 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2749 idx--;
2750 goto nla_put_failure;
2751 }
2752 }
2753
2754nla_put_failure:
2755 cb->args[0] = idx;
2756 ip_vs_service_put(svc);
2757
2758out_err:
2759 mutex_unlock(&__ip_vs_mutex);
2760
2761 return skb->len;
2762}
2763
c860c6b1 2764static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2765 struct nlattr *nla, int full_entry)
2766{
2767 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2768 struct nlattr *nla_addr, *nla_port;
2769
2770 /* Parse mandatory identifying destination fields first */
2771 if (nla == NULL ||
2772 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2773 return -EINVAL;
2774
2775 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2776 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2777
2778 if (!(nla_addr && nla_port))
2779 return -EINVAL;
2780
2781 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2782 udest->port = nla_get_u16(nla_port);
2783
2784 /* If a full entry was requested, check for the additional fields */
2785 if (full_entry) {
2786 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2787 *nla_l_thresh;
2788
2789 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2790 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2791 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2792 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2793
2794 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2795 return -EINVAL;
2796
2797 udest->conn_flags = nla_get_u32(nla_fwd)
2798 & IP_VS_CONN_F_FWD_MASK;
2799 udest->weight = nla_get_u32(nla_weight);
2800 udest->u_threshold = nla_get_u32(nla_u_thresh);
2801 udest->l_threshold = nla_get_u32(nla_l_thresh);
2802 }
2803
2804 return 0;
2805}
2806
2807static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2808 const char *mcast_ifn, __be32 syncid)
2809{
2810 struct nlattr *nl_daemon;
2811
2812 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2813 if (!nl_daemon)
2814 return -EMSGSIZE;
2815
2816 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2817 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2818 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2819
2820 nla_nest_end(skb, nl_daemon);
2821
2822 return 0;
2823
2824nla_put_failure:
2825 nla_nest_cancel(skb, nl_daemon);
2826 return -EMSGSIZE;
2827}
2828
2829static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2830 const char *mcast_ifn, __be32 syncid,
2831 struct netlink_callback *cb)
2832{
2833 void *hdr;
2834 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2835 &ip_vs_genl_family, NLM_F_MULTI,
2836 IPVS_CMD_NEW_DAEMON);
2837 if (!hdr)
2838 return -EMSGSIZE;
2839
2840 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2841 goto nla_put_failure;
2842
2843 return genlmsg_end(skb, hdr);
2844
2845nla_put_failure:
2846 genlmsg_cancel(skb, hdr);
2847 return -EMSGSIZE;
2848}
2849
2850static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2851 struct netlink_callback *cb)
2852{
2853 mutex_lock(&__ip_vs_mutex);
2854 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2855 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2856 ip_vs_master_mcast_ifn,
2857 ip_vs_master_syncid, cb) < 0)
2858 goto nla_put_failure;
2859
2860 cb->args[0] = 1;
2861 }
2862
2863 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2864 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2865 ip_vs_backup_mcast_ifn,
2866 ip_vs_backup_syncid, cb) < 0)
2867 goto nla_put_failure;
2868
2869 cb->args[1] = 1;
2870 }
2871
2872nla_put_failure:
2873 mutex_unlock(&__ip_vs_mutex);
2874
2875 return skb->len;
2876}
2877
2878static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2879{
2880 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2881 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2882 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2883 return -EINVAL;
2884
2885 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2886 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2887 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2888}
2889
2890static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2891{
2892 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2893 return -EINVAL;
2894
2895 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2896}
2897
2898static int ip_vs_genl_set_config(struct nlattr **attrs)
2899{
2900 struct ip_vs_timeout_user t;
2901
2902 __ip_vs_get_timeouts(&t);
2903
2904 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2905 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2906
2907 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2908 t.tcp_fin_timeout =
2909 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2910
2911 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2912 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2913
2914 return ip_vs_set_timeout(&t);
2915}
2916
2917static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2918{
2919 struct ip_vs_service *svc = NULL;
c860c6b1
JV
2920 struct ip_vs_service_user_kern usvc;
2921 struct ip_vs_dest_user_kern udest;
9a812198
JV
2922 int ret = 0, cmd;
2923 int need_full_svc = 0, need_full_dest = 0;
2924
2925 cmd = info->genlhdr->cmd;
2926
2927 mutex_lock(&__ip_vs_mutex);
2928
2929 if (cmd == IPVS_CMD_FLUSH) {
2930 ret = ip_vs_flush();
2931 goto out;
2932 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2933 ret = ip_vs_genl_set_config(info->attrs);
2934 goto out;
2935 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2936 cmd == IPVS_CMD_DEL_DAEMON) {
2937
2938 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2939
2940 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2941 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2942 info->attrs[IPVS_CMD_ATTR_DAEMON],
2943 ip_vs_daemon_policy)) {
2944 ret = -EINVAL;
2945 goto out;
2946 }
2947
2948 if (cmd == IPVS_CMD_NEW_DAEMON)
2949 ret = ip_vs_genl_new_daemon(daemon_attrs);
2950 else
2951 ret = ip_vs_genl_del_daemon(daemon_attrs);
2952 goto out;
2953 } else if (cmd == IPVS_CMD_ZERO &&
2954 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2955 ret = ip_vs_zero_all();
2956 goto out;
2957 }
2958
2959 /* All following commands require a service argument, so check if we
2960 * received a valid one. We need a full service specification when
2961 * adding / editing a service. Only identifying members otherwise. */
2962 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2963 need_full_svc = 1;
2964
2965 ret = ip_vs_genl_parse_service(&usvc,
2966 info->attrs[IPVS_CMD_ATTR_SERVICE],
2967 need_full_svc);
2968 if (ret)
2969 goto out;
2970
2971 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2972 if (usvc.fwmark == 0)
b18610de
JV
2973 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2974 &usvc.addr, usvc.port);
9a812198 2975 else
b18610de 2976 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
2977
2978 /* Unless we're adding a new service, the service must already exist */
2979 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2980 ret = -ESRCH;
2981 goto out;
2982 }
2983
2984 /* Destination commands require a valid destination argument. For
2985 * adding / editing a destination, we need a full destination
2986 * specification. */
2987 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2988 cmd == IPVS_CMD_DEL_DEST) {
2989 if (cmd != IPVS_CMD_DEL_DEST)
2990 need_full_dest = 1;
2991
2992 ret = ip_vs_genl_parse_dest(&udest,
2993 info->attrs[IPVS_CMD_ATTR_DEST],
2994 need_full_dest);
2995 if (ret)
2996 goto out;
2997 }
2998
2999 switch (cmd) {
3000 case IPVS_CMD_NEW_SERVICE:
3001 if (svc == NULL)
3002 ret = ip_vs_add_service(&usvc, &svc);
3003 else
3004 ret = -EEXIST;
3005 break;
3006 case IPVS_CMD_SET_SERVICE:
3007 ret = ip_vs_edit_service(svc, &usvc);
3008 break;
3009 case IPVS_CMD_DEL_SERVICE:
3010 ret = ip_vs_del_service(svc);
3011 break;
3012 case IPVS_CMD_NEW_DEST:
3013 ret = ip_vs_add_dest(svc, &udest);
3014 break;
3015 case IPVS_CMD_SET_DEST:
3016 ret = ip_vs_edit_dest(svc, &udest);
3017 break;
3018 case IPVS_CMD_DEL_DEST:
3019 ret = ip_vs_del_dest(svc, &udest);
3020 break;
3021 case IPVS_CMD_ZERO:
3022 ret = ip_vs_zero_service(svc);
3023 break;
3024 default:
3025 ret = -EINVAL;
3026 }
3027
3028out:
3029 if (svc)
3030 ip_vs_service_put(svc);
3031 mutex_unlock(&__ip_vs_mutex);
3032
3033 return ret;
3034}
3035
3036static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3037{
3038 struct sk_buff *msg;
3039 void *reply;
3040 int ret, cmd, reply_cmd;
3041
3042 cmd = info->genlhdr->cmd;
3043
3044 if (cmd == IPVS_CMD_GET_SERVICE)
3045 reply_cmd = IPVS_CMD_NEW_SERVICE;
3046 else if (cmd == IPVS_CMD_GET_INFO)
3047 reply_cmd = IPVS_CMD_SET_INFO;
3048 else if (cmd == IPVS_CMD_GET_CONFIG)
3049 reply_cmd = IPVS_CMD_SET_CONFIG;
3050 else {
3051 IP_VS_ERR("unknown Generic Netlink command\n");
3052 return -EINVAL;
3053 }
3054
3055 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3056 if (!msg)
3057 return -ENOMEM;
3058
3059 mutex_lock(&__ip_vs_mutex);
3060
3061 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3062 if (reply == NULL)
3063 goto nla_put_failure;
3064
3065 switch (cmd) {
3066 case IPVS_CMD_GET_SERVICE:
3067 {
3068 struct ip_vs_service *svc;
3069
3070 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3071 if (IS_ERR(svc)) {
3072 ret = PTR_ERR(svc);
3073 goto out_err;
3074 } else if (svc) {
3075 ret = ip_vs_genl_fill_service(msg, svc);
3076 ip_vs_service_put(svc);
3077 if (ret)
3078 goto nla_put_failure;
3079 } else {
3080 ret = -ESRCH;
3081 goto out_err;
3082 }
3083
3084 break;
3085 }
3086
3087 case IPVS_CMD_GET_CONFIG:
3088 {
3089 struct ip_vs_timeout_user t;
3090
3091 __ip_vs_get_timeouts(&t);
3092#ifdef CONFIG_IP_VS_PROTO_TCP
3093 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3094 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3095 t.tcp_fin_timeout);
3096#endif
3097#ifdef CONFIG_IP_VS_PROTO_UDP
3098 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3099#endif
3100
3101 break;
3102 }
3103
3104 case IPVS_CMD_GET_INFO:
3105 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3106 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3107 IP_VS_CONN_TAB_SIZE);
3108 break;
3109 }
3110
3111 genlmsg_end(msg, reply);
3112 ret = genlmsg_unicast(msg, info->snd_pid);
3113 goto out;
3114
3115nla_put_failure:
3116 IP_VS_ERR("not enough space in Netlink message\n");
3117 ret = -EMSGSIZE;
3118
3119out_err:
3120 nlmsg_free(msg);
3121out:
3122 mutex_unlock(&__ip_vs_mutex);
3123
3124 return ret;
3125}
3126
3127
3128static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3129 {
3130 .cmd = IPVS_CMD_NEW_SERVICE,
3131 .flags = GENL_ADMIN_PERM,
3132 .policy = ip_vs_cmd_policy,
3133 .doit = ip_vs_genl_set_cmd,
3134 },
3135 {
3136 .cmd = IPVS_CMD_SET_SERVICE,
3137 .flags = GENL_ADMIN_PERM,
3138 .policy = ip_vs_cmd_policy,
3139 .doit = ip_vs_genl_set_cmd,
3140 },
3141 {
3142 .cmd = IPVS_CMD_DEL_SERVICE,
3143 .flags = GENL_ADMIN_PERM,
3144 .policy = ip_vs_cmd_policy,
3145 .doit = ip_vs_genl_set_cmd,
3146 },
3147 {
3148 .cmd = IPVS_CMD_GET_SERVICE,
3149 .flags = GENL_ADMIN_PERM,
3150 .doit = ip_vs_genl_get_cmd,
3151 .dumpit = ip_vs_genl_dump_services,
3152 .policy = ip_vs_cmd_policy,
3153 },
3154 {
3155 .cmd = IPVS_CMD_NEW_DEST,
3156 .flags = GENL_ADMIN_PERM,
3157 .policy = ip_vs_cmd_policy,
3158 .doit = ip_vs_genl_set_cmd,
3159 },
3160 {
3161 .cmd = IPVS_CMD_SET_DEST,
3162 .flags = GENL_ADMIN_PERM,
3163 .policy = ip_vs_cmd_policy,
3164 .doit = ip_vs_genl_set_cmd,
3165 },
3166 {
3167 .cmd = IPVS_CMD_DEL_DEST,
3168 .flags = GENL_ADMIN_PERM,
3169 .policy = ip_vs_cmd_policy,
3170 .doit = ip_vs_genl_set_cmd,
3171 },
3172 {
3173 .cmd = IPVS_CMD_GET_DEST,
3174 .flags = GENL_ADMIN_PERM,
3175 .policy = ip_vs_cmd_policy,
3176 .dumpit = ip_vs_genl_dump_dests,
3177 },
3178 {
3179 .cmd = IPVS_CMD_NEW_DAEMON,
3180 .flags = GENL_ADMIN_PERM,
3181 .policy = ip_vs_cmd_policy,
3182 .doit = ip_vs_genl_set_cmd,
3183 },
3184 {
3185 .cmd = IPVS_CMD_DEL_DAEMON,
3186 .flags = GENL_ADMIN_PERM,
3187 .policy = ip_vs_cmd_policy,
3188 .doit = ip_vs_genl_set_cmd,
3189 },
3190 {
3191 .cmd = IPVS_CMD_GET_DAEMON,
3192 .flags = GENL_ADMIN_PERM,
3193 .dumpit = ip_vs_genl_dump_daemons,
3194 },
3195 {
3196 .cmd = IPVS_CMD_SET_CONFIG,
3197 .flags = GENL_ADMIN_PERM,
3198 .policy = ip_vs_cmd_policy,
3199 .doit = ip_vs_genl_set_cmd,
3200 },
3201 {
3202 .cmd = IPVS_CMD_GET_CONFIG,
3203 .flags = GENL_ADMIN_PERM,
3204 .doit = ip_vs_genl_get_cmd,
3205 },
3206 {
3207 .cmd = IPVS_CMD_GET_INFO,
3208 .flags = GENL_ADMIN_PERM,
3209 .doit = ip_vs_genl_get_cmd,
3210 },
3211 {
3212 .cmd = IPVS_CMD_ZERO,
3213 .flags = GENL_ADMIN_PERM,
3214 .policy = ip_vs_cmd_policy,
3215 .doit = ip_vs_genl_set_cmd,
3216 },
3217 {
3218 .cmd = IPVS_CMD_FLUSH,
3219 .flags = GENL_ADMIN_PERM,
3220 .doit = ip_vs_genl_set_cmd,
3221 },
3222};
3223
3224static int __init ip_vs_genl_register(void)
3225{
3226 int ret, i;
3227
3228 ret = genl_register_family(&ip_vs_genl_family);
3229 if (ret)
3230 return ret;
3231
3232 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3233 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3234 if (ret)
3235 goto err_out;
3236 }
3237 return 0;
3238
3239err_out:
3240 genl_unregister_family(&ip_vs_genl_family);
3241 return ret;
3242}
3243
3244static void ip_vs_genl_unregister(void)
3245{
3246 genl_unregister_family(&ip_vs_genl_family);
3247}
3248
3249/* End of Generic Netlink interface definitions */
3250
1da177e4 3251
048cf48b 3252int __init ip_vs_control_init(void)
1da177e4
LT
3253{
3254 int ret;
3255 int idx;
3256
3257 EnterFunction(2);
3258
3259 ret = nf_register_sockopt(&ip_vs_sockopts);
3260 if (ret) {
3261 IP_VS_ERR("cannot register sockopt.\n");
3262 return ret;
3263 }
3264
9a812198
JV
3265 ret = ip_vs_genl_register();
3266 if (ret) {
3267 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3268 nf_unregister_sockopt(&ip_vs_sockopts);
3269 return ret;
3270 }
3271
457c4cbc
EB
3272 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3273 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3274
90754f8e 3275 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3276
3277 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3278 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3279 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3280 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3281 }
3282 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3283 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3284 }
3285
1da177e4
LT
3286 ip_vs_new_estimator(&ip_vs_stats);
3287
3288 /* Hook the defense timer */
3289 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3290
3291 LeaveFunction(2);
3292 return 0;
3293}
3294
3295
3296void ip_vs_control_cleanup(void)
3297{
3298 EnterFunction(2);
3299 ip_vs_trash_cleanup();
3300 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3301 cancel_work_sync(&defense_work.work);
1da177e4
LT
3302 ip_vs_kill_estimator(&ip_vs_stats);
3303 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3304 proc_net_remove(&init_net, "ip_vs_stats");
3305 proc_net_remove(&init_net, "ip_vs");
9a812198 3306 ip_vs_genl_unregister();
1da177e4
LT
3307 nf_unregister_sockopt(&ip_vs_sockopts);
3308 LeaveFunction(2);
3309}