]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Add/adjust Netfilter hook functions and helpers for v6
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4 39#include <net/sock.h>
9a812198 40#include <net/genetlink.h>
1da177e4
LT
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
46/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 47static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
48
49/* lock for service table */
50static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52/* lock for table with the real services */
53static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55/* lock for state and timeout tables */
56static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58/* lock for drop entry handling */
59static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61/* lock for drop packet handling */
62static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/* Accessor for the file-local debug level; only built with CONFIG_IP_VS_DEBUG. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif	/* CONFIG_IP_VS_DEBUG */
93
94/*
af9debd4
JA
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
1da177e4
LT
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
af9debd4
JA
116 local_bh_disable();
117
1da177e4
LT
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
214
215 local_bh_enable();
1da177e4
LT
216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 225
c4028958 226static void defense_work_handler(struct work_struct *work)
1da177e4
LT
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
b18610de
JV
285ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
286 __be16 port)
1da177e4
LT
287{
288 register unsigned porth = ntohs(port);
b18610de 289 __be32 addr_fold = addr->ip;
1da177e4 290
b18610de
JV
291#ifdef CONFIG_IP_VS_IPV6
292 if (af == AF_INET6)
293 addr_fold = addr->ip6[0]^addr->ip6[1]^
294 addr->ip6[2]^addr->ip6[3];
295#endif
296
297 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
298 & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Returns hash value of fwmark for virtual service lookup
303 */
304static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
305{
306 return fwmark & IP_VS_SVC_TAB_MASK;
307}
308
309/*
310 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
311 * or in the ip_vs_svc_fwm_table by fwmark.
312 * Should be called with locked tables.
313 */
314static int ip_vs_svc_hash(struct ip_vs_service *svc)
315{
316 unsigned hash;
317
318 if (svc->flags & IP_VS_SVC_F_HASHED) {
319 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
320 "called from %p\n", __builtin_return_address(0));
321 return 0;
322 }
323
324 if (svc->fwmark == 0) {
325 /*
326 * Hash it by <protocol,addr,port> in ip_vs_svc_table
327 */
b18610de 328 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 329 svc->port);
1da177e4
LT
330 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
331 } else {
332 /*
333 * Hash it by fwmark in ip_vs_svc_fwm_table
334 */
335 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
336 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
337 }
338
339 svc->flags |= IP_VS_SVC_F_HASHED;
340 /* increase its refcnt because it is referenced by the svc table */
341 atomic_inc(&svc->refcnt);
342 return 1;
343}
344
345
346/*
347 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
348 * Should be called with locked tables.
349 */
350static int ip_vs_svc_unhash(struct ip_vs_service *svc)
351{
352 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
353 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
354 "called from %p\n", __builtin_return_address(0));
355 return 0;
356 }
357
358 if (svc->fwmark == 0) {
359 /* Remove it from the ip_vs_svc_table table */
360 list_del(&svc->s_list);
361 } else {
362 /* Remove it from the ip_vs_svc_fwm_table table */
363 list_del(&svc->f_list);
364 }
365
366 svc->flags &= ~IP_VS_SVC_F_HASHED;
367 atomic_dec(&svc->refcnt);
368 return 1;
369}
370
371
372/*
373 * Get service by {proto,addr,port} in the service table.
374 */
b18610de
JV
375static inline struct ip_vs_service *
376__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
377 __be16 vport)
1da177e4
LT
378{
379 unsigned hash;
380 struct ip_vs_service *svc;
381
382 /* Check for "full" addressed entries */
b18610de 383 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
384
385 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
386 if ((svc->af == af)
387 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
388 && (svc->port == vport)
389 && (svc->protocol == protocol)) {
390 /* HIT */
391 atomic_inc(&svc->usecnt);
392 return svc;
393 }
394 }
395
396 return NULL;
397}
398
399
400/*
401 * Get service by {fwmark} in the service table.
402 */
b18610de
JV
403static inline struct ip_vs_service *
404__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
405{
406 unsigned hash;
407 struct ip_vs_service *svc;
408
409 /* Check for fwmark addressed entries */
410 hash = ip_vs_svc_fwm_hashkey(fwmark);
411
412 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 413 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423struct ip_vs_service *
3c2e0505
JV
424ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
425 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
426{
427 struct ip_vs_service *svc;
3c2e0505 428
1da177e4
LT
429 read_lock(&__ip_vs_svc_lock);
430
431 /*
432 * Check the table hashed by fwmark first
433 */
3c2e0505 434 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
435 goto out;
436
437 /*
438 * Check the table hashed by <protocol,addr,port>
439 * for "full" addressed entries
440 */
3c2e0505 441 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
442
443 if (svc == NULL
444 && protocol == IPPROTO_TCP
445 && atomic_read(&ip_vs_ftpsvc_counter)
446 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
447 /*
448 * Check if ftp service entry exists, the packet
449 * might belong to FTP data connections.
450 */
3c2e0505 451 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
452 }
453
454 if (svc == NULL
455 && atomic_read(&ip_vs_nullsvc_counter)) {
456 /*
457 * Check if the catch-all port (port zero) exists
458 */
3c2e0505 459 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
460 }
461
462 out:
463 read_unlock(&__ip_vs_svc_lock);
464
3c2e0505
JV
465 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
466 fwmark, ip_vs_proto_name(protocol),
467 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
468 svc ? "hit" : "not hit");
1da177e4
LT
469
470 return svc;
471}
472
473
474static inline void
475__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
476{
477 atomic_inc(&svc->refcnt);
478 dest->svc = svc;
479}
480
481static inline void
482__ip_vs_unbind_svc(struct ip_vs_dest *dest)
483{
484 struct ip_vs_service *svc = dest->svc;
485
486 dest->svc = NULL;
487 if (atomic_dec_and_test(&svc->refcnt))
488 kfree(svc);
489}
490
491
492/*
493 * Returns hash value for real service
494 */
014d730d 495static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port)
1da177e4
LT
496{
497 register unsigned porth = ntohs(port);
498
499 return (ntohl(addr)^(porth>>IP_VS_RTAB_BITS)^porth)
500 & IP_VS_RTAB_MASK;
501}
502
503/*
504 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
505 * should be called with locked tables.
506 */
507static int ip_vs_rs_hash(struct ip_vs_dest *dest)
508{
509 unsigned hash;
510
511 if (!list_empty(&dest->d_list)) {
512 return 0;
513 }
514
515 /*
516 * Hash by proto,addr,port,
517 * which are the parameters of the real service.
518 */
e7ade46a 519 hash = ip_vs_rs_hashkey(dest->addr.ip, dest->port);
1da177e4
LT
520 list_add(&dest->d_list, &ip_vs_rtable[hash]);
521
522 return 1;
523}
524
525/*
526 * UNhashes ip_vs_dest from ip_vs_rtable.
527 * should be called with locked tables.
528 */
529static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
530{
531 /*
532 * Remove it from the ip_vs_rtable table.
533 */
534 if (!list_empty(&dest->d_list)) {
535 list_del(&dest->d_list);
536 INIT_LIST_HEAD(&dest->d_list);
537 }
538
539 return 1;
540}
541
542/*
543 * Lookup real service by <proto,addr,port> in the real service table.
544 */
545struct ip_vs_dest *
014d730d 546ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport)
1da177e4
LT
547{
548 unsigned hash;
549 struct ip_vs_dest *dest;
550
551 /*
552 * Check for "full" addressed entries
553 * Return the first found entry
554 */
555 hash = ip_vs_rs_hashkey(daddr, dport);
556
557 read_lock(&__ip_vs_rs_lock);
558 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
e7ade46a 559 if ((dest->addr.ip == daddr)
1da177e4
LT
560 && (dest->port == dport)
561 && ((dest->protocol == protocol) ||
562 dest->vfwmark)) {
563 /* HIT */
564 read_unlock(&__ip_vs_rs_lock);
565 return dest;
566 }
567 }
568 read_unlock(&__ip_vs_rs_lock);
569
570 return NULL;
571}
572
573/*
574 * Lookup destination by {addr,port} in the given service
575 */
576static struct ip_vs_dest *
014d730d 577ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
578{
579 struct ip_vs_dest *dest;
580
581 /*
582 * Find the destination for the given service
583 */
584 list_for_each_entry(dest, &svc->destinations, n_list) {
e7ade46a 585 if ((dest->addr.ip == daddr) && (dest->port == dport)) {
1da177e4
LT
586 /* HIT */
587 return dest;
588 }
589 }
590
591 return NULL;
592}
593
1e356f9c
RB
594/*
595 * Find destination by {daddr,dport,vaddr,protocol}
596 * Cretaed to be used in ip_vs_process_message() in
597 * the backup synchronization daemon. It finds the
598 * destination to be bound to the received connection
599 * on the backup.
600 *
601 * ip_vs_lookup_real_service() looked promissing, but
602 * seems not working as expected.
603 */
604struct ip_vs_dest *ip_vs_find_dest(__be32 daddr, __be16 dport,
605 __be32 vaddr, __be16 vport, __u16 protocol)
606{
607 struct ip_vs_dest *dest;
608 struct ip_vs_service *svc;
3c2e0505 609 union nf_inet_addr _vaddr = { .ip = vaddr };
1e356f9c 610
3c2e0505 611 svc = ip_vs_service_get(AF_INET, 0, protocol, &_vaddr, vport);
1e356f9c
RB
612 if (!svc)
613 return NULL;
614 dest = ip_vs_lookup_dest(svc, daddr, dport);
615 if (dest)
616 atomic_inc(&dest->refcnt);
617 ip_vs_service_put(svc);
618 return dest;
619}
1da177e4
LT
620
621/*
622 * Lookup dest by {svc,addr,port} in the destination trash.
623 * The destination trash is used to hold the destinations that are removed
624 * from the service table but are still referenced by some conn entries.
625 * The reason to add the destination trash is when the dest is temporary
626 * down (either by administrator or by monitor program), the dest can be
627 * picked back from the trash, the remaining connections to the dest can
628 * continue, and the counting information of the dest is also useful for
629 * scheduling.
630 */
631static struct ip_vs_dest *
014d730d 632ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport)
1da177e4
LT
633{
634 struct ip_vs_dest *dest, *nxt;
635
636 /*
637 * Find the destination in trash
638 */
639 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
640 IP_VS_DBG(3, "Destination %u/%u.%u.%u.%u:%u still in trash, "
4b5bdf5c 641 "dest->refcnt=%d\n",
1da177e4 642 dest->vfwmark,
e7ade46a 643 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4 644 atomic_read(&dest->refcnt));
e7ade46a 645 if (dest->addr.ip == daddr &&
1da177e4
LT
646 dest->port == dport &&
647 dest->vfwmark == svc->fwmark &&
648 dest->protocol == svc->protocol &&
649 (svc->fwmark ||
e7ade46a 650 (dest->vaddr.ip == svc->addr.ip &&
1da177e4
LT
651 dest->vport == svc->port))) {
652 /* HIT */
653 return dest;
654 }
655
656 /*
657 * Try to purge the destination from trash if not referenced
658 */
659 if (atomic_read(&dest->refcnt) == 1) {
660 IP_VS_DBG(3, "Removing destination %u/%u.%u.%u.%u:%u "
661 "from trash\n",
662 dest->vfwmark,
e7ade46a 663 NIPQUAD(dest->addr.ip), ntohs(dest->port));
1da177e4
LT
664 list_del(&dest->n_list);
665 ip_vs_dst_reset(dest);
666 __ip_vs_unbind_svc(dest);
667 kfree(dest);
668 }
669 }
670
671 return NULL;
672}
673
674
675/*
676 * Clean up all the destinations in the trash
677 * Called by the ip_vs_control_cleanup()
678 *
679 * When the ip_vs_control_clearup is activated by ipvs module exit,
680 * the service tables must have been flushed and all the connections
681 * are expired, and the refcnt of each destination in the trash must
682 * be 1, so we simply release them here.
683 */
684static void ip_vs_trash_cleanup(void)
685{
686 struct ip_vs_dest *dest, *nxt;
687
688 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
689 list_del(&dest->n_list);
690 ip_vs_dst_reset(dest);
691 __ip_vs_unbind_svc(dest);
692 kfree(dest);
693 }
694}
695
696
697static void
698ip_vs_zero_stats(struct ip_vs_stats *stats)
699{
700 spin_lock_bh(&stats->lock);
e93615d0
SH
701
702 stats->conns = 0;
703 stats->inpkts = 0;
704 stats->outpkts = 0;
705 stats->inbytes = 0;
706 stats->outbytes = 0;
707
708 stats->cps = 0;
709 stats->inpps = 0;
710 stats->outpps = 0;
711 stats->inbps = 0;
712 stats->outbps = 0;
713
1da177e4 714 ip_vs_zero_estimator(stats);
e93615d0 715
3a14a313 716 spin_unlock_bh(&stats->lock);
1da177e4
LT
717}
718
719/*
720 * Update a destination in the given service
721 */
722static void
723__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 724 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
725{
726 int conn_flags;
727
728 /* set the weight and the flags */
729 atomic_set(&dest->weight, udest->weight);
730 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
731
732 /* check if local node and update the flags */
c860c6b1 733 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
1da177e4
LT
734 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
735 | IP_VS_CONN_F_LOCALNODE;
736 }
737
738 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
739 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
740 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
741 } else {
742 /*
743 * Put the real service in ip_vs_rtable if not present.
744 * For now only for NAT!
745 */
746 write_lock_bh(&__ip_vs_rs_lock);
747 ip_vs_rs_hash(dest);
748 write_unlock_bh(&__ip_vs_rs_lock);
749 }
750 atomic_set(&dest->conn_flags, conn_flags);
751
752 /* bind the service */
753 if (!dest->svc) {
754 __ip_vs_bind_svc(dest, svc);
755 } else {
756 if (dest->svc != svc) {
757 __ip_vs_unbind_svc(dest);
758 ip_vs_zero_stats(&dest->stats);
759 __ip_vs_bind_svc(dest, svc);
760 }
761 }
762
763 /* set the dest status flags */
764 dest->flags |= IP_VS_DEST_F_AVAILABLE;
765
766 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
767 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
768 dest->u_threshold = udest->u_threshold;
769 dest->l_threshold = udest->l_threshold;
770}
771
772
773/*
774 * Create a destination for the given service
775 */
776static int
c860c6b1 777ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
778 struct ip_vs_dest **dest_p)
779{
780 struct ip_vs_dest *dest;
781 unsigned atype;
782
783 EnterFunction(2);
784
c860c6b1 785 atype = inet_addr_type(&init_net, udest->addr.ip);
1da177e4
LT
786 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
787 return -EINVAL;
788
0da974f4 789 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
790 if (dest == NULL) {
791 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
792 return -ENOMEM;
793 }
1da177e4 794
c860c6b1 795 dest->af = svc->af;
1da177e4 796 dest->protocol = svc->protocol;
c860c6b1 797 dest->vaddr = svc->addr;
1da177e4
LT
798 dest->vport = svc->port;
799 dest->vfwmark = svc->fwmark;
c860c6b1 800 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
801 dest->port = udest->port;
802
803 atomic_set(&dest->activeconns, 0);
804 atomic_set(&dest->inactconns, 0);
805 atomic_set(&dest->persistconns, 0);
806 atomic_set(&dest->refcnt, 0);
807
808 INIT_LIST_HEAD(&dest->d_list);
809 spin_lock_init(&dest->dst_lock);
810 spin_lock_init(&dest->stats.lock);
811 __ip_vs_update_dest(svc, dest, udest);
812 ip_vs_new_estimator(&dest->stats);
813
814 *dest_p = dest;
815
816 LeaveFunction(2);
817 return 0;
818}
819
820
821/*
822 * Add a destination into an existing service
823 */
824static int
c860c6b1 825ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
826{
827 struct ip_vs_dest *dest;
c860c6b1 828 union nf_inet_addr daddr;
014d730d 829 __be16 dport = udest->port;
1da177e4
LT
830 int ret;
831
832 EnterFunction(2);
833
834 if (udest->weight < 0) {
835 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
836 return -ERANGE;
837 }
838
839 if (udest->l_threshold > udest->u_threshold) {
840 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
841 "upper threshold\n");
842 return -ERANGE;
843 }
844
c860c6b1
JV
845 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
846
1da177e4
LT
847 /*
848 * Check if the dest already exists in the list
849 */
c860c6b1 850 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
1da177e4
LT
851 if (dest != NULL) {
852 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
853 return -EEXIST;
854 }
855
856 /*
857 * Check if the dest already exists in the trash and
858 * is from the same service
859 */
c860c6b1 860 dest = ip_vs_trash_get_dest(svc, daddr.ip, dport);
1da177e4
LT
861 if (dest != NULL) {
862 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 863 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
864 NIPQUAD(daddr), ntohs(dport),
865 atomic_read(&dest->refcnt),
866 dest->vfwmark,
e7ade46a 867 NIPQUAD(dest->vaddr.ip),
1da177e4
LT
868 ntohs(dest->vport));
869 __ip_vs_update_dest(svc, dest, udest);
870
871 /*
872 * Get the destination from the trash
873 */
874 list_del(&dest->n_list);
875
876 ip_vs_new_estimator(&dest->stats);
877
878 write_lock_bh(&__ip_vs_svc_lock);
879
880 /*
881 * Wait until all other svc users go away.
882 */
883 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
884
885 list_add(&dest->n_list, &svc->destinations);
886 svc->num_dests++;
887
888 /* call the update_service function of its scheduler */
82dfb6f3
SW
889 if (svc->scheduler->update_service)
890 svc->scheduler->update_service(svc);
1da177e4
LT
891
892 write_unlock_bh(&__ip_vs_svc_lock);
893 return 0;
894 }
895
896 /*
897 * Allocate and initialize the dest structure
898 */
899 ret = ip_vs_new_dest(svc, udest, &dest);
900 if (ret) {
901 return ret;
902 }
903
904 /*
905 * Add the dest entry into the list
906 */
907 atomic_inc(&dest->refcnt);
908
909 write_lock_bh(&__ip_vs_svc_lock);
910
911 /*
912 * Wait until all other svc users go away.
913 */
914 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
915
916 list_add(&dest->n_list, &svc->destinations);
917 svc->num_dests++;
918
919 /* call the update_service function of its scheduler */
82dfb6f3
SW
920 if (svc->scheduler->update_service)
921 svc->scheduler->update_service(svc);
1da177e4
LT
922
923 write_unlock_bh(&__ip_vs_svc_lock);
924
925 LeaveFunction(2);
926
927 return 0;
928}
929
930
931/*
932 * Edit a destination in the given service
933 */
934static int
c860c6b1 935ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
936{
937 struct ip_vs_dest *dest;
c860c6b1 938 union nf_inet_addr daddr;
014d730d 939 __be16 dport = udest->port;
1da177e4
LT
940
941 EnterFunction(2);
942
943 if (udest->weight < 0) {
944 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
945 return -ERANGE;
946 }
947
948 if (udest->l_threshold > udest->u_threshold) {
949 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
950 "upper threshold\n");
951 return -ERANGE;
952 }
953
c860c6b1
JV
954 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
955
1da177e4
LT
956 /*
957 * Lookup the destination list
958 */
c860c6b1 959 dest = ip_vs_lookup_dest(svc, daddr.ip, dport);
1da177e4
LT
960 if (dest == NULL) {
961 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
962 return -ENOENT;
963 }
964
965 __ip_vs_update_dest(svc, dest, udest);
966
967 write_lock_bh(&__ip_vs_svc_lock);
968
969 /* Wait until all other svc users go away */
cae7ca3d 970 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
971
972 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
973 if (svc->scheduler->update_service)
974 svc->scheduler->update_service(svc);
1da177e4
LT
975
976 write_unlock_bh(&__ip_vs_svc_lock);
977
978 LeaveFunction(2);
979
980 return 0;
981}
982
983
984/*
985 * Delete a destination (must be already unlinked from the service)
986 */
987static void __ip_vs_del_dest(struct ip_vs_dest *dest)
988{
989 ip_vs_kill_estimator(&dest->stats);
990
991 /*
992 * Remove it from the d-linked list with the real services.
993 */
994 write_lock_bh(&__ip_vs_rs_lock);
995 ip_vs_rs_unhash(dest);
996 write_unlock_bh(&__ip_vs_rs_lock);
997
998 /*
999 * Decrease the refcnt of the dest, and free the dest
1000 * if nobody refers to it (refcnt=0). Otherwise, throw
1001 * the destination into the trash.
1002 */
1003 if (atomic_dec_and_test(&dest->refcnt)) {
1004 ip_vs_dst_reset(dest);
1005 /* simply decrease svc->refcnt here, let the caller check
1006 and release the service if nobody refers to it.
1007 Only user context can release destination and service,
1008 and only one user context can update virtual service at a
1009 time, so the operation here is OK */
1010 atomic_dec(&dest->svc->refcnt);
1011 kfree(dest);
1012 } else {
4b5bdf5c
RN
1013 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1014 "dest->refcnt=%d\n",
e7ade46a 1015 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1016 atomic_read(&dest->refcnt));
1017 list_add(&dest->n_list, &ip_vs_dest_trash);
1018 atomic_inc(&dest->refcnt);
1019 }
1020}
1021
1022
1023/*
1024 * Unlink a destination from the given service
1025 */
1026static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1027 struct ip_vs_dest *dest,
1028 int svcupd)
1029{
1030 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1031
1032 /*
1033 * Remove it from the d-linked destination list.
1034 */
1035 list_del(&dest->n_list);
1036 svc->num_dests--;
82dfb6f3
SW
1037
1038 /*
1039 * Call the update_service function of its scheduler
1040 */
1041 if (svcupd && svc->scheduler->update_service)
1042 svc->scheduler->update_service(svc);
1da177e4
LT
1043}
1044
1045
1046/*
1047 * Delete a destination server in the given service
1048 */
1049static int
c860c6b1 1050ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1051{
1052 struct ip_vs_dest *dest;
014d730d 1053 __be16 dport = udest->port;
1da177e4
LT
1054
1055 EnterFunction(2);
1056
c860c6b1
JV
1057 dest = ip_vs_lookup_dest(svc, udest->addr.ip, dport);
1058
1da177e4
LT
1059 if (dest == NULL) {
1060 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1061 return -ENOENT;
1062 }
1063
1064 write_lock_bh(&__ip_vs_svc_lock);
1065
1066 /*
1067 * Wait until all other svc users go away.
1068 */
1069 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1070
1071 /*
1072 * Unlink dest from the service
1073 */
1074 __ip_vs_unlink_dest(svc, dest, 1);
1075
1076 write_unlock_bh(&__ip_vs_svc_lock);
1077
1078 /*
1079 * Delete the destination
1080 */
1081 __ip_vs_del_dest(dest);
1082
1083 LeaveFunction(2);
1084
1085 return 0;
1086}
1087
1088
1089/*
1090 * Add a service into the service hash table
1091 */
1092static int
c860c6b1
JV
1093ip_vs_add_service(struct ip_vs_service_user_kern *u,
1094 struct ip_vs_service **svc_p)
1da177e4
LT
1095{
1096 int ret = 0;
1097 struct ip_vs_scheduler *sched = NULL;
1098 struct ip_vs_service *svc = NULL;
1099
1100 /* increase the module use count */
1101 ip_vs_use_count_inc();
1102
1103 /* Lookup the scheduler by 'u->sched_name' */
1104 sched = ip_vs_scheduler_get(u->sched_name);
1105 if (sched == NULL) {
1106 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1107 u->sched_name);
1108 ret = -ENOENT;
1109 goto out_mod_dec;
1110 }
1111
0da974f4 1112 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1113 if (svc == NULL) {
1114 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1115 ret = -ENOMEM;
1116 goto out_err;
1117 }
1da177e4
LT
1118
1119 /* I'm the first user of the service */
1120 atomic_set(&svc->usecnt, 1);
1121 atomic_set(&svc->refcnt, 0);
1122
c860c6b1 1123 svc->af = u->af;
1da177e4 1124 svc->protocol = u->protocol;
c860c6b1 1125 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1126 svc->port = u->port;
1127 svc->fwmark = u->fwmark;
1128 svc->flags = u->flags;
1129 svc->timeout = u->timeout * HZ;
1130 svc->netmask = u->netmask;
1131
1132 INIT_LIST_HEAD(&svc->destinations);
1133 rwlock_init(&svc->sched_lock);
1134 spin_lock_init(&svc->stats.lock);
1135
1136 /* Bind the scheduler */
1137 ret = ip_vs_bind_scheduler(svc, sched);
1138 if (ret)
1139 goto out_err;
1140 sched = NULL;
1141
1142 /* Update the virtual service counters */
1143 if (svc->port == FTPPORT)
1144 atomic_inc(&ip_vs_ftpsvc_counter);
1145 else if (svc->port == 0)
1146 atomic_inc(&ip_vs_nullsvc_counter);
1147
1148 ip_vs_new_estimator(&svc->stats);
1149 ip_vs_num_services++;
1150
1151 /* Hash the service into the service table */
1152 write_lock_bh(&__ip_vs_svc_lock);
1153 ip_vs_svc_hash(svc);
1154 write_unlock_bh(&__ip_vs_svc_lock);
1155
1156 *svc_p = svc;
1157 return 0;
1158
1159 out_err:
1160 if (svc != NULL) {
1161 if (svc->scheduler)
1162 ip_vs_unbind_scheduler(svc);
1163 if (svc->inc) {
1164 local_bh_disable();
1165 ip_vs_app_inc_put(svc->inc);
1166 local_bh_enable();
1167 }
1168 kfree(svc);
1169 }
1170 ip_vs_scheduler_put(sched);
1171
1172 out_mod_dec:
1173 /* decrease the module use count */
1174 ip_vs_use_count_dec();
1175
1176 return ret;
1177}
1178
1179
1180/*
1181 * Edit a service and bind it with a new scheduler
1182 */
1183static int
c860c6b1 1184ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1185{
1186 struct ip_vs_scheduler *sched, *old_sched;
1187 int ret = 0;
1188
1189 /*
1190 * Lookup the scheduler, by 'u->sched_name'
1191 */
1192 sched = ip_vs_scheduler_get(u->sched_name);
1193 if (sched == NULL) {
1194 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1195 u->sched_name);
1196 return -ENOENT;
1197 }
1198 old_sched = sched;
1199
1200 write_lock_bh(&__ip_vs_svc_lock);
1201
1202 /*
1203 * Wait until all other svc users go away.
1204 */
1205 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1206
1207 /*
1208 * Set the flags and timeout value
1209 */
1210 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1211 svc->timeout = u->timeout * HZ;
1212 svc->netmask = u->netmask;
1213
1214 old_sched = svc->scheduler;
1215 if (sched != old_sched) {
1216 /*
1217 * Unbind the old scheduler
1218 */
1219 if ((ret = ip_vs_unbind_scheduler(svc))) {
1220 old_sched = sched;
1221 goto out;
1222 }
1223
1224 /*
1225 * Bind the new scheduler
1226 */
1227 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1228 /*
1229 * If ip_vs_bind_scheduler fails, restore the old
1230 * scheduler.
1231 * The main reason of failure is out of memory.
1232 *
1233 * The question is if the old scheduler can be
1234 * restored all the time. TODO: if it cannot be
1235 * restored some time, we must delete the service,
1236 * otherwise the system may crash.
1237 */
1238 ip_vs_bind_scheduler(svc, old_sched);
1239 old_sched = sched;
1240 goto out;
1241 }
1242 }
1243
1244 out:
1245 write_unlock_bh(&__ip_vs_svc_lock);
1246
1247 if (old_sched)
1248 ip_vs_scheduler_put(old_sched);
1249
1250 return ret;
1251}
1252
1253
1254/*
1255 * Delete a service from the service list
1256 * - The service must be unlinked, unlocked and not referenced!
1257 * - We are called under _bh lock
1258 */
1259static void __ip_vs_del_service(struct ip_vs_service *svc)
1260{
1261 struct ip_vs_dest *dest, *nxt;
1262 struct ip_vs_scheduler *old_sched;
1263
1264 ip_vs_num_services--;
1265 ip_vs_kill_estimator(&svc->stats);
1266
1267 /* Unbind scheduler */
1268 old_sched = svc->scheduler;
1269 ip_vs_unbind_scheduler(svc);
1270 if (old_sched)
1271 ip_vs_scheduler_put(old_sched);
1272
1273 /* Unbind app inc */
1274 if (svc->inc) {
1275 ip_vs_app_inc_put(svc->inc);
1276 svc->inc = NULL;
1277 }
1278
1279 /*
1280 * Unlink the whole destination list
1281 */
1282 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1283 __ip_vs_unlink_dest(svc, dest, 0);
1284 __ip_vs_del_dest(dest);
1285 }
1286
1287 /*
1288 * Update the virtual service counters
1289 */
1290 if (svc->port == FTPPORT)
1291 atomic_dec(&ip_vs_ftpsvc_counter);
1292 else if (svc->port == 0)
1293 atomic_dec(&ip_vs_nullsvc_counter);
1294
1295 /*
1296 * Free the service if nobody refers to it
1297 */
1298 if (atomic_read(&svc->refcnt) == 0)
1299 kfree(svc);
1300
1301 /* decrease the module use count */
1302 ip_vs_use_count_dec();
1303}
1304
1305/*
1306 * Delete a service from the service list
1307 */
1308static int ip_vs_del_service(struct ip_vs_service *svc)
1309{
1310 if (svc == NULL)
1311 return -EEXIST;
1312
1313 /*
1314 * Unhash it from the service table
1315 */
1316 write_lock_bh(&__ip_vs_svc_lock);
1317
1318 ip_vs_svc_unhash(svc);
1319
1320 /*
1321 * Wait until all the svc users go away.
1322 */
1323 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1324
1325 __ip_vs_del_service(svc);
1326
1327 write_unlock_bh(&__ip_vs_svc_lock);
1328
1329 return 0;
1330}
1331
1332
1333/*
1334 * Flush all the virtual services
1335 */
1336static int ip_vs_flush(void)
1337{
1338 int idx;
1339 struct ip_vs_service *svc, *nxt;
1340
1341 /*
1342 * Flush the service table hashed by <protocol,addr,port>
1343 */
1344 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1345 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1346 write_lock_bh(&__ip_vs_svc_lock);
1347 ip_vs_svc_unhash(svc);
1348 /*
1349 * Wait until all the svc users go away.
1350 */
1351 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1352 __ip_vs_del_service(svc);
1353 write_unlock_bh(&__ip_vs_svc_lock);
1354 }
1355 }
1356
1357 /*
1358 * Flush the service table hashed by fwmark
1359 */
1360 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1361 list_for_each_entry_safe(svc, nxt,
1362 &ip_vs_svc_fwm_table[idx], f_list) {
1363 write_lock_bh(&__ip_vs_svc_lock);
1364 ip_vs_svc_unhash(svc);
1365 /*
1366 * Wait until all the svc users go away.
1367 */
1368 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1369 __ip_vs_del_service(svc);
1370 write_unlock_bh(&__ip_vs_svc_lock);
1371 }
1372 }
1373
1374 return 0;
1375}
1376
1377
1378/*
1379 * Zero counters in a service or all services
1380 */
1381static int ip_vs_zero_service(struct ip_vs_service *svc)
1382{
1383 struct ip_vs_dest *dest;
1384
1385 write_lock_bh(&__ip_vs_svc_lock);
1386 list_for_each_entry(dest, &svc->destinations, n_list) {
1387 ip_vs_zero_stats(&dest->stats);
1388 }
1389 ip_vs_zero_stats(&svc->stats);
1390 write_unlock_bh(&__ip_vs_svc_lock);
1391 return 0;
1392}
1393
1394static int ip_vs_zero_all(void)
1395{
1396 int idx;
1397 struct ip_vs_service *svc;
1398
1399 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1400 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1401 ip_vs_zero_service(svc);
1402 }
1403 }
1404
1405 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1406 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1407 ip_vs_zero_service(svc);
1408 }
1409 }
1410
1411 ip_vs_zero_stats(&ip_vs_stats);
1412 return 0;
1413}
1414
1415
1416static int
1417proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1418 void __user *buffer, size_t *lenp, loff_t *ppos)
1419{
1420 int *valp = table->data;
1421 int val = *valp;
1422 int rc;
1423
1424 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1425 if (write && (*valp != val)) {
1426 if ((*valp < 0) || (*valp > 3)) {
1427 /* Restore the correct value */
1428 *valp = val;
1429 } else {
1da177e4 1430 update_defense_level();
1da177e4
LT
1431 }
1432 }
1433 return rc;
1434}
1435
1436
1437static int
1438proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1439 void __user *buffer, size_t *lenp, loff_t *ppos)
1440{
1441 int *valp = table->data;
1442 int val[2];
1443 int rc;
1444
1445 /* backup the value first */
1446 memcpy(val, valp, sizeof(val));
1447
1448 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1449 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1450 /* Restore the correct value */
1451 memcpy(valp, val, sizeof(val));
1452 }
1453 return rc;
1454}
1455
1456
1457/*
1458 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1459 */
1460
1461static struct ctl_table vs_vars[] = {
1462 {
1da177e4
LT
1463 .procname = "amemthresh",
1464 .data = &sysctl_ip_vs_amemthresh,
1465 .maxlen = sizeof(int),
1466 .mode = 0644,
1467 .proc_handler = &proc_dointvec,
1468 },
1469#ifdef CONFIG_IP_VS_DEBUG
1470 {
1da177e4
LT
1471 .procname = "debug_level",
1472 .data = &sysctl_ip_vs_debug_level,
1473 .maxlen = sizeof(int),
1474 .mode = 0644,
1475 .proc_handler = &proc_dointvec,
1476 },
1477#endif
1478 {
1da177e4
LT
1479 .procname = "am_droprate",
1480 .data = &sysctl_ip_vs_am_droprate,
1481 .maxlen = sizeof(int),
1482 .mode = 0644,
1483 .proc_handler = &proc_dointvec,
1484 },
1485 {
1da177e4
LT
1486 .procname = "drop_entry",
1487 .data = &sysctl_ip_vs_drop_entry,
1488 .maxlen = sizeof(int),
1489 .mode = 0644,
1490 .proc_handler = &proc_do_defense_mode,
1491 },
1492 {
1da177e4
LT
1493 .procname = "drop_packet",
1494 .data = &sysctl_ip_vs_drop_packet,
1495 .maxlen = sizeof(int),
1496 .mode = 0644,
1497 .proc_handler = &proc_do_defense_mode,
1498 },
1499 {
1da177e4
LT
1500 .procname = "secure_tcp",
1501 .data = &sysctl_ip_vs_secure_tcp,
1502 .maxlen = sizeof(int),
1503 .mode = 0644,
1504 .proc_handler = &proc_do_defense_mode,
1505 },
1506#if 0
1507 {
1da177e4
LT
1508 .procname = "timeout_established",
1509 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1510 .maxlen = sizeof(int),
1511 .mode = 0644,
1512 .proc_handler = &proc_dointvec_jiffies,
1513 },
1514 {
1da177e4
LT
1515 .procname = "timeout_synsent",
1516 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1517 .maxlen = sizeof(int),
1518 .mode = 0644,
1519 .proc_handler = &proc_dointvec_jiffies,
1520 },
1521 {
1da177e4
LT
1522 .procname = "timeout_synrecv",
1523 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1524 .maxlen = sizeof(int),
1525 .mode = 0644,
1526 .proc_handler = &proc_dointvec_jiffies,
1527 },
1528 {
1da177e4
LT
1529 .procname = "timeout_finwait",
1530 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1531 .maxlen = sizeof(int),
1532 .mode = 0644,
1533 .proc_handler = &proc_dointvec_jiffies,
1534 },
1535 {
1da177e4
LT
1536 .procname = "timeout_timewait",
1537 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1538 .maxlen = sizeof(int),
1539 .mode = 0644,
1540 .proc_handler = &proc_dointvec_jiffies,
1541 },
1542 {
1da177e4
LT
1543 .procname = "timeout_close",
1544 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1545 .maxlen = sizeof(int),
1546 .mode = 0644,
1547 .proc_handler = &proc_dointvec_jiffies,
1548 },
1549 {
1da177e4
LT
1550 .procname = "timeout_closewait",
1551 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1552 .maxlen = sizeof(int),
1553 .mode = 0644,
1554 .proc_handler = &proc_dointvec_jiffies,
1555 },
1556 {
1da177e4
LT
1557 .procname = "timeout_lastack",
1558 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1559 .maxlen = sizeof(int),
1560 .mode = 0644,
1561 .proc_handler = &proc_dointvec_jiffies,
1562 },
1563 {
1da177e4
LT
1564 .procname = "timeout_listen",
1565 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1566 .maxlen = sizeof(int),
1567 .mode = 0644,
1568 .proc_handler = &proc_dointvec_jiffies,
1569 },
1570 {
1da177e4
LT
1571 .procname = "timeout_synack",
1572 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1573 .maxlen = sizeof(int),
1574 .mode = 0644,
1575 .proc_handler = &proc_dointvec_jiffies,
1576 },
1577 {
1da177e4
LT
1578 .procname = "timeout_udp",
1579 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1580 .maxlen = sizeof(int),
1581 .mode = 0644,
1582 .proc_handler = &proc_dointvec_jiffies,
1583 },
1584 {
1da177e4
LT
1585 .procname = "timeout_icmp",
1586 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1587 .maxlen = sizeof(int),
1588 .mode = 0644,
1589 .proc_handler = &proc_dointvec_jiffies,
1590 },
1591#endif
1592 {
1da177e4
LT
1593 .procname = "cache_bypass",
1594 .data = &sysctl_ip_vs_cache_bypass,
1595 .maxlen = sizeof(int),
1596 .mode = 0644,
1597 .proc_handler = &proc_dointvec,
1598 },
1599 {
1da177e4
LT
1600 .procname = "expire_nodest_conn",
1601 .data = &sysctl_ip_vs_expire_nodest_conn,
1602 .maxlen = sizeof(int),
1603 .mode = 0644,
1604 .proc_handler = &proc_dointvec,
1605 },
1606 {
1da177e4
LT
1607 .procname = "expire_quiescent_template",
1608 .data = &sysctl_ip_vs_expire_quiescent_template,
1609 .maxlen = sizeof(int),
1610 .mode = 0644,
1611 .proc_handler = &proc_dointvec,
1612 },
1613 {
1da177e4
LT
1614 .procname = "sync_threshold",
1615 .data = &sysctl_ip_vs_sync_threshold,
1616 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1617 .mode = 0644,
1618 .proc_handler = &proc_do_sync_threshold,
1619 },
1620 {
1da177e4
LT
1621 .procname = "nat_icmp_send",
1622 .data = &sysctl_ip_vs_nat_icmp_send,
1623 .maxlen = sizeof(int),
1624 .mode = 0644,
1625 .proc_handler = &proc_dointvec,
1626 },
1627 { .ctl_name = 0 }
1628};
1629
5587da55 1630const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1631 { .procname = "net", .ctl_name = CTL_NET, },
1632 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1633 { .procname = "vs", },
1634 { }
1da177e4 1635};
90754f8e 1636EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1637
/* sysctl table header for the IPVS sysctl entries */
static struct ctl_table_header *sysctl_header;
1639
1640#ifdef CONFIG_PROC_FS
1641
/* Per-open iteration state of the service listing seq_file */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* hash bucket of the current entry */
};
1646
1647/*
1648 * Write the contents of the VS rule table to a PROCfs file.
1649 * (It is kept just for backward compatibility)
1650 */
1651static inline const char *ip_vs_fwd_name(unsigned flags)
1652{
1653 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1654 case IP_VS_CONN_F_LOCALNODE:
1655 return "Local";
1656 case IP_VS_CONN_F_TUNNEL:
1657 return "Tunnel";
1658 case IP_VS_CONN_F_DROUTE:
1659 return "Route";
1660 default:
1661 return "Masq";
1662 }
1663}
1664
1665
1666/* Get the Nth entry in the two lists */
1667static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1668{
1669 struct ip_vs_iter *iter = seq->private;
1670 int idx;
1671 struct ip_vs_service *svc;
1672
1673 /* look in hash by protocol */
1674 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1675 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1676 if (pos-- == 0){
1677 iter->table = ip_vs_svc_table;
1678 iter->bucket = idx;
1679 return svc;
1680 }
1681 }
1682 }
1683
1684 /* keep looking in fwmark */
1685 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1686 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1687 if (pos-- == 0) {
1688 iter->table = ip_vs_svc_fwm_table;
1689 iter->bucket = idx;
1690 return svc;
1691 }
1692 }
1693 }
1694
1695 return NULL;
1696}
1697
1698static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1699{
1700
1701 read_lock_bh(&__ip_vs_svc_lock);
1702 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1703}
1704
1705
1706static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1707{
1708 struct list_head *e;
1709 struct ip_vs_iter *iter;
1710 struct ip_vs_service *svc;
1711
1712 ++*pos;
1713 if (v == SEQ_START_TOKEN)
1714 return ip_vs_info_array(seq,0);
1715
1716 svc = v;
1717 iter = seq->private;
1718
1719 if (iter->table == ip_vs_svc_table) {
1720 /* next service in table hashed by protocol */
1721 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1722 return list_entry(e, struct ip_vs_service, s_list);
1723
1724
1725 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1726 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1727 s_list) {
1728 return svc;
1729 }
1730 }
1731
1732 iter->table = ip_vs_svc_fwm_table;
1733 iter->bucket = -1;
1734 goto scan_fwmark;
1735 }
1736
1737 /* next service in hashed by fwmark */
1738 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1739 return list_entry(e, struct ip_vs_service, f_list);
1740
1741 scan_fwmark:
1742 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1743 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1744 f_list)
1745 return svc;
1746 }
1747
1748 return NULL;
1749}
1750
1751static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1752{
1753 read_unlock_bh(&__ip_vs_svc_lock);
1754}
1755
1756
1757static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1758{
1759 if (v == SEQ_START_TOKEN) {
1760 seq_printf(seq,
1761 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1762 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1763 seq_puts(seq,
1764 "Prot LocalAddress:Port Scheduler Flags\n");
1765 seq_puts(seq,
1766 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1767 } else {
1768 const struct ip_vs_service *svc = v;
1769 const struct ip_vs_iter *iter = seq->private;
1770 const struct ip_vs_dest *dest;
1771
1772 if (iter->table == ip_vs_svc_table)
1773 seq_printf(seq, "%s %08X:%04X %s ",
1774 ip_vs_proto_name(svc->protocol),
e7ade46a 1775 ntohl(svc->addr.ip),
1da177e4
LT
1776 ntohs(svc->port),
1777 svc->scheduler->name);
1778 else
1779 seq_printf(seq, "FWM %08X %s ",
1780 svc->fwmark, svc->scheduler->name);
1781
1782 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1783 seq_printf(seq, "persistent %d %08X\n",
1784 svc->timeout,
1785 ntohl(svc->netmask));
1786 else
1787 seq_putc(seq, '\n');
1788
1789 list_for_each_entry(dest, &svc->destinations, n_list) {
1790 seq_printf(seq,
1791 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
e7ade46a 1792 ntohl(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1793 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1794 atomic_read(&dest->weight),
1795 atomic_read(&dest->activeconns),
1796 atomic_read(&dest->inactconns));
1797 }
1798 }
1799 return 0;
1800}
1801
56b3d975 1802static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1803 .start = ip_vs_info_seq_start,
1804 .next = ip_vs_info_seq_next,
1805 .stop = ip_vs_info_seq_stop,
1806 .show = ip_vs_info_seq_show,
1807};
1808
1809static int ip_vs_info_open(struct inode *inode, struct file *file)
1810{
cf7732e4
PE
1811 return seq_open_private(file, &ip_vs_info_seq_ops,
1812 sizeof(struct ip_vs_iter));
1da177e4
LT
1813}
1814
9a32144e 1815static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1816 .owner = THIS_MODULE,
1817 .open = ip_vs_info_open,
1818 .read = seq_read,
1819 .llseek = seq_lseek,
1820 .release = seq_release_private,
1821};
1822
1823#endif
1824
519e49e8
SW
1825struct ip_vs_stats ip_vs_stats = {
1826 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1827};
1da177e4
LT
1828
1829#ifdef CONFIG_PROC_FS
1830static int ip_vs_stats_show(struct seq_file *seq, void *v)
1831{
1832
1833/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1834 seq_puts(seq,
1835 " Total Incoming Outgoing Incoming Outgoing\n");
1836 seq_printf(seq,
1837 " Conns Packets Packets Bytes Bytes\n");
1838
1839 spin_lock_bh(&ip_vs_stats.lock);
1840 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1841 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1842 (unsigned long long) ip_vs_stats.inbytes,
1843 (unsigned long long) ip_vs_stats.outbytes);
1844
1845/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1846 seq_puts(seq,
1847 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1848 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1849 ip_vs_stats.cps,
1850 ip_vs_stats.inpps,
1851 ip_vs_stats.outpps,
1852 ip_vs_stats.inbps,
1853 ip_vs_stats.outbps);
1854 spin_unlock_bh(&ip_vs_stats.lock);
1855
1856 return 0;
1857}
1858
1859static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1860{
1861 return single_open(file, ip_vs_stats_show, NULL);
1862}
1863
9a32144e 1864static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1865 .owner = THIS_MODULE,
1866 .open = ip_vs_stats_seq_open,
1867 .read = seq_read,
1868 .llseek = seq_lseek,
1869 .release = single_release,
1870};
1871
1872#endif
1873
1874/*
1875 * Set timeout values for tcp tcpfin udp in the timeout_table.
1876 */
1877static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1878{
1879 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1880 u->tcp_timeout,
1881 u->tcp_fin_timeout,
1882 u->udp_timeout);
1883
1884#ifdef CONFIG_IP_VS_PROTO_TCP
1885 if (u->tcp_timeout) {
1886 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1887 = u->tcp_timeout * HZ;
1888 }
1889
1890 if (u->tcp_fin_timeout) {
1891 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1892 = u->tcp_fin_timeout * HZ;
1893 }
1894#endif
1895
1896#ifdef CONFIG_IP_VS_PROTO_UDP
1897 if (u->udp_timeout) {
1898 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1899 = u->udp_timeout * HZ;
1900 }
1901#endif
1902 return 0;
1903}
1904
1905
1906#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1907#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1908#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1909 sizeof(struct ip_vs_dest_user))
1910#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1911#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1912#define MAX_ARG_LEN SVCDEST_ARG_LEN
1913
9b5b5cff 1914static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1915 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1916 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1917 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1918 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1919 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1920 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1921 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1922 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1923 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1924 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1925 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1926};
1927
c860c6b1
JV
1928static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1929 struct ip_vs_service_user *usvc_compat)
1930{
1931 usvc->af = AF_INET;
1932 usvc->protocol = usvc_compat->protocol;
1933 usvc->addr.ip = usvc_compat->addr;
1934 usvc->port = usvc_compat->port;
1935 usvc->fwmark = usvc_compat->fwmark;
1936
1937 /* Deep copy of sched_name is not needed here */
1938 usvc->sched_name = usvc_compat->sched_name;
1939
1940 usvc->flags = usvc_compat->flags;
1941 usvc->timeout = usvc_compat->timeout;
1942 usvc->netmask = usvc_compat->netmask;
1943}
1944
1945static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1946 struct ip_vs_dest_user *udest_compat)
1947{
1948 udest->addr.ip = udest_compat->addr;
1949 udest->port = udest_compat->port;
1950 udest->conn_flags = udest_compat->conn_flags;
1951 udest->weight = udest_compat->weight;
1952 udest->u_threshold = udest_compat->u_threshold;
1953 udest->l_threshold = udest_compat->l_threshold;
1954}
1955
1da177e4
LT
1956static int
1957do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1958{
1959 int ret;
1960 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
1961 struct ip_vs_service_user *usvc_compat;
1962 struct ip_vs_service_user_kern usvc;
1da177e4 1963 struct ip_vs_service *svc;
c860c6b1
JV
1964 struct ip_vs_dest_user *udest_compat;
1965 struct ip_vs_dest_user_kern udest;
1da177e4
LT
1966
1967 if (!capable(CAP_NET_ADMIN))
1968 return -EPERM;
1969
1970 if (len != set_arglen[SET_CMDID(cmd)]) {
1971 IP_VS_ERR("set_ctl: len %u != %u\n",
1972 len, set_arglen[SET_CMDID(cmd)]);
1973 return -EINVAL;
1974 }
1975
1976 if (copy_from_user(arg, user, len) != 0)
1977 return -EFAULT;
1978
1979 /* increase the module use count */
1980 ip_vs_use_count_inc();
1981
14cc3e2b 1982 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
1983 ret = -ERESTARTSYS;
1984 goto out_dec;
1985 }
1986
1987 if (cmd == IP_VS_SO_SET_FLUSH) {
1988 /* Flush the virtual service */
1989 ret = ip_vs_flush();
1990 goto out_unlock;
1991 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
1992 /* Set timeout values for (tcp tcpfin udp) */
1993 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
1994 goto out_unlock;
1995 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
1996 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
1997 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
1998 goto out_unlock;
1999 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2000 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2001 ret = stop_sync_thread(dm->state);
2002 goto out_unlock;
2003 }
2004
c860c6b1
JV
2005 usvc_compat = (struct ip_vs_service_user *)arg;
2006 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2007
2008 /* We only use the new structs internally, so copy userspace compat
2009 * structs to extended internal versions */
2010 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2011 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2012
2013 if (cmd == IP_VS_SO_SET_ZERO) {
2014 /* if no service address is set, zero counters in all */
c860c6b1 2015 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2016 ret = ip_vs_zero_all();
2017 goto out_unlock;
2018 }
2019 }
2020
2021 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2022 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2023 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2024 usvc.protocol, NIPQUAD(usvc.addr.ip),
2025 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2026 ret = -EFAULT;
2027 goto out_unlock;
2028 }
2029
2030 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2031 if (usvc.fwmark == 0)
b18610de
JV
2032 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2033 &usvc.addr, usvc.port);
1da177e4 2034 else
b18610de 2035 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2036
2037 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2038 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2039 ret = -ESRCH;
2040 goto out_unlock;
2041 }
2042
2043 switch (cmd) {
2044 case IP_VS_SO_SET_ADD:
2045 if (svc != NULL)
2046 ret = -EEXIST;
2047 else
c860c6b1 2048 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2049 break;
2050 case IP_VS_SO_SET_EDIT:
c860c6b1 2051 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2052 break;
2053 case IP_VS_SO_SET_DEL:
2054 ret = ip_vs_del_service(svc);
2055 if (!ret)
2056 goto out_unlock;
2057 break;
2058 case IP_VS_SO_SET_ZERO:
2059 ret = ip_vs_zero_service(svc);
2060 break;
2061 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2062 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2063 break;
2064 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2065 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2066 break;
2067 case IP_VS_SO_SET_DELDEST:
c860c6b1 2068 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2069 break;
2070 default:
2071 ret = -EINVAL;
2072 }
2073
2074 if (svc)
2075 ip_vs_service_put(svc);
2076
2077 out_unlock:
14cc3e2b 2078 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2079 out_dec:
2080 /* decrease the module use count */
2081 ip_vs_use_count_dec();
2082
2083 return ret;
2084}
2085
2086
2087static void
2088ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2089{
2090 spin_lock_bh(&src->lock);
2091 memcpy(dst, src, (char*)&src->lock - (char*)src);
2092 spin_unlock_bh(&src->lock);
2093}
2094
2095static void
2096ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2097{
2098 dst->protocol = src->protocol;
e7ade46a 2099 dst->addr = src->addr.ip;
1da177e4
LT
2100 dst->port = src->port;
2101 dst->fwmark = src->fwmark;
4da62fc7 2102 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2103 dst->flags = src->flags;
2104 dst->timeout = src->timeout / HZ;
2105 dst->netmask = src->netmask;
2106 dst->num_dests = src->num_dests;
2107 ip_vs_copy_stats(&dst->stats, &src->stats);
2108}
2109
2110static inline int
2111__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2112 struct ip_vs_get_services __user *uptr)
2113{
2114 int idx, count=0;
2115 struct ip_vs_service *svc;
2116 struct ip_vs_service_entry entry;
2117 int ret = 0;
2118
2119 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2120 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2121 if (count >= get->num_services)
2122 goto out;
4da62fc7 2123 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2124 ip_vs_copy_service(&entry, svc);
2125 if (copy_to_user(&uptr->entrytable[count],
2126 &entry, sizeof(entry))) {
2127 ret = -EFAULT;
2128 goto out;
2129 }
2130 count++;
2131 }
2132 }
2133
2134 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2135 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2136 if (count >= get->num_services)
2137 goto out;
4da62fc7 2138 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2139 ip_vs_copy_service(&entry, svc);
2140 if (copy_to_user(&uptr->entrytable[count],
2141 &entry, sizeof(entry))) {
2142 ret = -EFAULT;
2143 goto out;
2144 }
2145 count++;
2146 }
2147 }
2148 out:
2149 return ret;
2150}
2151
2152static inline int
2153__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2154 struct ip_vs_get_dests __user *uptr)
2155{
2156 struct ip_vs_service *svc;
b18610de 2157 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2158 int ret = 0;
2159
2160 if (get->fwmark)
b18610de 2161 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2162 else
b18610de
JV
2163 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2164 get->port);
2165
1da177e4
LT
2166 if (svc) {
2167 int count = 0;
2168 struct ip_vs_dest *dest;
2169 struct ip_vs_dest_entry entry;
2170
2171 list_for_each_entry(dest, &svc->destinations, n_list) {
2172 if (count >= get->num_dests)
2173 break;
2174
e7ade46a 2175 entry.addr = dest->addr.ip;
1da177e4
LT
2176 entry.port = dest->port;
2177 entry.conn_flags = atomic_read(&dest->conn_flags);
2178 entry.weight = atomic_read(&dest->weight);
2179 entry.u_threshold = dest->u_threshold;
2180 entry.l_threshold = dest->l_threshold;
2181 entry.activeconns = atomic_read(&dest->activeconns);
2182 entry.inactconns = atomic_read(&dest->inactconns);
2183 entry.persistconns = atomic_read(&dest->persistconns);
2184 ip_vs_copy_stats(&entry.stats, &dest->stats);
2185 if (copy_to_user(&uptr->entrytable[count],
2186 &entry, sizeof(entry))) {
2187 ret = -EFAULT;
2188 break;
2189 }
2190 count++;
2191 }
2192 ip_vs_service_put(svc);
2193 } else
2194 ret = -ESRCH;
2195 return ret;
2196}
2197
2198static inline void
2199__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2200{
2201#ifdef CONFIG_IP_VS_PROTO_TCP
2202 u->tcp_timeout =
2203 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2204 u->tcp_fin_timeout =
2205 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2206#endif
2207#ifdef CONFIG_IP_VS_PROTO_UDP
2208 u->udp_timeout =
2209 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2210#endif
2211}
2212
2213
2214#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2215#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2216#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2217#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2218#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2219#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2220#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2221
9b5b5cff 2222static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2223 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2224 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2225 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2226 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2227 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2228 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2229 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2230};
2231
2232static int
2233do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2234{
2235 unsigned char arg[128];
2236 int ret = 0;
2237
2238 if (!capable(CAP_NET_ADMIN))
2239 return -EPERM;
2240
2241 if (*len < get_arglen[GET_CMDID(cmd)]) {
2242 IP_VS_ERR("get_ctl: len %u < %u\n",
2243 *len, get_arglen[GET_CMDID(cmd)]);
2244 return -EINVAL;
2245 }
2246
2247 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2248 return -EFAULT;
2249
14cc3e2b 2250 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2251 return -ERESTARTSYS;
2252
2253 switch (cmd) {
2254 case IP_VS_SO_GET_VERSION:
2255 {
2256 char buf[64];
2257
2258 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2259 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2260 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2261 ret = -EFAULT;
2262 goto out;
2263 }
2264 *len = strlen(buf)+1;
2265 }
2266 break;
2267
2268 case IP_VS_SO_GET_INFO:
2269 {
2270 struct ip_vs_getinfo info;
2271 info.version = IP_VS_VERSION_CODE;
2272 info.size = IP_VS_CONN_TAB_SIZE;
2273 info.num_services = ip_vs_num_services;
2274 if (copy_to_user(user, &info, sizeof(info)) != 0)
2275 ret = -EFAULT;
2276 }
2277 break;
2278
2279 case IP_VS_SO_GET_SERVICES:
2280 {
2281 struct ip_vs_get_services *get;
2282 int size;
2283
2284 get = (struct ip_vs_get_services *)arg;
2285 size = sizeof(*get) +
2286 sizeof(struct ip_vs_service_entry) * get->num_services;
2287 if (*len != size) {
2288 IP_VS_ERR("length: %u != %u\n", *len, size);
2289 ret = -EINVAL;
2290 goto out;
2291 }
2292 ret = __ip_vs_get_service_entries(get, user);
2293 }
2294 break;
2295
2296 case IP_VS_SO_GET_SERVICE:
2297 {
2298 struct ip_vs_service_entry *entry;
2299 struct ip_vs_service *svc;
b18610de 2300 union nf_inet_addr addr;
1da177e4
LT
2301
2302 entry = (struct ip_vs_service_entry *)arg;
b18610de 2303 addr.ip = entry->addr;
1da177e4 2304 if (entry->fwmark)
b18610de 2305 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2306 else
b18610de
JV
2307 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2308 &addr, entry->port);
1da177e4
LT
2309 if (svc) {
2310 ip_vs_copy_service(entry, svc);
2311 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2312 ret = -EFAULT;
2313 ip_vs_service_put(svc);
2314 } else
2315 ret = -ESRCH;
2316 }
2317 break;
2318
2319 case IP_VS_SO_GET_DESTS:
2320 {
2321 struct ip_vs_get_dests *get;
2322 int size;
2323
2324 get = (struct ip_vs_get_dests *)arg;
2325 size = sizeof(*get) +
2326 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2327 if (*len != size) {
2328 IP_VS_ERR("length: %u != %u\n", *len, size);
2329 ret = -EINVAL;
2330 goto out;
2331 }
2332 ret = __ip_vs_get_dest_entries(get, user);
2333 }
2334 break;
2335
2336 case IP_VS_SO_GET_TIMEOUT:
2337 {
2338 struct ip_vs_timeout_user t;
2339
2340 __ip_vs_get_timeouts(&t);
2341 if (copy_to_user(user, &t, sizeof(t)) != 0)
2342 ret = -EFAULT;
2343 }
2344 break;
2345
2346 case IP_VS_SO_GET_DAEMON:
2347 {
2348 struct ip_vs_daemon_user d[2];
2349
2350 memset(&d, 0, sizeof(d));
2351 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2352 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2353 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2354 d[0].syncid = ip_vs_master_syncid;
2355 }
2356 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2357 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2358 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2359 d[1].syncid = ip_vs_backup_syncid;
2360 }
2361 if (copy_to_user(user, &d, sizeof(d)) != 0)
2362 ret = -EFAULT;
2363 }
2364 break;
2365
2366 default:
2367 ret = -EINVAL;
2368 }
2369
2370 out:
14cc3e2b 2371 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2372 return ret;
2373}
2374
2375
2376static struct nf_sockopt_ops ip_vs_sockopts = {
2377 .pf = PF_INET,
2378 .set_optmin = IP_VS_BASE_CTL,
2379 .set_optmax = IP_VS_SO_SET_MAX+1,
2380 .set = do_ip_vs_set_ctl,
2381 .get_optmin = IP_VS_BASE_CTL,
2382 .get_optmax = IP_VS_SO_GET_MAX+1,
2383 .get = do_ip_vs_get_ctl,
16fcec35 2384 .owner = THIS_MODULE,
1da177e4
LT
2385};
2386
9a812198
JV
2387/*
2388 * Generic Netlink interface
2389 */
2390
2391/* IPVS genetlink family */
2392static struct genl_family ip_vs_genl_family = {
2393 .id = GENL_ID_GENERATE,
2394 .hdrsize = 0,
2395 .name = IPVS_GENL_NAME,
2396 .version = IPVS_GENL_VERSION,
2397 .maxattr = IPVS_CMD_MAX,
2398};
2399
2400/* Policy used for first-level command attributes */
2401static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2402 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2403 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2404 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2405 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2406 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2407 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2408};
2409
2410/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2411static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2412 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2413 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2414 .len = IP_VS_IFNAME_MAXLEN },
2415 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2416};
2417
2418/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2419static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2420 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2421 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2422 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2423 .len = sizeof(union nf_inet_addr) },
2424 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2425 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2426 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2427 .len = IP_VS_SCHEDNAME_MAXLEN },
2428 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2429 .len = sizeof(struct ip_vs_flags) },
2430 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2431 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2432 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2433};
2434
2435/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2436static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2437 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2438 .len = sizeof(union nf_inet_addr) },
2439 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2440 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2441 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2442 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2443 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2444 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2445 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2446 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2447 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2448};
2449
2450static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2451 struct ip_vs_stats *stats)
2452{
2453 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2454 if (!nl_stats)
2455 return -EMSGSIZE;
2456
2457 spin_lock_bh(&stats->lock);
2458
2459 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2460 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2461 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2462 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2463 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2464 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2465 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2466 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2467 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2468 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2469
2470 spin_unlock_bh(&stats->lock);
2471
2472 nla_nest_end(skb, nl_stats);
2473
2474 return 0;
2475
2476nla_put_failure:
2477 spin_unlock_bh(&stats->lock);
2478 nla_nest_cancel(skb, nl_stats);
2479 return -EMSGSIZE;
2480}
2481
2482static int ip_vs_genl_fill_service(struct sk_buff *skb,
2483 struct ip_vs_service *svc)
2484{
2485 struct nlattr *nl_service;
2486 struct ip_vs_flags flags = { .flags = svc->flags,
2487 .mask = ~0 };
2488
2489 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2490 if (!nl_service)
2491 return -EMSGSIZE;
2492
2493 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2494
2495 if (svc->fwmark) {
2496 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2497 } else {
2498 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2499 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2500 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2501 }
2502
2503 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2504 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2505 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2506 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2507
2508 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2509 goto nla_put_failure;
2510
2511 nla_nest_end(skb, nl_service);
2512
2513 return 0;
2514
2515nla_put_failure:
2516 nla_nest_cancel(skb, nl_service);
2517 return -EMSGSIZE;
2518}
2519
2520static int ip_vs_genl_dump_service(struct sk_buff *skb,
2521 struct ip_vs_service *svc,
2522 struct netlink_callback *cb)
2523{
2524 void *hdr;
2525
2526 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2527 &ip_vs_genl_family, NLM_F_MULTI,
2528 IPVS_CMD_NEW_SERVICE);
2529 if (!hdr)
2530 return -EMSGSIZE;
2531
2532 if (ip_vs_genl_fill_service(skb, svc) < 0)
2533 goto nla_put_failure;
2534
2535 return genlmsg_end(skb, hdr);
2536
2537nla_put_failure:
2538 genlmsg_cancel(skb, hdr);
2539 return -EMSGSIZE;
2540}
2541
2542static int ip_vs_genl_dump_services(struct sk_buff *skb,
2543 struct netlink_callback *cb)
2544{
2545 int idx = 0, i;
2546 int start = cb->args[0];
2547 struct ip_vs_service *svc;
2548
2549 mutex_lock(&__ip_vs_mutex);
2550 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2551 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2552 if (++idx <= start)
2553 continue;
2554 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2555 idx--;
2556 goto nla_put_failure;
2557 }
2558 }
2559 }
2560
2561 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2562 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2563 if (++idx <= start)
2564 continue;
2565 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2566 idx--;
2567 goto nla_put_failure;
2568 }
2569 }
2570 }
2571
2572nla_put_failure:
2573 mutex_unlock(&__ip_vs_mutex);
2574 cb->args[0] = idx;
2575
2576 return skb->len;
2577}
2578
c860c6b1 2579static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2580 struct nlattr *nla, int full_entry)
2581{
2582 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2583 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2584
2585 /* Parse mandatory identifying service fields first */
2586 if (nla == NULL ||
2587 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2588 return -EINVAL;
2589
2590 nla_af = attrs[IPVS_SVC_ATTR_AF];
2591 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2592 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2593 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2594 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2595
2596 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2597 return -EINVAL;
2598
c860c6b1 2599 usvc->af = nla_get_u16(nla_af);
9a812198
JV
2600 /* For now, only support IPv4 */
2601 if (nla_get_u16(nla_af) != AF_INET)
2602 return -EAFNOSUPPORT;
2603
2604 if (nla_fwmark) {
2605 usvc->protocol = IPPROTO_TCP;
2606 usvc->fwmark = nla_get_u32(nla_fwmark);
2607 } else {
2608 usvc->protocol = nla_get_u16(nla_protocol);
2609 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2610 usvc->port = nla_get_u16(nla_port);
2611 usvc->fwmark = 0;
2612 }
2613
2614 /* If a full entry was requested, check for the additional fields */
2615 if (full_entry) {
2616 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2617 *nla_netmask;
2618 struct ip_vs_flags flags;
2619 struct ip_vs_service *svc;
2620
2621 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2622 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2623 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2624 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2625
2626 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2627 return -EINVAL;
2628
2629 nla_memcpy(&flags, nla_flags, sizeof(flags));
2630
2631 /* prefill flags from service if it already exists */
2632 if (usvc->fwmark)
b18610de 2633 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2634 else
b18610de
JV
2635 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2636 &usvc->addr, usvc->port);
9a812198
JV
2637 if (svc) {
2638 usvc->flags = svc->flags;
2639 ip_vs_service_put(svc);
2640 } else
2641 usvc->flags = 0;
2642
2643 /* set new flags from userland */
2644 usvc->flags = (usvc->flags & ~flags.mask) |
2645 (flags.flags & flags.mask);
c860c6b1 2646 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2647 usvc->timeout = nla_get_u32(nla_timeout);
2648 usvc->netmask = nla_get_u32(nla_netmask);
2649 }
2650
2651 return 0;
2652}
2653
2654static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2655{
c860c6b1 2656 struct ip_vs_service_user_kern usvc;
9a812198
JV
2657 int ret;
2658
2659 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2660 if (ret)
2661 return ERR_PTR(ret);
2662
2663 if (usvc.fwmark)
b18610de 2664 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2665 else
b18610de
JV
2666 return __ip_vs_service_get(usvc.af, usvc.protocol,
2667 &usvc.addr, usvc.port);
9a812198
JV
2668}
2669
2670static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2671{
2672 struct nlattr *nl_dest;
2673
2674 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2675 if (!nl_dest)
2676 return -EMSGSIZE;
2677
2678 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2679 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2680
2681 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2682 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2683 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2684 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2685 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2686 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2687 atomic_read(&dest->activeconns));
2688 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2689 atomic_read(&dest->inactconns));
2690 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2691 atomic_read(&dest->persistconns));
2692
2693 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2694 goto nla_put_failure;
2695
2696 nla_nest_end(skb, nl_dest);
2697
2698 return 0;
2699
2700nla_put_failure:
2701 nla_nest_cancel(skb, nl_dest);
2702 return -EMSGSIZE;
2703}
2704
2705static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2706 struct netlink_callback *cb)
2707{
2708 void *hdr;
2709
2710 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2711 &ip_vs_genl_family, NLM_F_MULTI,
2712 IPVS_CMD_NEW_DEST);
2713 if (!hdr)
2714 return -EMSGSIZE;
2715
2716 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2717 goto nla_put_failure;
2718
2719 return genlmsg_end(skb, hdr);
2720
2721nla_put_failure:
2722 genlmsg_cancel(skb, hdr);
2723 return -EMSGSIZE;
2724}
2725
2726static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2727 struct netlink_callback *cb)
2728{
2729 int idx = 0;
2730 int start = cb->args[0];
2731 struct ip_vs_service *svc;
2732 struct ip_vs_dest *dest;
2733 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2734
2735 mutex_lock(&__ip_vs_mutex);
2736
2737 /* Try to find the service for which to dump destinations */
2738 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2739 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2740 goto out_err;
2741
2742 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2743 if (IS_ERR(svc) || svc == NULL)
2744 goto out_err;
2745
2746 /* Dump the destinations */
2747 list_for_each_entry(dest, &svc->destinations, n_list) {
2748 if (++idx <= start)
2749 continue;
2750 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2751 idx--;
2752 goto nla_put_failure;
2753 }
2754 }
2755
2756nla_put_failure:
2757 cb->args[0] = idx;
2758 ip_vs_service_put(svc);
2759
2760out_err:
2761 mutex_unlock(&__ip_vs_mutex);
2762
2763 return skb->len;
2764}
2765
c860c6b1 2766static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2767 struct nlattr *nla, int full_entry)
2768{
2769 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2770 struct nlattr *nla_addr, *nla_port;
2771
2772 /* Parse mandatory identifying destination fields first */
2773 if (nla == NULL ||
2774 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2775 return -EINVAL;
2776
2777 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2778 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2779
2780 if (!(nla_addr && nla_port))
2781 return -EINVAL;
2782
2783 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2784 udest->port = nla_get_u16(nla_port);
2785
2786 /* If a full entry was requested, check for the additional fields */
2787 if (full_entry) {
2788 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2789 *nla_l_thresh;
2790
2791 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2792 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2793 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2794 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2795
2796 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2797 return -EINVAL;
2798
2799 udest->conn_flags = nla_get_u32(nla_fwd)
2800 & IP_VS_CONN_F_FWD_MASK;
2801 udest->weight = nla_get_u32(nla_weight);
2802 udest->u_threshold = nla_get_u32(nla_u_thresh);
2803 udest->l_threshold = nla_get_u32(nla_l_thresh);
2804 }
2805
2806 return 0;
2807}
2808
2809static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2810 const char *mcast_ifn, __be32 syncid)
2811{
2812 struct nlattr *nl_daemon;
2813
2814 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2815 if (!nl_daemon)
2816 return -EMSGSIZE;
2817
2818 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2819 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2820 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2821
2822 nla_nest_end(skb, nl_daemon);
2823
2824 return 0;
2825
2826nla_put_failure:
2827 nla_nest_cancel(skb, nl_daemon);
2828 return -EMSGSIZE;
2829}
2830
2831static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2832 const char *mcast_ifn, __be32 syncid,
2833 struct netlink_callback *cb)
2834{
2835 void *hdr;
2836 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2837 &ip_vs_genl_family, NLM_F_MULTI,
2838 IPVS_CMD_NEW_DAEMON);
2839 if (!hdr)
2840 return -EMSGSIZE;
2841
2842 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2843 goto nla_put_failure;
2844
2845 return genlmsg_end(skb, hdr);
2846
2847nla_put_failure:
2848 genlmsg_cancel(skb, hdr);
2849 return -EMSGSIZE;
2850}
2851
2852static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2853 struct netlink_callback *cb)
2854{
2855 mutex_lock(&__ip_vs_mutex);
2856 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2857 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2858 ip_vs_master_mcast_ifn,
2859 ip_vs_master_syncid, cb) < 0)
2860 goto nla_put_failure;
2861
2862 cb->args[0] = 1;
2863 }
2864
2865 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2866 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2867 ip_vs_backup_mcast_ifn,
2868 ip_vs_backup_syncid, cb) < 0)
2869 goto nla_put_failure;
2870
2871 cb->args[1] = 1;
2872 }
2873
2874nla_put_failure:
2875 mutex_unlock(&__ip_vs_mutex);
2876
2877 return skb->len;
2878}
2879
2880static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2881{
2882 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2883 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2884 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2885 return -EINVAL;
2886
2887 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2888 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2889 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2890}
2891
2892static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2893{
2894 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2895 return -EINVAL;
2896
2897 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2898}
2899
2900static int ip_vs_genl_set_config(struct nlattr **attrs)
2901{
2902 struct ip_vs_timeout_user t;
2903
2904 __ip_vs_get_timeouts(&t);
2905
2906 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2907 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2908
2909 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2910 t.tcp_fin_timeout =
2911 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2912
2913 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2914 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2915
2916 return ip_vs_set_timeout(&t);
2917}
2918
2919static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2920{
2921 struct ip_vs_service *svc = NULL;
c860c6b1
JV
2922 struct ip_vs_service_user_kern usvc;
2923 struct ip_vs_dest_user_kern udest;
9a812198
JV
2924 int ret = 0, cmd;
2925 int need_full_svc = 0, need_full_dest = 0;
2926
2927 cmd = info->genlhdr->cmd;
2928
2929 mutex_lock(&__ip_vs_mutex);
2930
2931 if (cmd == IPVS_CMD_FLUSH) {
2932 ret = ip_vs_flush();
2933 goto out;
2934 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2935 ret = ip_vs_genl_set_config(info->attrs);
2936 goto out;
2937 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2938 cmd == IPVS_CMD_DEL_DAEMON) {
2939
2940 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2941
2942 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2943 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2944 info->attrs[IPVS_CMD_ATTR_DAEMON],
2945 ip_vs_daemon_policy)) {
2946 ret = -EINVAL;
2947 goto out;
2948 }
2949
2950 if (cmd == IPVS_CMD_NEW_DAEMON)
2951 ret = ip_vs_genl_new_daemon(daemon_attrs);
2952 else
2953 ret = ip_vs_genl_del_daemon(daemon_attrs);
2954 goto out;
2955 } else if (cmd == IPVS_CMD_ZERO &&
2956 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2957 ret = ip_vs_zero_all();
2958 goto out;
2959 }
2960
2961 /* All following commands require a service argument, so check if we
2962 * received a valid one. We need a full service specification when
2963 * adding / editing a service. Only identifying members otherwise. */
2964 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2965 need_full_svc = 1;
2966
2967 ret = ip_vs_genl_parse_service(&usvc,
2968 info->attrs[IPVS_CMD_ATTR_SERVICE],
2969 need_full_svc);
2970 if (ret)
2971 goto out;
2972
2973 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2974 if (usvc.fwmark == 0)
b18610de
JV
2975 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2976 &usvc.addr, usvc.port);
9a812198 2977 else
b18610de 2978 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
2979
2980 /* Unless we're adding a new service, the service must already exist */
2981 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
2982 ret = -ESRCH;
2983 goto out;
2984 }
2985
2986 /* Destination commands require a valid destination argument. For
2987 * adding / editing a destination, we need a full destination
2988 * specification. */
2989 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
2990 cmd == IPVS_CMD_DEL_DEST) {
2991 if (cmd != IPVS_CMD_DEL_DEST)
2992 need_full_dest = 1;
2993
2994 ret = ip_vs_genl_parse_dest(&udest,
2995 info->attrs[IPVS_CMD_ATTR_DEST],
2996 need_full_dest);
2997 if (ret)
2998 goto out;
2999 }
3000
3001 switch (cmd) {
3002 case IPVS_CMD_NEW_SERVICE:
3003 if (svc == NULL)
3004 ret = ip_vs_add_service(&usvc, &svc);
3005 else
3006 ret = -EEXIST;
3007 break;
3008 case IPVS_CMD_SET_SERVICE:
3009 ret = ip_vs_edit_service(svc, &usvc);
3010 break;
3011 case IPVS_CMD_DEL_SERVICE:
3012 ret = ip_vs_del_service(svc);
3013 break;
3014 case IPVS_CMD_NEW_DEST:
3015 ret = ip_vs_add_dest(svc, &udest);
3016 break;
3017 case IPVS_CMD_SET_DEST:
3018 ret = ip_vs_edit_dest(svc, &udest);
3019 break;
3020 case IPVS_CMD_DEL_DEST:
3021 ret = ip_vs_del_dest(svc, &udest);
3022 break;
3023 case IPVS_CMD_ZERO:
3024 ret = ip_vs_zero_service(svc);
3025 break;
3026 default:
3027 ret = -EINVAL;
3028 }
3029
3030out:
3031 if (svc)
3032 ip_vs_service_put(svc);
3033 mutex_unlock(&__ip_vs_mutex);
3034
3035 return ret;
3036}
3037
3038static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3039{
3040 struct sk_buff *msg;
3041 void *reply;
3042 int ret, cmd, reply_cmd;
3043
3044 cmd = info->genlhdr->cmd;
3045
3046 if (cmd == IPVS_CMD_GET_SERVICE)
3047 reply_cmd = IPVS_CMD_NEW_SERVICE;
3048 else if (cmd == IPVS_CMD_GET_INFO)
3049 reply_cmd = IPVS_CMD_SET_INFO;
3050 else if (cmd == IPVS_CMD_GET_CONFIG)
3051 reply_cmd = IPVS_CMD_SET_CONFIG;
3052 else {
3053 IP_VS_ERR("unknown Generic Netlink command\n");
3054 return -EINVAL;
3055 }
3056
3057 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3058 if (!msg)
3059 return -ENOMEM;
3060
3061 mutex_lock(&__ip_vs_mutex);
3062
3063 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3064 if (reply == NULL)
3065 goto nla_put_failure;
3066
3067 switch (cmd) {
3068 case IPVS_CMD_GET_SERVICE:
3069 {
3070 struct ip_vs_service *svc;
3071
3072 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3073 if (IS_ERR(svc)) {
3074 ret = PTR_ERR(svc);
3075 goto out_err;
3076 } else if (svc) {
3077 ret = ip_vs_genl_fill_service(msg, svc);
3078 ip_vs_service_put(svc);
3079 if (ret)
3080 goto nla_put_failure;
3081 } else {
3082 ret = -ESRCH;
3083 goto out_err;
3084 }
3085
3086 break;
3087 }
3088
3089 case IPVS_CMD_GET_CONFIG:
3090 {
3091 struct ip_vs_timeout_user t;
3092
3093 __ip_vs_get_timeouts(&t);
3094#ifdef CONFIG_IP_VS_PROTO_TCP
3095 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3096 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3097 t.tcp_fin_timeout);
3098#endif
3099#ifdef CONFIG_IP_VS_PROTO_UDP
3100 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3101#endif
3102
3103 break;
3104 }
3105
3106 case IPVS_CMD_GET_INFO:
3107 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3108 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3109 IP_VS_CONN_TAB_SIZE);
3110 break;
3111 }
3112
3113 genlmsg_end(msg, reply);
3114 ret = genlmsg_unicast(msg, info->snd_pid);
3115 goto out;
3116
3117nla_put_failure:
3118 IP_VS_ERR("not enough space in Netlink message\n");
3119 ret = -EMSGSIZE;
3120
3121out_err:
3122 nlmsg_free(msg);
3123out:
3124 mutex_unlock(&__ip_vs_mutex);
3125
3126 return ret;
3127}
3128
3129
3130static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3131 {
3132 .cmd = IPVS_CMD_NEW_SERVICE,
3133 .flags = GENL_ADMIN_PERM,
3134 .policy = ip_vs_cmd_policy,
3135 .doit = ip_vs_genl_set_cmd,
3136 },
3137 {
3138 .cmd = IPVS_CMD_SET_SERVICE,
3139 .flags = GENL_ADMIN_PERM,
3140 .policy = ip_vs_cmd_policy,
3141 .doit = ip_vs_genl_set_cmd,
3142 },
3143 {
3144 .cmd = IPVS_CMD_DEL_SERVICE,
3145 .flags = GENL_ADMIN_PERM,
3146 .policy = ip_vs_cmd_policy,
3147 .doit = ip_vs_genl_set_cmd,
3148 },
3149 {
3150 .cmd = IPVS_CMD_GET_SERVICE,
3151 .flags = GENL_ADMIN_PERM,
3152 .doit = ip_vs_genl_get_cmd,
3153 .dumpit = ip_vs_genl_dump_services,
3154 .policy = ip_vs_cmd_policy,
3155 },
3156 {
3157 .cmd = IPVS_CMD_NEW_DEST,
3158 .flags = GENL_ADMIN_PERM,
3159 .policy = ip_vs_cmd_policy,
3160 .doit = ip_vs_genl_set_cmd,
3161 },
3162 {
3163 .cmd = IPVS_CMD_SET_DEST,
3164 .flags = GENL_ADMIN_PERM,
3165 .policy = ip_vs_cmd_policy,
3166 .doit = ip_vs_genl_set_cmd,
3167 },
3168 {
3169 .cmd = IPVS_CMD_DEL_DEST,
3170 .flags = GENL_ADMIN_PERM,
3171 .policy = ip_vs_cmd_policy,
3172 .doit = ip_vs_genl_set_cmd,
3173 },
3174 {
3175 .cmd = IPVS_CMD_GET_DEST,
3176 .flags = GENL_ADMIN_PERM,
3177 .policy = ip_vs_cmd_policy,
3178 .dumpit = ip_vs_genl_dump_dests,
3179 },
3180 {
3181 .cmd = IPVS_CMD_NEW_DAEMON,
3182 .flags = GENL_ADMIN_PERM,
3183 .policy = ip_vs_cmd_policy,
3184 .doit = ip_vs_genl_set_cmd,
3185 },
3186 {
3187 .cmd = IPVS_CMD_DEL_DAEMON,
3188 .flags = GENL_ADMIN_PERM,
3189 .policy = ip_vs_cmd_policy,
3190 .doit = ip_vs_genl_set_cmd,
3191 },
3192 {
3193 .cmd = IPVS_CMD_GET_DAEMON,
3194 .flags = GENL_ADMIN_PERM,
3195 .dumpit = ip_vs_genl_dump_daemons,
3196 },
3197 {
3198 .cmd = IPVS_CMD_SET_CONFIG,
3199 .flags = GENL_ADMIN_PERM,
3200 .policy = ip_vs_cmd_policy,
3201 .doit = ip_vs_genl_set_cmd,
3202 },
3203 {
3204 .cmd = IPVS_CMD_GET_CONFIG,
3205 .flags = GENL_ADMIN_PERM,
3206 .doit = ip_vs_genl_get_cmd,
3207 },
3208 {
3209 .cmd = IPVS_CMD_GET_INFO,
3210 .flags = GENL_ADMIN_PERM,
3211 .doit = ip_vs_genl_get_cmd,
3212 },
3213 {
3214 .cmd = IPVS_CMD_ZERO,
3215 .flags = GENL_ADMIN_PERM,
3216 .policy = ip_vs_cmd_policy,
3217 .doit = ip_vs_genl_set_cmd,
3218 },
3219 {
3220 .cmd = IPVS_CMD_FLUSH,
3221 .flags = GENL_ADMIN_PERM,
3222 .doit = ip_vs_genl_set_cmd,
3223 },
3224};
3225
3226static int __init ip_vs_genl_register(void)
3227{
3228 int ret, i;
3229
3230 ret = genl_register_family(&ip_vs_genl_family);
3231 if (ret)
3232 return ret;
3233
3234 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3235 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3236 if (ret)
3237 goto err_out;
3238 }
3239 return 0;
3240
3241err_out:
3242 genl_unregister_family(&ip_vs_genl_family);
3243 return ret;
3244}
3245
3246static void ip_vs_genl_unregister(void)
3247{
3248 genl_unregister_family(&ip_vs_genl_family);
3249}
3250
3251/* End of Generic Netlink interface definitions */
3252
1da177e4 3253
048cf48b 3254int __init ip_vs_control_init(void)
1da177e4
LT
3255{
3256 int ret;
3257 int idx;
3258
3259 EnterFunction(2);
3260
3261 ret = nf_register_sockopt(&ip_vs_sockopts);
3262 if (ret) {
3263 IP_VS_ERR("cannot register sockopt.\n");
3264 return ret;
3265 }
3266
9a812198
JV
3267 ret = ip_vs_genl_register();
3268 if (ret) {
3269 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3270 nf_unregister_sockopt(&ip_vs_sockopts);
3271 return ret;
3272 }
3273
457c4cbc
EB
3274 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3275 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3276
90754f8e 3277 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3278
3279 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3280 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3281 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3282 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3283 }
3284 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3285 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3286 }
3287
1da177e4
LT
3288 ip_vs_new_estimator(&ip_vs_stats);
3289
3290 /* Hook the defense timer */
3291 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3292
3293 LeaveFunction(2);
3294 return 0;
3295}
3296
3297
3298void ip_vs_control_cleanup(void)
3299{
3300 EnterFunction(2);
3301 ip_vs_trash_cleanup();
3302 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3303 cancel_work_sync(&defense_work.work);
1da177e4
LT
3304 ip_vs_kill_estimator(&ip_vs_stats);
3305 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3306 proc_net_remove(&init_net, "ip_vs_stats");
3307 proc_net_remove(&init_net, "ip_vs");
9a812198 3308 ip_vs_genl_unregister();
1da177e4
LT
3309 nf_unregister_sockopt(&ip_vs_sockopts);
3310 LeaveFunction(2);
3311}