]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Turn off FTP application helper for IPv6
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4 39#include <net/sock.h>
9a812198 40#include <net/genetlink.h>
1da177e4
LT
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
46/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 47static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
48
49/* lock for service table */
50static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52/* lock for table with the real services */
53static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55/* lock for state and timeout tables */
56static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58/* lock for drop entry handling */
59static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61/* lock for drop packet handling */
62static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
static int sysctl_ip_vs_debug_level = 0;

/*
 *	Return the current value of sysctl_ip_vs_debug_level.
 *	Only built when CONFIG_IP_VS_DEBUG is set.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
93
94/*
af9debd4
JA
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
1da177e4
LT
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
af9debd4
JA
116 local_bh_disable();
117
1da177e4
LT
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
214
215 local_bh_enable();
1da177e4
LT
216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 225
c4028958 226static void defense_work_handler(struct work_struct *work)
1da177e4
LT
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
b18610de
JV
285ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
286 __be16 port)
1da177e4
LT
287{
288 register unsigned porth = ntohs(port);
b18610de 289 __be32 addr_fold = addr->ip;
1da177e4 290
b18610de
JV
291#ifdef CONFIG_IP_VS_IPV6
292 if (af == AF_INET6)
293 addr_fold = addr->ip6[0]^addr->ip6[1]^
294 addr->ip6[2]^addr->ip6[3];
295#endif
296
297 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
298 & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Returns hash value of fwmark for virtual service lookup
303 */
304static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
305{
306 return fwmark & IP_VS_SVC_TAB_MASK;
307}
308
309/*
310 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
311 * or in the ip_vs_svc_fwm_table by fwmark.
312 * Should be called with locked tables.
313 */
314static int ip_vs_svc_hash(struct ip_vs_service *svc)
315{
316 unsigned hash;
317
318 if (svc->flags & IP_VS_SVC_F_HASHED) {
319 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
320 "called from %p\n", __builtin_return_address(0));
321 return 0;
322 }
323
324 if (svc->fwmark == 0) {
325 /*
326 * Hash it by <protocol,addr,port> in ip_vs_svc_table
327 */
b18610de 328 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 329 svc->port);
1da177e4
LT
330 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
331 } else {
332 /*
333 * Hash it by fwmark in ip_vs_svc_fwm_table
334 */
335 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
336 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
337 }
338
339 svc->flags |= IP_VS_SVC_F_HASHED;
340 /* increase its refcnt because it is referenced by the svc table */
341 atomic_inc(&svc->refcnt);
342 return 1;
343}
344
345
346/*
347 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
348 * Should be called with locked tables.
349 */
350static int ip_vs_svc_unhash(struct ip_vs_service *svc)
351{
352 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
353 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
354 "called from %p\n", __builtin_return_address(0));
355 return 0;
356 }
357
358 if (svc->fwmark == 0) {
359 /* Remove it from the ip_vs_svc_table table */
360 list_del(&svc->s_list);
361 } else {
362 /* Remove it from the ip_vs_svc_fwm_table table */
363 list_del(&svc->f_list);
364 }
365
366 svc->flags &= ~IP_VS_SVC_F_HASHED;
367 atomic_dec(&svc->refcnt);
368 return 1;
369}
370
371
372/*
373 * Get service by {proto,addr,port} in the service table.
374 */
b18610de
JV
375static inline struct ip_vs_service *
376__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
377 __be16 vport)
1da177e4
LT
378{
379 unsigned hash;
380 struct ip_vs_service *svc;
381
382 /* Check for "full" addressed entries */
b18610de 383 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
384
385 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
386 if ((svc->af == af)
387 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
388 && (svc->port == vport)
389 && (svc->protocol == protocol)) {
390 /* HIT */
391 atomic_inc(&svc->usecnt);
392 return svc;
393 }
394 }
395
396 return NULL;
397}
398
399
400/*
401 * Get service by {fwmark} in the service table.
402 */
b18610de
JV
403static inline struct ip_vs_service *
404__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
405{
406 unsigned hash;
407 struct ip_vs_service *svc;
408
409 /* Check for fwmark addressed entries */
410 hash = ip_vs_svc_fwm_hashkey(fwmark);
411
412 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 413 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423struct ip_vs_service *
3c2e0505
JV
424ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
425 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
426{
427 struct ip_vs_service *svc;
3c2e0505 428
1da177e4
LT
429 read_lock(&__ip_vs_svc_lock);
430
431 /*
432 * Check the table hashed by fwmark first
433 */
3c2e0505 434 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
435 goto out;
436
437 /*
438 * Check the table hashed by <protocol,addr,port>
439 * for "full" addressed entries
440 */
3c2e0505 441 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
442
443 if (svc == NULL
444 && protocol == IPPROTO_TCP
445 && atomic_read(&ip_vs_ftpsvc_counter)
446 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
447 /*
448 * Check if ftp service entry exists, the packet
449 * might belong to FTP data connections.
450 */
3c2e0505 451 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
452 }
453
454 if (svc == NULL
455 && atomic_read(&ip_vs_nullsvc_counter)) {
456 /*
457 * Check if the catch-all port (port zero) exists
458 */
3c2e0505 459 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
460 }
461
462 out:
463 read_unlock(&__ip_vs_svc_lock);
464
3c2e0505
JV
465 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
466 fwmark, ip_vs_proto_name(protocol),
467 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
468 svc ? "hit" : "not hit");
1da177e4
LT
469
470 return svc;
471}
472
473
474static inline void
475__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
476{
477 atomic_inc(&svc->refcnt);
478 dest->svc = svc;
479}
480
481static inline void
482__ip_vs_unbind_svc(struct ip_vs_dest *dest)
483{
484 struct ip_vs_service *svc = dest->svc;
485
486 dest->svc = NULL;
487 if (atomic_dec_and_test(&svc->refcnt))
488 kfree(svc);
489}
490
491
492/*
493 * Returns hash value for real service
494 */
7937df15
JV
495static inline unsigned ip_vs_rs_hashkey(int af,
496 const union nf_inet_addr *addr,
497 __be16 port)
1da177e4
LT
498{
499 register unsigned porth = ntohs(port);
7937df15
JV
500 __be32 addr_fold = addr->ip;
501
502#ifdef CONFIG_IP_VS_IPV6
503 if (af == AF_INET6)
504 addr_fold = addr->ip6[0]^addr->ip6[1]^
505 addr->ip6[2]^addr->ip6[3];
506#endif
1da177e4 507
7937df15 508 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
509 & IP_VS_RTAB_MASK;
510}
511
512/*
513 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
514 * should be called with locked tables.
515 */
516static int ip_vs_rs_hash(struct ip_vs_dest *dest)
517{
518 unsigned hash;
519
520 if (!list_empty(&dest->d_list)) {
521 return 0;
522 }
523
524 /*
525 * Hash by proto,addr,port,
526 * which are the parameters of the real service.
527 */
7937df15
JV
528 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
529
1da177e4
LT
530 list_add(&dest->d_list, &ip_vs_rtable[hash]);
531
532 return 1;
533}
534
535/*
536 * UNhashes ip_vs_dest from ip_vs_rtable.
537 * should be called with locked tables.
538 */
539static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
540{
541 /*
542 * Remove it from the ip_vs_rtable table.
543 */
544 if (!list_empty(&dest->d_list)) {
545 list_del(&dest->d_list);
546 INIT_LIST_HEAD(&dest->d_list);
547 }
548
549 return 1;
550}
551
552/*
553 * Lookup real service by <proto,addr,port> in the real service table.
554 */
555struct ip_vs_dest *
7937df15
JV
556ip_vs_lookup_real_service(int af, __u16 protocol,
557 const union nf_inet_addr *daddr,
558 __be16 dport)
1da177e4
LT
559{
560 unsigned hash;
561 struct ip_vs_dest *dest;
562
563 /*
564 * Check for "full" addressed entries
565 * Return the first found entry
566 */
7937df15 567 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
568
569 read_lock(&__ip_vs_rs_lock);
570 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
571 if ((dest->af == af)
572 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
573 && (dest->port == dport)
574 && ((dest->protocol == protocol) ||
575 dest->vfwmark)) {
576 /* HIT */
577 read_unlock(&__ip_vs_rs_lock);
578 return dest;
579 }
580 }
581 read_unlock(&__ip_vs_rs_lock);
582
583 return NULL;
584}
585
586/*
587 * Lookup destination by {addr,port} in the given service
588 */
589static struct ip_vs_dest *
7937df15
JV
590ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
591 __be16 dport)
1da177e4
LT
592{
593 struct ip_vs_dest *dest;
594
595 /*
596 * Find the destination for the given service
597 */
598 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
599 if ((dest->af == svc->af)
600 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
601 && (dest->port == dport)) {
1da177e4
LT
602 /* HIT */
603 return dest;
604 }
605 }
606
607 return NULL;
608}
609
1e356f9c
RB
610/*
611 * Find destination by {daddr,dport,vaddr,protocol}
612 * Cretaed to be used in ip_vs_process_message() in
613 * the backup synchronization daemon. It finds the
614 * destination to be bound to the received connection
615 * on the backup.
616 *
617 * ip_vs_lookup_real_service() looked promissing, but
618 * seems not working as expected.
619 */
7937df15
JV
620struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
621 __be16 dport,
622 const union nf_inet_addr *vaddr,
623 __be16 vport, __u16 protocol)
1e356f9c
RB
624{
625 struct ip_vs_dest *dest;
626 struct ip_vs_service *svc;
627
7937df15 628 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
629 if (!svc)
630 return NULL;
631 dest = ip_vs_lookup_dest(svc, daddr, dport);
632 if (dest)
633 atomic_inc(&dest->refcnt);
634 ip_vs_service_put(svc);
635 return dest;
636}
1da177e4
LT
637
638/*
639 * Lookup dest by {svc,addr,port} in the destination trash.
640 * The destination trash is used to hold the destinations that are removed
641 * from the service table but are still referenced by some conn entries.
642 * The reason to add the destination trash is when the dest is temporary
643 * down (either by administrator or by monitor program), the dest can be
644 * picked back from the trash, the remaining connections to the dest can
645 * continue, and the counting information of the dest is also useful for
646 * scheduling.
647 */
648static struct ip_vs_dest *
7937df15
JV
649ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
650 __be16 dport)
1da177e4
LT
651{
652 struct ip_vs_dest *dest, *nxt;
653
654 /*
655 * Find the destination in trash
656 */
657 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
658 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
659 "dest->refcnt=%d\n",
660 dest->vfwmark,
661 IP_VS_DBG_ADDR(svc->af, &dest->addr),
662 ntohs(dest->port),
663 atomic_read(&dest->refcnt));
664 if (dest->af == svc->af &&
665 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
666 dest->port == dport &&
667 dest->vfwmark == svc->fwmark &&
668 dest->protocol == svc->protocol &&
669 (svc->fwmark ||
7937df15 670 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
671 dest->vport == svc->port))) {
672 /* HIT */
673 return dest;
674 }
675
676 /*
677 * Try to purge the destination from trash if not referenced
678 */
679 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
680 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
681 "from trash\n",
682 dest->vfwmark,
683 IP_VS_DBG_ADDR(svc->af, &dest->addr),
684 ntohs(dest->port));
1da177e4
LT
685 list_del(&dest->n_list);
686 ip_vs_dst_reset(dest);
687 __ip_vs_unbind_svc(dest);
688 kfree(dest);
689 }
690 }
691
692 return NULL;
693}
694
695
696/*
697 * Clean up all the destinations in the trash
698 * Called by the ip_vs_control_cleanup()
699 *
700 * When the ip_vs_control_clearup is activated by ipvs module exit,
701 * the service tables must have been flushed and all the connections
702 * are expired, and the refcnt of each destination in the trash must
703 * be 1, so we simply release them here.
704 */
705static void ip_vs_trash_cleanup(void)
706{
707 struct ip_vs_dest *dest, *nxt;
708
709 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
710 list_del(&dest->n_list);
711 ip_vs_dst_reset(dest);
712 __ip_vs_unbind_svc(dest);
713 kfree(dest);
714 }
715}
716
717
718static void
719ip_vs_zero_stats(struct ip_vs_stats *stats)
720{
721 spin_lock_bh(&stats->lock);
e93615d0
SH
722
723 stats->conns = 0;
724 stats->inpkts = 0;
725 stats->outpkts = 0;
726 stats->inbytes = 0;
727 stats->outbytes = 0;
728
729 stats->cps = 0;
730 stats->inpps = 0;
731 stats->outpps = 0;
732 stats->inbps = 0;
733 stats->outbps = 0;
734
1da177e4 735 ip_vs_zero_estimator(stats);
e93615d0 736
3a14a313 737 spin_unlock_bh(&stats->lock);
1da177e4
LT
738}
739
740/*
741 * Update a destination in the given service
742 */
743static void
744__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 745 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
746{
747 int conn_flags;
748
749 /* set the weight and the flags */
750 atomic_set(&dest->weight, udest->weight);
751 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
752
753 /* check if local node and update the flags */
c860c6b1 754 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
1da177e4
LT
755 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
756 | IP_VS_CONN_F_LOCALNODE;
757 }
758
759 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
760 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
761 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
762 } else {
763 /*
764 * Put the real service in ip_vs_rtable if not present.
765 * For now only for NAT!
766 */
767 write_lock_bh(&__ip_vs_rs_lock);
768 ip_vs_rs_hash(dest);
769 write_unlock_bh(&__ip_vs_rs_lock);
770 }
771 atomic_set(&dest->conn_flags, conn_flags);
772
773 /* bind the service */
774 if (!dest->svc) {
775 __ip_vs_bind_svc(dest, svc);
776 } else {
777 if (dest->svc != svc) {
778 __ip_vs_unbind_svc(dest);
779 ip_vs_zero_stats(&dest->stats);
780 __ip_vs_bind_svc(dest, svc);
781 }
782 }
783
784 /* set the dest status flags */
785 dest->flags |= IP_VS_DEST_F_AVAILABLE;
786
787 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
788 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
789 dest->u_threshold = udest->u_threshold;
790 dest->l_threshold = udest->l_threshold;
791}
792
793
794/*
795 * Create a destination for the given service
796 */
797static int
c860c6b1 798ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
799 struct ip_vs_dest **dest_p)
800{
801 struct ip_vs_dest *dest;
802 unsigned atype;
803
804 EnterFunction(2);
805
c860c6b1 806 atype = inet_addr_type(&init_net, udest->addr.ip);
1da177e4
LT
807 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
808 return -EINVAL;
809
0da974f4 810 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
811 if (dest == NULL) {
812 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
813 return -ENOMEM;
814 }
1da177e4 815
c860c6b1 816 dest->af = svc->af;
1da177e4 817 dest->protocol = svc->protocol;
c860c6b1 818 dest->vaddr = svc->addr;
1da177e4
LT
819 dest->vport = svc->port;
820 dest->vfwmark = svc->fwmark;
c860c6b1 821 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
822 dest->port = udest->port;
823
824 atomic_set(&dest->activeconns, 0);
825 atomic_set(&dest->inactconns, 0);
826 atomic_set(&dest->persistconns, 0);
827 atomic_set(&dest->refcnt, 0);
828
829 INIT_LIST_HEAD(&dest->d_list);
830 spin_lock_init(&dest->dst_lock);
831 spin_lock_init(&dest->stats.lock);
832 __ip_vs_update_dest(svc, dest, udest);
833 ip_vs_new_estimator(&dest->stats);
834
835 *dest_p = dest;
836
837 LeaveFunction(2);
838 return 0;
839}
840
841
842/*
843 * Add a destination into an existing service
844 */
845static int
c860c6b1 846ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
847{
848 struct ip_vs_dest *dest;
c860c6b1 849 union nf_inet_addr daddr;
014d730d 850 __be16 dport = udest->port;
1da177e4
LT
851 int ret;
852
853 EnterFunction(2);
854
855 if (udest->weight < 0) {
856 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
857 return -ERANGE;
858 }
859
860 if (udest->l_threshold > udest->u_threshold) {
861 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
862 "upper threshold\n");
863 return -ERANGE;
864 }
865
c860c6b1
JV
866 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
867
1da177e4
LT
868 /*
869 * Check if the dest already exists in the list
870 */
7937df15
JV
871 dest = ip_vs_lookup_dest(svc, &daddr, dport);
872
1da177e4
LT
873 if (dest != NULL) {
874 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
875 return -EEXIST;
876 }
877
878 /*
879 * Check if the dest already exists in the trash and
880 * is from the same service
881 */
7937df15
JV
882 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
883
1da177e4
LT
884 if (dest != NULL) {
885 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 886 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
887 NIPQUAD(daddr), ntohs(dport),
888 atomic_read(&dest->refcnt),
889 dest->vfwmark,
e7ade46a 890 NIPQUAD(dest->vaddr.ip),
1da177e4
LT
891 ntohs(dest->vport));
892 __ip_vs_update_dest(svc, dest, udest);
893
894 /*
895 * Get the destination from the trash
896 */
897 list_del(&dest->n_list);
898
899 ip_vs_new_estimator(&dest->stats);
900
901 write_lock_bh(&__ip_vs_svc_lock);
902
903 /*
904 * Wait until all other svc users go away.
905 */
906 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
907
908 list_add(&dest->n_list, &svc->destinations);
909 svc->num_dests++;
910
911 /* call the update_service function of its scheduler */
82dfb6f3
SW
912 if (svc->scheduler->update_service)
913 svc->scheduler->update_service(svc);
1da177e4
LT
914
915 write_unlock_bh(&__ip_vs_svc_lock);
916 return 0;
917 }
918
919 /*
920 * Allocate and initialize the dest structure
921 */
922 ret = ip_vs_new_dest(svc, udest, &dest);
923 if (ret) {
924 return ret;
925 }
926
927 /*
928 * Add the dest entry into the list
929 */
930 atomic_inc(&dest->refcnt);
931
932 write_lock_bh(&__ip_vs_svc_lock);
933
934 /*
935 * Wait until all other svc users go away.
936 */
937 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
938
939 list_add(&dest->n_list, &svc->destinations);
940 svc->num_dests++;
941
942 /* call the update_service function of its scheduler */
82dfb6f3
SW
943 if (svc->scheduler->update_service)
944 svc->scheduler->update_service(svc);
1da177e4
LT
945
946 write_unlock_bh(&__ip_vs_svc_lock);
947
948 LeaveFunction(2);
949
950 return 0;
951}
952
953
954/*
955 * Edit a destination in the given service
956 */
957static int
c860c6b1 958ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
959{
960 struct ip_vs_dest *dest;
c860c6b1 961 union nf_inet_addr daddr;
014d730d 962 __be16 dport = udest->port;
1da177e4
LT
963
964 EnterFunction(2);
965
966 if (udest->weight < 0) {
967 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
968 return -ERANGE;
969 }
970
971 if (udest->l_threshold > udest->u_threshold) {
972 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
973 "upper threshold\n");
974 return -ERANGE;
975 }
976
c860c6b1
JV
977 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
978
1da177e4
LT
979 /*
980 * Lookup the destination list
981 */
7937df15
JV
982 dest = ip_vs_lookup_dest(svc, &daddr, dport);
983
1da177e4
LT
984 if (dest == NULL) {
985 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
986 return -ENOENT;
987 }
988
989 __ip_vs_update_dest(svc, dest, udest);
990
991 write_lock_bh(&__ip_vs_svc_lock);
992
993 /* Wait until all other svc users go away */
cae7ca3d 994 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
995
996 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
997 if (svc->scheduler->update_service)
998 svc->scheduler->update_service(svc);
1da177e4
LT
999
1000 write_unlock_bh(&__ip_vs_svc_lock);
1001
1002 LeaveFunction(2);
1003
1004 return 0;
1005}
1006
1007
1008/*
1009 * Delete a destination (must be already unlinked from the service)
1010 */
1011static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1012{
1013 ip_vs_kill_estimator(&dest->stats);
1014
1015 /*
1016 * Remove it from the d-linked list with the real services.
1017 */
1018 write_lock_bh(&__ip_vs_rs_lock);
1019 ip_vs_rs_unhash(dest);
1020 write_unlock_bh(&__ip_vs_rs_lock);
1021
1022 /*
1023 * Decrease the refcnt of the dest, and free the dest
1024 * if nobody refers to it (refcnt=0). Otherwise, throw
1025 * the destination into the trash.
1026 */
1027 if (atomic_dec_and_test(&dest->refcnt)) {
1028 ip_vs_dst_reset(dest);
1029 /* simply decrease svc->refcnt here, let the caller check
1030 and release the service if nobody refers to it.
1031 Only user context can release destination and service,
1032 and only one user context can update virtual service at a
1033 time, so the operation here is OK */
1034 atomic_dec(&dest->svc->refcnt);
1035 kfree(dest);
1036 } else {
4b5bdf5c
RN
1037 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1038 "dest->refcnt=%d\n",
e7ade46a 1039 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1040 atomic_read(&dest->refcnt));
1041 list_add(&dest->n_list, &ip_vs_dest_trash);
1042 atomic_inc(&dest->refcnt);
1043 }
1044}
1045
1046
1047/*
1048 * Unlink a destination from the given service
1049 */
1050static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1051 struct ip_vs_dest *dest,
1052 int svcupd)
1053{
1054 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1055
1056 /*
1057 * Remove it from the d-linked destination list.
1058 */
1059 list_del(&dest->n_list);
1060 svc->num_dests--;
82dfb6f3
SW
1061
1062 /*
1063 * Call the update_service function of its scheduler
1064 */
1065 if (svcupd && svc->scheduler->update_service)
1066 svc->scheduler->update_service(svc);
1da177e4
LT
1067}
1068
1069
1070/*
1071 * Delete a destination server in the given service
1072 */
1073static int
c860c6b1 1074ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1075{
1076 struct ip_vs_dest *dest;
014d730d 1077 __be16 dport = udest->port;
1da177e4
LT
1078
1079 EnterFunction(2);
1080
7937df15 1081 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1082
1da177e4
LT
1083 if (dest == NULL) {
1084 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1085 return -ENOENT;
1086 }
1087
1088 write_lock_bh(&__ip_vs_svc_lock);
1089
1090 /*
1091 * Wait until all other svc users go away.
1092 */
1093 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1094
1095 /*
1096 * Unlink dest from the service
1097 */
1098 __ip_vs_unlink_dest(svc, dest, 1);
1099
1100 write_unlock_bh(&__ip_vs_svc_lock);
1101
1102 /*
1103 * Delete the destination
1104 */
1105 __ip_vs_del_dest(dest);
1106
1107 LeaveFunction(2);
1108
1109 return 0;
1110}
1111
1112
1113/*
1114 * Add a service into the service hash table
1115 */
1116static int
c860c6b1
JV
1117ip_vs_add_service(struct ip_vs_service_user_kern *u,
1118 struct ip_vs_service **svc_p)
1da177e4
LT
1119{
1120 int ret = 0;
1121 struct ip_vs_scheduler *sched = NULL;
1122 struct ip_vs_service *svc = NULL;
1123
1124 /* increase the module use count */
1125 ip_vs_use_count_inc();
1126
1127 /* Lookup the scheduler by 'u->sched_name' */
1128 sched = ip_vs_scheduler_get(u->sched_name);
1129 if (sched == NULL) {
1130 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1131 u->sched_name);
1132 ret = -ENOENT;
1133 goto out_mod_dec;
1134 }
1135
0da974f4 1136 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1137 if (svc == NULL) {
1138 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1139 ret = -ENOMEM;
1140 goto out_err;
1141 }
1da177e4
LT
1142
1143 /* I'm the first user of the service */
1144 atomic_set(&svc->usecnt, 1);
1145 atomic_set(&svc->refcnt, 0);
1146
c860c6b1 1147 svc->af = u->af;
1da177e4 1148 svc->protocol = u->protocol;
c860c6b1 1149 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1150 svc->port = u->port;
1151 svc->fwmark = u->fwmark;
1152 svc->flags = u->flags;
1153 svc->timeout = u->timeout * HZ;
1154 svc->netmask = u->netmask;
1155
1156 INIT_LIST_HEAD(&svc->destinations);
1157 rwlock_init(&svc->sched_lock);
1158 spin_lock_init(&svc->stats.lock);
1159
1160 /* Bind the scheduler */
1161 ret = ip_vs_bind_scheduler(svc, sched);
1162 if (ret)
1163 goto out_err;
1164 sched = NULL;
1165
1166 /* Update the virtual service counters */
1167 if (svc->port == FTPPORT)
1168 atomic_inc(&ip_vs_ftpsvc_counter);
1169 else if (svc->port == 0)
1170 atomic_inc(&ip_vs_nullsvc_counter);
1171
1172 ip_vs_new_estimator(&svc->stats);
1173 ip_vs_num_services++;
1174
1175 /* Hash the service into the service table */
1176 write_lock_bh(&__ip_vs_svc_lock);
1177 ip_vs_svc_hash(svc);
1178 write_unlock_bh(&__ip_vs_svc_lock);
1179
1180 *svc_p = svc;
1181 return 0;
1182
1183 out_err:
1184 if (svc != NULL) {
1185 if (svc->scheduler)
1186 ip_vs_unbind_scheduler(svc);
1187 if (svc->inc) {
1188 local_bh_disable();
1189 ip_vs_app_inc_put(svc->inc);
1190 local_bh_enable();
1191 }
1192 kfree(svc);
1193 }
1194 ip_vs_scheduler_put(sched);
1195
1196 out_mod_dec:
1197 /* decrease the module use count */
1198 ip_vs_use_count_dec();
1199
1200 return ret;
1201}
1202
1203
/*
 *	Edit a service and bind it with a new scheduler
 *
 *	Looks up the scheduler named in 'u->sched_name', then, under
 *	__ip_vs_svc_lock and after all other users of 'svc' have gone,
 *	overwrites the flags, timeout and netmask of 'svc' and, if the
 *	scheduler differs from the one currently bound, swaps schedulers.
 *
 *	Returns 0 on success, -ENOENT when the scheduler lookup fails, or
 *	the error returned by ip_vs_unbind_scheduler() /
 *	ip_vs_bind_scheduler().  Note that flags, timeout and netmask are
 *	written before the scheduler swap is attempted, so they stay
 *	modified even when the swap fails.
 */
static int
ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
{
	struct ip_vs_scheduler *sched, *old_sched;
	int ret = 0;

	/*
	 * Lookup the scheduler, by 'u->sched_name'
	 */
	sched = ip_vs_scheduler_get(u->sched_name);
	if (sched == NULL) {
		IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
			   u->sched_name);
		return -ENOENT;
	}
	/*
	 * From here on 'old_sched' names the scheduler whose reference
	 * is dropped at 'out'.
	 */
	old_sched = sched;

	write_lock_bh(&__ip_vs_svc_lock);

	/*
	 * Wait until all other svc users go away.
	 */
	IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);

	/*
	 * Set the flags and timeout value
	 */
	svc->flags = u->flags | IP_VS_SVC_F_HASHED;
	svc->timeout = u->timeout * HZ;
	svc->netmask = u->netmask;

	/*
	 * If the scheduler is unchanged, old_sched == sched and the put
	 * at 'out' simply releases the reference taken by the lookup.
	 */
	old_sched = svc->scheduler;
	if (sched != old_sched) {
		/*
		 * Unbind the old scheduler
		 */
		if ((ret = ip_vs_unbind_scheduler(svc))) {
			/* keep the old scheduler, release the new one */
			old_sched = sched;
			goto out;
		}

		/*
		 * Bind the new scheduler
		 */
		if ((ret = ip_vs_bind_scheduler(svc, sched))) {
			/*
			 * If ip_vs_bind_scheduler fails, restore the old
			 * scheduler.
			 * The main reason of failure is out of memory.
			 *
			 * The question is if the old scheduler can be
			 * restored all the time. TODO: if it cannot be
			 * restored some time, we must delete the service,
			 * otherwise the system may crash.
			 */
			ip_vs_bind_scheduler(svc, old_sched);
			/* release the new scheduler that could not be bound */
			old_sched = sched;
			goto out;
		}
	}

  out:
	write_unlock_bh(&__ip_vs_svc_lock);

	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	return ret;
}
1276
1277
/*
 *	Delete a service from the service list
 *	- The service must be unlinked, unlocked and not referenced!
 *	- We are called under _bh lock
 *
 *	Tears the service down: stops its estimator, unbinds and
 *	releases its scheduler and application incarnation, unlinks and
 *	deletes every destination, updates the FTP/null service
 *	counters, frees the structure if its refcnt is zero and finally
 *	drops the module use count.
 */
static void __ip_vs_del_service(struct ip_vs_service *svc)
{
	struct ip_vs_dest *dest, *nxt;
	struct ip_vs_scheduler *old_sched;

	ip_vs_num_services--;
	ip_vs_kill_estimator(&svc->stats);

	/* Unbind scheduler */
	/* remember it first so its reference can be put after the unbind */
	old_sched = svc->scheduler;
	ip_vs_unbind_scheduler(svc);
	if (old_sched)
		ip_vs_scheduler_put(old_sched);

	/* Unbind app inc */
	if (svc->inc) {
		ip_vs_app_inc_put(svc->inc);
		svc->inc = NULL;
	}

	/*
	 *    Unlink the whole destination list
	 */
	list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
		__ip_vs_unlink_dest(svc, dest, 0);
		__ip_vs_del_dest(dest);
	}

	/*
	 *    Update the virtual service counters
	 */
	if (svc->port == FTPPORT)
		atomic_dec(&ip_vs_ftpsvc_counter);
	else if (svc->port == 0)
		atomic_dec(&ip_vs_nullsvc_counter);

	/*
	 *    Free the service if nobody refers to it
	 */
	if (atomic_read(&svc->refcnt) == 0)
		kfree(svc);

	/* decrease the module use count */
	ip_vs_use_count_dec();
}
1328
1329/*
1330 * Delete a service from the service list
1331 */
1332static int ip_vs_del_service(struct ip_vs_service *svc)
1333{
1334 if (svc == NULL)
1335 return -EEXIST;
1336
1337 /*
1338 * Unhash it from the service table
1339 */
1340 write_lock_bh(&__ip_vs_svc_lock);
1341
1342 ip_vs_svc_unhash(svc);
1343
1344 /*
1345 * Wait until all the svc users go away.
1346 */
1347 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1348
1349 __ip_vs_del_service(svc);
1350
1351 write_unlock_bh(&__ip_vs_svc_lock);
1352
1353 return 0;
1354}
1355
1356
1357/*
1358 * Flush all the virtual services
1359 */
1360static int ip_vs_flush(void)
1361{
1362 int idx;
1363 struct ip_vs_service *svc, *nxt;
1364
1365 /*
1366 * Flush the service table hashed by <protocol,addr,port>
1367 */
1368 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1369 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1370 write_lock_bh(&__ip_vs_svc_lock);
1371 ip_vs_svc_unhash(svc);
1372 /*
1373 * Wait until all the svc users go away.
1374 */
1375 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1376 __ip_vs_del_service(svc);
1377 write_unlock_bh(&__ip_vs_svc_lock);
1378 }
1379 }
1380
1381 /*
1382 * Flush the service table hashed by fwmark
1383 */
1384 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1385 list_for_each_entry_safe(svc, nxt,
1386 &ip_vs_svc_fwm_table[idx], f_list) {
1387 write_lock_bh(&__ip_vs_svc_lock);
1388 ip_vs_svc_unhash(svc);
1389 /*
1390 * Wait until all the svc users go away.
1391 */
1392 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1393 __ip_vs_del_service(svc);
1394 write_unlock_bh(&__ip_vs_svc_lock);
1395 }
1396 }
1397
1398 return 0;
1399}
1400
1401
1402/*
1403 * Zero counters in a service or all services
1404 */
1405static int ip_vs_zero_service(struct ip_vs_service *svc)
1406{
1407 struct ip_vs_dest *dest;
1408
1409 write_lock_bh(&__ip_vs_svc_lock);
1410 list_for_each_entry(dest, &svc->destinations, n_list) {
1411 ip_vs_zero_stats(&dest->stats);
1412 }
1413 ip_vs_zero_stats(&svc->stats);
1414 write_unlock_bh(&__ip_vs_svc_lock);
1415 return 0;
1416}
1417
1418static int ip_vs_zero_all(void)
1419{
1420 int idx;
1421 struct ip_vs_service *svc;
1422
1423 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1424 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1425 ip_vs_zero_service(svc);
1426 }
1427 }
1428
1429 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1430 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1431 ip_vs_zero_service(svc);
1432 }
1433 }
1434
1435 ip_vs_zero_stats(&ip_vs_stats);
1436 return 0;
1437}
1438
1439
1440static int
1441proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1442 void __user *buffer, size_t *lenp, loff_t *ppos)
1443{
1444 int *valp = table->data;
1445 int val = *valp;
1446 int rc;
1447
1448 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1449 if (write && (*valp != val)) {
1450 if ((*valp < 0) || (*valp > 3)) {
1451 /* Restore the correct value */
1452 *valp = val;
1453 } else {
1da177e4 1454 update_defense_level();
1da177e4
LT
1455 }
1456 }
1457 return rc;
1458}
1459
1460
1461static int
1462proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1463 void __user *buffer, size_t *lenp, loff_t *ppos)
1464{
1465 int *valp = table->data;
1466 int val[2];
1467 int rc;
1468
1469 /* backup the value first */
1470 memcpy(val, valp, sizeof(val));
1471
1472 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1473 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1474 /* Restore the correct value */
1475 memcpy(valp, val, sizeof(val));
1476 }
1477 return rc;
1478}
1479
1480
/*
 *	IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
 *
 *	Every entry exposes one sysctl_ip_vs_* variable with mode 0644.
 *	Plain integers use proc_dointvec; drop_entry, drop_packet and
 *	secure_tcp go through proc_do_defense_mode, and sync_threshold
 *	through proc_do_sync_threshold.  The timeout_* entries are
 *	compiled out with #if 0.
 */

static struct ctl_table vs_vars[] = {
	{
		.procname	= "amemthresh",
		.data		= &sysctl_ip_vs_amemthresh,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#ifdef CONFIG_IP_VS_DEBUG
	{
		.procname	= "debug_level",
		.data		= &sysctl_ip_vs_debug_level,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
#endif
	{
		.procname	= "am_droprate",
		.data		= &sysctl_ip_vs_am_droprate,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "drop_entry",
		.data		= &sysctl_ip_vs_drop_entry,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
	{
		.procname	= "drop_packet",
		.data		= &sysctl_ip_vs_drop_packet,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
	{
		.procname	= "secure_tcp",
		.data		= &sysctl_ip_vs_secure_tcp,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_do_defense_mode,
	},
#if 0
	{
		.procname	= "timeout_established",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_synsent",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_synrecv",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_finwait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_timewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_close",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_closewait",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_lastack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_listen",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_synack",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_udp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
	{
		.procname	= "timeout_icmp",
		.data	= &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec_jiffies,
	},
#endif
	{
		.procname	= "cache_bypass",
		.data		= &sysctl_ip_vs_cache_bypass,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "expire_nodest_conn",
		.data		= &sysctl_ip_vs_expire_nodest_conn,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		.procname	= "expire_quiescent_template",
		.data		= &sysctl_ip_vs_expire_quiescent_template,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	{
		/* maxlen covers the whole variable, not a single int */
		.procname	= "sync_threshold",
		.data		= &sysctl_ip_vs_sync_threshold,
		.maxlen		= sizeof(sysctl_ip_vs_sync_threshold),
		.mode		= 0644,
		.proc_handler	= &proc_do_sync_threshold,
	},
	{
		.procname	= "nat_icmp_send",
		.data		= &sysctl_ip_vs_nat_icmp_send,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= &proc_dointvec,
	},
	/* terminating entry */
	{ .ctl_name = 0 }
};
1653
/*
 *	sysctl path "net" / "ipv4" / "vs", terminated by an empty entry.
 *	Exported (GPL only) for use outside this file.
 */
const struct ctl_path net_vs_ctl_path[] = {
	{ .procname = "net", .ctl_name = CTL_NET, },
	{ .procname = "ipv4", .ctl_name = NET_IPV4, },
	{ .procname = "vs", },
	{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1661
/* sysctl table header; presumably set when vs_vars is registered -- TODO confirm */
static struct ctl_table_header * sysctl_header;
1663
1664#ifdef CONFIG_PROC_FS
1665
/*
 *	Per-open iterator state for the service listing seq_file:
 *	which hash table the current service came from and the bucket
 *	index inside that table.
 */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* bucket of the current service */
};
1670
1671/*
1672 * Write the contents of the VS rule table to a PROCfs file.
1673 * (It is kept just for backward compatibility)
1674 */
1675static inline const char *ip_vs_fwd_name(unsigned flags)
1676{
1677 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1678 case IP_VS_CONN_F_LOCALNODE:
1679 return "Local";
1680 case IP_VS_CONN_F_TUNNEL:
1681 return "Tunnel";
1682 case IP_VS_CONN_F_DROUTE:
1683 return "Route";
1684 default:
1685 return "Masq";
1686 }
1687}
1688
1689
1690/* Get the Nth entry in the two lists */
1691static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1692{
1693 struct ip_vs_iter *iter = seq->private;
1694 int idx;
1695 struct ip_vs_service *svc;
1696
1697 /* look in hash by protocol */
1698 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1699 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1700 if (pos-- == 0){
1701 iter->table = ip_vs_svc_table;
1702 iter->bucket = idx;
1703 return svc;
1704 }
1705 }
1706 }
1707
1708 /* keep looking in fwmark */
1709 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1710 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1711 if (pos-- == 0) {
1712 iter->table = ip_vs_svc_fwm_table;
1713 iter->bucket = idx;
1714 return svc;
1715 }
1716 }
1717 }
1718
1719 return NULL;
1720}
1721
1722static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1723{
1724
1725 read_lock_bh(&__ip_vs_svc_lock);
1726 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1727}
1728
1729
/*
 *	seq_file next: advance *pos and return the service following
 *	'v', or NULL at the end.  After the header token the first
 *	service is returned.  Otherwise the walk continues in the
 *	current bucket, then in the following buckets of the current
 *	table, and moves from the protocol-hashed table on to the
 *	fwmark-hashed table once the former is exhausted.
 */
static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	struct list_head *e;
	struct ip_vs_iter *iter;
	struct ip_vs_service *svc;

	++*pos;
	if (v == SEQ_START_TOKEN)
		return ip_vs_info_array(seq,0);

	svc = v;
	iter = seq->private;

	if (iter->table == ip_vs_svc_table) {
		/* next service in table hashed by protocol */
		if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
			return list_entry(e, struct ip_vs_service, s_list);


		/* bucket exhausted: first entry of the next non-empty one */
		while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
			list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
					    s_list) {
				return svc;
			}
		}

		/*
		 * Protocol table done.  Start at -1 so that the
		 * pre-increment below begins with bucket 0.
		 */
		iter->table = ip_vs_svc_fwm_table;
		iter->bucket = -1;
		goto scan_fwmark;
	}

	/* next service in hashed by fwmark */
	if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
		return list_entry(e, struct ip_vs_service, f_list);

 scan_fwmark:
	while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
		list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
				    f_list)
			return svc;
	}

	return NULL;
}
1774
/* seq_file stop: release the read lock taken in ip_vs_info_seq_start() */
static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
{
	read_unlock_bh(&__ip_vs_svc_lock);
}
1779
1780
/*
 *	seq_file show: print the three header lines for the start token,
 *	otherwise one line describing the service 'v' followed by one
 *	line per destination.  Services from the protocol-hashed table
 *	are printed with protocol, address and port (IPv6 form when
 *	CONFIG_IP_VS_IPV6 is set and the family is AF_INET6); services
 *	from the fwmark table are printed as "FWM" plus the mark.
 *	Always returns 0.
 */
static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
{
	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			"IP Virtual Server version %d.%d.%d (size=%d)\n",
			NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
		seq_puts(seq,
			 "Prot LocalAddress:Port Scheduler Flags\n");
		seq_puts(seq,
			 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
	} else {
		const struct ip_vs_service *svc = v;
		const struct ip_vs_iter *iter = seq->private;
		const struct ip_vs_dest *dest;

		if (iter->table == ip_vs_svc_table) {
#ifdef CONFIG_IP_VS_IPV6
			if (svc->af == AF_INET6)
				seq_printf(seq, "%s [" NIP6_FMT "]:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   NIP6(svc->addr.in6),
					   ntohs(svc->port),
					   svc->scheduler->name);
			else
#endif
				/* IPv4, or any family when IPv6 is compiled out */
				seq_printf(seq, "%s %08X:%04X %s ",
					   ip_vs_proto_name(svc->protocol),
					   ntohl(svc->addr.ip),
					   ntohs(svc->port),
					   svc->scheduler->name);
		} else {
			seq_printf(seq, "FWM %08X %s ",
				   svc->fwmark, svc->scheduler->name);
		}

		/* persistence timeout and netmask close the service line */
		if (svc->flags & IP_VS_SVC_F_PERSISTENT)
			seq_printf(seq, "persistent %d %08X\n",
				svc->timeout,
				ntohl(svc->netmask));
		else
			seq_putc(seq, '\n');

		list_for_each_entry(dest, &svc->destinations, n_list) {
#ifdef CONFIG_IP_VS_IPV6
			if (dest->af == AF_INET6)
				seq_printf(seq,
					   " -> [" NIP6_FMT "]:%04X"
					   " %-7s %-6d %-10d %-10d\n",
					   NIP6(dest->addr.in6),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));
			else
#endif
				seq_printf(seq,
					   " -> %08X:%04X "
					   "%-7s %-6d %-10d %-10d\n",
					   ntohl(dest->addr.ip),
					   ntohs(dest->port),
					   ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
					   atomic_read(&dest->weight),
					   atomic_read(&dest->activeconns),
					   atomic_read(&dest->inactconns));

		}
	}
	return 0;
}
1851
/* seq_file callbacks for the service listing */
static const struct seq_operations ip_vs_info_seq_ops = {
	.start = ip_vs_info_seq_start,
	.next  = ip_vs_info_seq_next,
	.stop  = ip_vs_info_seq_stop,
	.show  = ip_vs_info_seq_show,
};
1858
/*
 *	Open the service listing as a seq_file with a private
 *	struct ip_vs_iter sized area for the iterator state.
 */
static int ip_vs_info_open(struct inode *inode, struct file *file)
{
	return seq_open_private(file, &ip_vs_info_seq_ops,
			sizeof(struct ip_vs_iter));
}
1864
/*
 *	File operations of the service listing; release uses
 *	seq_release_private to match seq_open_private in
 *	ip_vs_info_open().
 */
static const struct file_operations ip_vs_info_fops = {
	.owner	 = THIS_MODULE,
	.open    = ip_vs_info_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release_private,
};
1872
1873#endif
1874
519e49e8
SW
/* Global IPVS statistics; only the spinlock is initialised explicitly */
struct ip_vs_stats ip_vs_stats = {
	.lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
};
1da177e4
LT
1878
1879#ifdef CONFIG_PROC_FS
1880static int ip_vs_stats_show(struct seq_file *seq, void *v)
1881{
1882
1883/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1884 seq_puts(seq,
1885 " Total Incoming Outgoing Incoming Outgoing\n");
1886 seq_printf(seq,
1887 " Conns Packets Packets Bytes Bytes\n");
1888
1889 spin_lock_bh(&ip_vs_stats.lock);
1890 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1891 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1892 (unsigned long long) ip_vs_stats.inbytes,
1893 (unsigned long long) ip_vs_stats.outbytes);
1894
1895/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1896 seq_puts(seq,
1897 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1898 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1899 ip_vs_stats.cps,
1900 ip_vs_stats.inpps,
1901 ip_vs_stats.outpps,
1902 ip_vs_stats.inbps,
1903 ip_vs_stats.outbps);
1904 spin_unlock_bh(&ip_vs_stats.lock);
1905
1906 return 0;
1907}
1908
/* Open the statistics file as a single-record seq_file without private data */
static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
{
	return single_open(file, ip_vs_stats_show, NULL);
}
1913
/*
 *	File operations of the statistics file; release uses
 *	single_release to match single_open in ip_vs_stats_seq_open().
 */
static const struct file_operations ip_vs_stats_fops = {
	.owner = THIS_MODULE,
	.open = ip_vs_stats_seq_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1921
1922#endif
1923
1924/*
1925 * Set timeout values for tcp tcpfin udp in the timeout_table.
1926 */
1927static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1928{
1929 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1930 u->tcp_timeout,
1931 u->tcp_fin_timeout,
1932 u->udp_timeout);
1933
1934#ifdef CONFIG_IP_VS_PROTO_TCP
1935 if (u->tcp_timeout) {
1936 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1937 = u->tcp_timeout * HZ;
1938 }
1939
1940 if (u->tcp_fin_timeout) {
1941 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1942 = u->tcp_fin_timeout * HZ;
1943 }
1944#endif
1945
1946#ifdef CONFIG_IP_VS_PROTO_UDP
1947 if (u->udp_timeout) {
1948 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1949 = u->udp_timeout * HZ;
1950 }
1951#endif
1952 return 0;
1953}
1954
1955
/*
 *	Argument sizes of the IP_VS_SO_SET_* commands.
 *
 *	SET_CMDID() turns a command number into an index relative to
 *	IP_VS_BASE_CTL.  set_arglen[] holds, per command, the argument
 *	length that do_ip_vs_set_ctl() compares the user-supplied
 *	length against; MAX_ARG_LEN is the largest of them and sizes the
 *	on-stack argument buffer there.
 */
#define SET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define SERVICE_ARG_LEN		(sizeof(struct ip_vs_service_user))
#define SVCDEST_ARG_LEN		(sizeof(struct ip_vs_service_user) +	\
				 sizeof(struct ip_vs_dest_user))
#define TIMEOUT_ARG_LEN		(sizeof(struct ip_vs_timeout_user))
#define DAEMON_ARG_LEN		(sizeof(struct ip_vs_daemon_user))
#define MAX_ARG_LEN		SVCDEST_ARG_LEN

static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
	[SET_CMDID(IP_VS_SO_SET_ADD)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDIT)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DEL)]		= SERVICE_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_FLUSH)]		= 0,
	[SET_CMDID(IP_VS_SO_SET_ADDDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_DELDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_EDITDEST)]	= SVCDEST_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_TIMEOUT)]	= TIMEOUT_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]	= DAEMON_ARG_LEN,
	[SET_CMDID(IP_VS_SO_SET_ZERO)]		= SERVICE_ARG_LEN,
};
1977
c860c6b1
JV
1978static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1979 struct ip_vs_service_user *usvc_compat)
1980{
1981 usvc->af = AF_INET;
1982 usvc->protocol = usvc_compat->protocol;
1983 usvc->addr.ip = usvc_compat->addr;
1984 usvc->port = usvc_compat->port;
1985 usvc->fwmark = usvc_compat->fwmark;
1986
1987 /* Deep copy of sched_name is not needed here */
1988 usvc->sched_name = usvc_compat->sched_name;
1989
1990 usvc->flags = usvc_compat->flags;
1991 usvc->timeout = usvc_compat->timeout;
1992 usvc->netmask = usvc_compat->netmask;
1993}
1994
1995static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1996 struct ip_vs_dest_user *udest_compat)
1997{
1998 udest->addr.ip = udest_compat->addr;
1999 udest->port = udest_compat->port;
2000 udest->conn_flags = udest_compat->conn_flags;
2001 udest->weight = udest_compat->weight;
2002 udest->u_threshold = udest_compat->u_threshold;
2003 udest->l_threshold = udest_compat->l_threshold;
2004}
2005
1da177e4
LT
2006static int
2007do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2008{
2009 int ret;
2010 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
2011 struct ip_vs_service_user *usvc_compat;
2012 struct ip_vs_service_user_kern usvc;
1da177e4 2013 struct ip_vs_service *svc;
c860c6b1
JV
2014 struct ip_vs_dest_user *udest_compat;
2015 struct ip_vs_dest_user_kern udest;
1da177e4
LT
2016
2017 if (!capable(CAP_NET_ADMIN))
2018 return -EPERM;
2019
2020 if (len != set_arglen[SET_CMDID(cmd)]) {
2021 IP_VS_ERR("set_ctl: len %u != %u\n",
2022 len, set_arglen[SET_CMDID(cmd)]);
2023 return -EINVAL;
2024 }
2025
2026 if (copy_from_user(arg, user, len) != 0)
2027 return -EFAULT;
2028
2029 /* increase the module use count */
2030 ip_vs_use_count_inc();
2031
14cc3e2b 2032 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2033 ret = -ERESTARTSYS;
2034 goto out_dec;
2035 }
2036
2037 if (cmd == IP_VS_SO_SET_FLUSH) {
2038 /* Flush the virtual service */
2039 ret = ip_vs_flush();
2040 goto out_unlock;
2041 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2042 /* Set timeout values for (tcp tcpfin udp) */
2043 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2044 goto out_unlock;
2045 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2046 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2047 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2048 goto out_unlock;
2049 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2050 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2051 ret = stop_sync_thread(dm->state);
2052 goto out_unlock;
2053 }
2054
c860c6b1
JV
2055 usvc_compat = (struct ip_vs_service_user *)arg;
2056 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2057
2058 /* We only use the new structs internally, so copy userspace compat
2059 * structs to extended internal versions */
2060 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2061 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2062
2063 if (cmd == IP_VS_SO_SET_ZERO) {
2064 /* if no service address is set, zero counters in all */
c860c6b1 2065 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2066 ret = ip_vs_zero_all();
2067 goto out_unlock;
2068 }
2069 }
2070
2071 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2072 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2073 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2074 usvc.protocol, NIPQUAD(usvc.addr.ip),
2075 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2076 ret = -EFAULT;
2077 goto out_unlock;
2078 }
2079
2080 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2081 if (usvc.fwmark == 0)
b18610de
JV
2082 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2083 &usvc.addr, usvc.port);
1da177e4 2084 else
b18610de 2085 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2086
2087 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2088 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2089 ret = -ESRCH;
2090 goto out_unlock;
2091 }
2092
2093 switch (cmd) {
2094 case IP_VS_SO_SET_ADD:
2095 if (svc != NULL)
2096 ret = -EEXIST;
2097 else
c860c6b1 2098 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2099 break;
2100 case IP_VS_SO_SET_EDIT:
c860c6b1 2101 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2102 break;
2103 case IP_VS_SO_SET_DEL:
2104 ret = ip_vs_del_service(svc);
2105 if (!ret)
2106 goto out_unlock;
2107 break;
2108 case IP_VS_SO_SET_ZERO:
2109 ret = ip_vs_zero_service(svc);
2110 break;
2111 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2112 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2113 break;
2114 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2115 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2116 break;
2117 case IP_VS_SO_SET_DELDEST:
c860c6b1 2118 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2119 break;
2120 default:
2121 ret = -EINVAL;
2122 }
2123
2124 if (svc)
2125 ip_vs_service_put(svc);
2126
2127 out_unlock:
14cc3e2b 2128 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2129 out_dec:
2130 /* decrease the module use count */
2131 ip_vs_use_count_dec();
2132
2133 return ret;
2134}
2135
2136
/*
 *	Copy the statistics of 'src' into the user-visible 'dst' while
 *	holding src->lock.  The copy covers every byte of 'src' that
 *	precedes its 'lock' member.
 *	NOTE(review): this presumes struct ip_vs_stats_user matches that
 *	leading part of struct ip_vs_stats in size and layout -- confirm
 *	against the struct definitions.
 */
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
	spin_lock_bh(&src->lock);
	memcpy(dst, src, (char*)&src->lock - (char*)src);
	spin_unlock_bh(&src->lock);
}
2144
2145static void
2146ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2147{
2148 dst->protocol = src->protocol;
e7ade46a 2149 dst->addr = src->addr.ip;
1da177e4
LT
2150 dst->port = src->port;
2151 dst->fwmark = src->fwmark;
4da62fc7 2152 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2153 dst->flags = src->flags;
2154 dst->timeout = src->timeout / HZ;
2155 dst->netmask = src->netmask;
2156 dst->num_dests = src->num_dests;
2157 ip_vs_copy_stats(&dst->stats, &src->stats);
2158}
2159
2160static inline int
2161__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2162 struct ip_vs_get_services __user *uptr)
2163{
2164 int idx, count=0;
2165 struct ip_vs_service *svc;
2166 struct ip_vs_service_entry entry;
2167 int ret = 0;
2168
2169 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2170 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2171 if (count >= get->num_services)
2172 goto out;
4da62fc7 2173 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2174 ip_vs_copy_service(&entry, svc);
2175 if (copy_to_user(&uptr->entrytable[count],
2176 &entry, sizeof(entry))) {
2177 ret = -EFAULT;
2178 goto out;
2179 }
2180 count++;
2181 }
2182 }
2183
2184 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2185 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2186 if (count >= get->num_services)
2187 goto out;
4da62fc7 2188 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2189 ip_vs_copy_service(&entry, svc);
2190 if (copy_to_user(&uptr->entrytable[count],
2191 &entry, sizeof(entry))) {
2192 ret = -EFAULT;
2193 goto out;
2194 }
2195 count++;
2196 }
2197 }
2198 out:
2199 return ret;
2200}
2201
2202static inline int
2203__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2204 struct ip_vs_get_dests __user *uptr)
2205{
2206 struct ip_vs_service *svc;
b18610de 2207 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2208 int ret = 0;
2209
2210 if (get->fwmark)
b18610de 2211 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2212 else
b18610de
JV
2213 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2214 get->port);
2215
1da177e4
LT
2216 if (svc) {
2217 int count = 0;
2218 struct ip_vs_dest *dest;
2219 struct ip_vs_dest_entry entry;
2220
2221 list_for_each_entry(dest, &svc->destinations, n_list) {
2222 if (count >= get->num_dests)
2223 break;
2224
e7ade46a 2225 entry.addr = dest->addr.ip;
1da177e4
LT
2226 entry.port = dest->port;
2227 entry.conn_flags = atomic_read(&dest->conn_flags);
2228 entry.weight = atomic_read(&dest->weight);
2229 entry.u_threshold = dest->u_threshold;
2230 entry.l_threshold = dest->l_threshold;
2231 entry.activeconns = atomic_read(&dest->activeconns);
2232 entry.inactconns = atomic_read(&dest->inactconns);
2233 entry.persistconns = atomic_read(&dest->persistconns);
2234 ip_vs_copy_stats(&entry.stats, &dest->stats);
2235 if (copy_to_user(&uptr->entrytable[count],
2236 &entry, sizeof(entry))) {
2237 ret = -EFAULT;
2238 break;
2239 }
2240 count++;
2241 }
2242 ip_vs_service_put(svc);
2243 } else
2244 ret = -ESRCH;
2245 return ret;
2246}
2247
2248static inline void
2249__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2250{
2251#ifdef CONFIG_IP_VS_PROTO_TCP
2252 u->tcp_timeout =
2253 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2254 u->tcp_fin_timeout =
2255 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2256#endif
2257#ifdef CONFIG_IP_VS_PROTO_UDP
2258 u->udp_timeout =
2259 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2260#endif
2261}
2262
2263
/*
 *	Argument sizes of the IP_VS_SO_GET_* commands.
 *
 *	GET_CMDID() turns a command number into an index relative to
 *	IP_VS_BASE_CTL.  get_arglen[] holds, per command, the number of
 *	bytes do_ip_vs_get_ctl() requires as a minimum and copies in
 *	from user space.  The entry for GET_VERSION is a fixed 64 bytes.
 */
#define GET_CMDID(cmd)		(cmd - IP_VS_BASE_CTL)
#define GET_INFO_ARG_LEN	(sizeof(struct ip_vs_getinfo))
#define GET_SERVICES_ARG_LEN	(sizeof(struct ip_vs_get_services))
#define GET_SERVICE_ARG_LEN	(sizeof(struct ip_vs_service_entry))
#define GET_DESTS_ARG_LEN	(sizeof(struct ip_vs_get_dests))
#define GET_TIMEOUT_ARG_LEN	(sizeof(struct ip_vs_timeout_user))
#define GET_DAEMON_ARG_LEN	(sizeof(struct ip_vs_daemon_user) * 2)

static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
	[GET_CMDID(IP_VS_SO_GET_VERSION)]	= 64,
	[GET_CMDID(IP_VS_SO_GET_INFO)]		= GET_INFO_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICES)]	= GET_SERVICES_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_SERVICE)]	= GET_SERVICE_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DESTS)]		= GET_DESTS_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_TIMEOUT)]	= GET_TIMEOUT_ARG_LEN,
	[GET_CMDID(IP_VS_SO_GET_DAEMON)]	= GET_DAEMON_ARG_LEN,
};
2281
2282static int
2283do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2284{
2285 unsigned char arg[128];
2286 int ret = 0;
2287
2288 if (!capable(CAP_NET_ADMIN))
2289 return -EPERM;
2290
2291 if (*len < get_arglen[GET_CMDID(cmd)]) {
2292 IP_VS_ERR("get_ctl: len %u < %u\n",
2293 *len, get_arglen[GET_CMDID(cmd)]);
2294 return -EINVAL;
2295 }
2296
2297 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2298 return -EFAULT;
2299
14cc3e2b 2300 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2301 return -ERESTARTSYS;
2302
2303 switch (cmd) {
2304 case IP_VS_SO_GET_VERSION:
2305 {
2306 char buf[64];
2307
2308 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2309 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2310 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2311 ret = -EFAULT;
2312 goto out;
2313 }
2314 *len = strlen(buf)+1;
2315 }
2316 break;
2317
2318 case IP_VS_SO_GET_INFO:
2319 {
2320 struct ip_vs_getinfo info;
2321 info.version = IP_VS_VERSION_CODE;
2322 info.size = IP_VS_CONN_TAB_SIZE;
2323 info.num_services = ip_vs_num_services;
2324 if (copy_to_user(user, &info, sizeof(info)) != 0)
2325 ret = -EFAULT;
2326 }
2327 break;
2328
2329 case IP_VS_SO_GET_SERVICES:
2330 {
2331 struct ip_vs_get_services *get;
2332 int size;
2333
2334 get = (struct ip_vs_get_services *)arg;
2335 size = sizeof(*get) +
2336 sizeof(struct ip_vs_service_entry) * get->num_services;
2337 if (*len != size) {
2338 IP_VS_ERR("length: %u != %u\n", *len, size);
2339 ret = -EINVAL;
2340 goto out;
2341 }
2342 ret = __ip_vs_get_service_entries(get, user);
2343 }
2344 break;
2345
2346 case IP_VS_SO_GET_SERVICE:
2347 {
2348 struct ip_vs_service_entry *entry;
2349 struct ip_vs_service *svc;
b18610de 2350 union nf_inet_addr addr;
1da177e4
LT
2351
2352 entry = (struct ip_vs_service_entry *)arg;
b18610de 2353 addr.ip = entry->addr;
1da177e4 2354 if (entry->fwmark)
b18610de 2355 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2356 else
b18610de
JV
2357 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2358 &addr, entry->port);
1da177e4
LT
2359 if (svc) {
2360 ip_vs_copy_service(entry, svc);
2361 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2362 ret = -EFAULT;
2363 ip_vs_service_put(svc);
2364 } else
2365 ret = -ESRCH;
2366 }
2367 break;
2368
2369 case IP_VS_SO_GET_DESTS:
2370 {
2371 struct ip_vs_get_dests *get;
2372 int size;
2373
2374 get = (struct ip_vs_get_dests *)arg;
2375 size = sizeof(*get) +
2376 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2377 if (*len != size) {
2378 IP_VS_ERR("length: %u != %u\n", *len, size);
2379 ret = -EINVAL;
2380 goto out;
2381 }
2382 ret = __ip_vs_get_dest_entries(get, user);
2383 }
2384 break;
2385
2386 case IP_VS_SO_GET_TIMEOUT:
2387 {
2388 struct ip_vs_timeout_user t;
2389
2390 __ip_vs_get_timeouts(&t);
2391 if (copy_to_user(user, &t, sizeof(t)) != 0)
2392 ret = -EFAULT;
2393 }
2394 break;
2395
2396 case IP_VS_SO_GET_DAEMON:
2397 {
2398 struct ip_vs_daemon_user d[2];
2399
2400 memset(&d, 0, sizeof(d));
2401 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2402 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2403 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2404 d[0].syncid = ip_vs_master_syncid;
2405 }
2406 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2407 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2408 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2409 d[1].syncid = ip_vs_backup_syncid;
2410 }
2411 if (copy_to_user(user, &d, sizeof(d)) != 0)
2412 ret = -EFAULT;
2413 }
2414 break;
2415
2416 default:
2417 ret = -EINVAL;
2418 }
2419
2420 out:
14cc3e2b 2421 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2422 return ret;
2423}
2424
2425
2426static struct nf_sockopt_ops ip_vs_sockopts = {
2427 .pf = PF_INET,
2428 .set_optmin = IP_VS_BASE_CTL,
2429 .set_optmax = IP_VS_SO_SET_MAX+1,
2430 .set = do_ip_vs_set_ctl,
2431 .get_optmin = IP_VS_BASE_CTL,
2432 .get_optmax = IP_VS_SO_GET_MAX+1,
2433 .get = do_ip_vs_get_ctl,
16fcec35 2434 .owner = THIS_MODULE,
1da177e4
LT
2435};
2436
9a812198
JV
2437/*
2438 * Generic Netlink interface
2439 */
2440
2441/* IPVS genetlink family */
2442static struct genl_family ip_vs_genl_family = {
2443 .id = GENL_ID_GENERATE,
2444 .hdrsize = 0,
2445 .name = IPVS_GENL_NAME,
2446 .version = IPVS_GENL_VERSION,
2447 .maxattr = IPVS_CMD_MAX,
2448};
2449
2450/* Policy used for first-level command attributes */
2451static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2452 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2453 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2454 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2455 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2456 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2457 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2458};
2459
2460/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2461static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2462 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2463 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2464 .len = IP_VS_IFNAME_MAXLEN },
2465 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2466};
2467
2468/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2469static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2470 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2471 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2472 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2473 .len = sizeof(union nf_inet_addr) },
2474 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2475 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2476 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2477 .len = IP_VS_SCHEDNAME_MAXLEN },
2478 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2479 .len = sizeof(struct ip_vs_flags) },
2480 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2481 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2482 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2483};
2484
2485/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2486static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2487 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2488 .len = sizeof(union nf_inet_addr) },
2489 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2490 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2491 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2492 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2493 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2494 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2495 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2496 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2497 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2498};
2499
2500static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2501 struct ip_vs_stats *stats)
2502{
2503 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2504 if (!nl_stats)
2505 return -EMSGSIZE;
2506
2507 spin_lock_bh(&stats->lock);
2508
2509 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2510 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2511 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2512 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2513 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2514 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2515 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2516 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2517 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2518 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2519
2520 spin_unlock_bh(&stats->lock);
2521
2522 nla_nest_end(skb, nl_stats);
2523
2524 return 0;
2525
2526nla_put_failure:
2527 spin_unlock_bh(&stats->lock);
2528 nla_nest_cancel(skb, nl_stats);
2529 return -EMSGSIZE;
2530}
2531
2532static int ip_vs_genl_fill_service(struct sk_buff *skb,
2533 struct ip_vs_service *svc)
2534{
2535 struct nlattr *nl_service;
2536 struct ip_vs_flags flags = { .flags = svc->flags,
2537 .mask = ~0 };
2538
2539 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2540 if (!nl_service)
2541 return -EMSGSIZE;
2542
2543 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2544
2545 if (svc->fwmark) {
2546 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2547 } else {
2548 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2549 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2550 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2551 }
2552
2553 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2554 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2555 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2556 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2557
2558 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2559 goto nla_put_failure;
2560
2561 nla_nest_end(skb, nl_service);
2562
2563 return 0;
2564
2565nla_put_failure:
2566 nla_nest_cancel(skb, nl_service);
2567 return -EMSGSIZE;
2568}
2569
2570static int ip_vs_genl_dump_service(struct sk_buff *skb,
2571 struct ip_vs_service *svc,
2572 struct netlink_callback *cb)
2573{
2574 void *hdr;
2575
2576 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2577 &ip_vs_genl_family, NLM_F_MULTI,
2578 IPVS_CMD_NEW_SERVICE);
2579 if (!hdr)
2580 return -EMSGSIZE;
2581
2582 if (ip_vs_genl_fill_service(skb, svc) < 0)
2583 goto nla_put_failure;
2584
2585 return genlmsg_end(skb, hdr);
2586
2587nla_put_failure:
2588 genlmsg_cancel(skb, hdr);
2589 return -EMSGSIZE;
2590}
2591
2592static int ip_vs_genl_dump_services(struct sk_buff *skb,
2593 struct netlink_callback *cb)
2594{
2595 int idx = 0, i;
2596 int start = cb->args[0];
2597 struct ip_vs_service *svc;
2598
2599 mutex_lock(&__ip_vs_mutex);
2600 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2601 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2602 if (++idx <= start)
2603 continue;
2604 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2605 idx--;
2606 goto nla_put_failure;
2607 }
2608 }
2609 }
2610
2611 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2612 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2613 if (++idx <= start)
2614 continue;
2615 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2616 idx--;
2617 goto nla_put_failure;
2618 }
2619 }
2620 }
2621
2622nla_put_failure:
2623 mutex_unlock(&__ip_vs_mutex);
2624 cb->args[0] = idx;
2625
2626 return skb->len;
2627}
2628
c860c6b1 2629static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2630 struct nlattr *nla, int full_entry)
2631{
2632 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2633 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2634
2635 /* Parse mandatory identifying service fields first */
2636 if (nla == NULL ||
2637 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2638 return -EINVAL;
2639
2640 nla_af = attrs[IPVS_SVC_ATTR_AF];
2641 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2642 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2643 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2644 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2645
2646 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2647 return -EINVAL;
2648
c860c6b1 2649 usvc->af = nla_get_u16(nla_af);
9a812198
JV
2650 /* For now, only support IPv4 */
2651 if (nla_get_u16(nla_af) != AF_INET)
2652 return -EAFNOSUPPORT;
2653
2654 if (nla_fwmark) {
2655 usvc->protocol = IPPROTO_TCP;
2656 usvc->fwmark = nla_get_u32(nla_fwmark);
2657 } else {
2658 usvc->protocol = nla_get_u16(nla_protocol);
2659 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2660 usvc->port = nla_get_u16(nla_port);
2661 usvc->fwmark = 0;
2662 }
2663
2664 /* If a full entry was requested, check for the additional fields */
2665 if (full_entry) {
2666 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2667 *nla_netmask;
2668 struct ip_vs_flags flags;
2669 struct ip_vs_service *svc;
2670
2671 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2672 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2673 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2674 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2675
2676 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2677 return -EINVAL;
2678
2679 nla_memcpy(&flags, nla_flags, sizeof(flags));
2680
2681 /* prefill flags from service if it already exists */
2682 if (usvc->fwmark)
b18610de 2683 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2684 else
b18610de
JV
2685 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2686 &usvc->addr, usvc->port);
9a812198
JV
2687 if (svc) {
2688 usvc->flags = svc->flags;
2689 ip_vs_service_put(svc);
2690 } else
2691 usvc->flags = 0;
2692
2693 /* set new flags from userland */
2694 usvc->flags = (usvc->flags & ~flags.mask) |
2695 (flags.flags & flags.mask);
c860c6b1 2696 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2697 usvc->timeout = nla_get_u32(nla_timeout);
2698 usvc->netmask = nla_get_u32(nla_netmask);
2699 }
2700
2701 return 0;
2702}
2703
2704static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2705{
c860c6b1 2706 struct ip_vs_service_user_kern usvc;
9a812198
JV
2707 int ret;
2708
2709 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2710 if (ret)
2711 return ERR_PTR(ret);
2712
2713 if (usvc.fwmark)
b18610de 2714 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2715 else
b18610de
JV
2716 return __ip_vs_service_get(usvc.af, usvc.protocol,
2717 &usvc.addr, usvc.port);
9a812198
JV
2718}
2719
2720static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2721{
2722 struct nlattr *nl_dest;
2723
2724 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2725 if (!nl_dest)
2726 return -EMSGSIZE;
2727
2728 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2729 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2730
2731 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2732 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2733 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2734 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2735 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2736 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2737 atomic_read(&dest->activeconns));
2738 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2739 atomic_read(&dest->inactconns));
2740 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2741 atomic_read(&dest->persistconns));
2742
2743 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2744 goto nla_put_failure;
2745
2746 nla_nest_end(skb, nl_dest);
2747
2748 return 0;
2749
2750nla_put_failure:
2751 nla_nest_cancel(skb, nl_dest);
2752 return -EMSGSIZE;
2753}
2754
2755static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2756 struct netlink_callback *cb)
2757{
2758 void *hdr;
2759
2760 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2761 &ip_vs_genl_family, NLM_F_MULTI,
2762 IPVS_CMD_NEW_DEST);
2763 if (!hdr)
2764 return -EMSGSIZE;
2765
2766 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2767 goto nla_put_failure;
2768
2769 return genlmsg_end(skb, hdr);
2770
2771nla_put_failure:
2772 genlmsg_cancel(skb, hdr);
2773 return -EMSGSIZE;
2774}
2775
2776static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2777 struct netlink_callback *cb)
2778{
2779 int idx = 0;
2780 int start = cb->args[0];
2781 struct ip_vs_service *svc;
2782 struct ip_vs_dest *dest;
2783 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2784
2785 mutex_lock(&__ip_vs_mutex);
2786
2787 /* Try to find the service for which to dump destinations */
2788 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2789 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2790 goto out_err;
2791
2792 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2793 if (IS_ERR(svc) || svc == NULL)
2794 goto out_err;
2795
2796 /* Dump the destinations */
2797 list_for_each_entry(dest, &svc->destinations, n_list) {
2798 if (++idx <= start)
2799 continue;
2800 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2801 idx--;
2802 goto nla_put_failure;
2803 }
2804 }
2805
2806nla_put_failure:
2807 cb->args[0] = idx;
2808 ip_vs_service_put(svc);
2809
2810out_err:
2811 mutex_unlock(&__ip_vs_mutex);
2812
2813 return skb->len;
2814}
2815
c860c6b1 2816static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2817 struct nlattr *nla, int full_entry)
2818{
2819 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2820 struct nlattr *nla_addr, *nla_port;
2821
2822 /* Parse mandatory identifying destination fields first */
2823 if (nla == NULL ||
2824 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2825 return -EINVAL;
2826
2827 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2828 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2829
2830 if (!(nla_addr && nla_port))
2831 return -EINVAL;
2832
2833 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2834 udest->port = nla_get_u16(nla_port);
2835
2836 /* If a full entry was requested, check for the additional fields */
2837 if (full_entry) {
2838 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2839 *nla_l_thresh;
2840
2841 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2842 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2843 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2844 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2845
2846 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2847 return -EINVAL;
2848
2849 udest->conn_flags = nla_get_u32(nla_fwd)
2850 & IP_VS_CONN_F_FWD_MASK;
2851 udest->weight = nla_get_u32(nla_weight);
2852 udest->u_threshold = nla_get_u32(nla_u_thresh);
2853 udest->l_threshold = nla_get_u32(nla_l_thresh);
2854 }
2855
2856 return 0;
2857}
2858
2859static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2860 const char *mcast_ifn, __be32 syncid)
2861{
2862 struct nlattr *nl_daemon;
2863
2864 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2865 if (!nl_daemon)
2866 return -EMSGSIZE;
2867
2868 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2869 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2870 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2871
2872 nla_nest_end(skb, nl_daemon);
2873
2874 return 0;
2875
2876nla_put_failure:
2877 nla_nest_cancel(skb, nl_daemon);
2878 return -EMSGSIZE;
2879}
2880
2881static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2882 const char *mcast_ifn, __be32 syncid,
2883 struct netlink_callback *cb)
2884{
2885 void *hdr;
2886 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2887 &ip_vs_genl_family, NLM_F_MULTI,
2888 IPVS_CMD_NEW_DAEMON);
2889 if (!hdr)
2890 return -EMSGSIZE;
2891
2892 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2893 goto nla_put_failure;
2894
2895 return genlmsg_end(skb, hdr);
2896
2897nla_put_failure:
2898 genlmsg_cancel(skb, hdr);
2899 return -EMSGSIZE;
2900}
2901
2902static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2903 struct netlink_callback *cb)
2904{
2905 mutex_lock(&__ip_vs_mutex);
2906 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2907 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2908 ip_vs_master_mcast_ifn,
2909 ip_vs_master_syncid, cb) < 0)
2910 goto nla_put_failure;
2911
2912 cb->args[0] = 1;
2913 }
2914
2915 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2916 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2917 ip_vs_backup_mcast_ifn,
2918 ip_vs_backup_syncid, cb) < 0)
2919 goto nla_put_failure;
2920
2921 cb->args[1] = 1;
2922 }
2923
2924nla_put_failure:
2925 mutex_unlock(&__ip_vs_mutex);
2926
2927 return skb->len;
2928}
2929
2930static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2931{
2932 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2933 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2934 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2935 return -EINVAL;
2936
2937 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2938 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2939 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2940}
2941
2942static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2943{
2944 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2945 return -EINVAL;
2946
2947 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2948}
2949
2950static int ip_vs_genl_set_config(struct nlattr **attrs)
2951{
2952 struct ip_vs_timeout_user t;
2953
2954 __ip_vs_get_timeouts(&t);
2955
2956 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2957 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2958
2959 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2960 t.tcp_fin_timeout =
2961 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2962
2963 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2964 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2965
2966 return ip_vs_set_timeout(&t);
2967}
2968
2969static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2970{
2971 struct ip_vs_service *svc = NULL;
c860c6b1
JV
2972 struct ip_vs_service_user_kern usvc;
2973 struct ip_vs_dest_user_kern udest;
9a812198
JV
2974 int ret = 0, cmd;
2975 int need_full_svc = 0, need_full_dest = 0;
2976
2977 cmd = info->genlhdr->cmd;
2978
2979 mutex_lock(&__ip_vs_mutex);
2980
2981 if (cmd == IPVS_CMD_FLUSH) {
2982 ret = ip_vs_flush();
2983 goto out;
2984 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2985 ret = ip_vs_genl_set_config(info->attrs);
2986 goto out;
2987 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2988 cmd == IPVS_CMD_DEL_DAEMON) {
2989
2990 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2991
2992 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2993 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2994 info->attrs[IPVS_CMD_ATTR_DAEMON],
2995 ip_vs_daemon_policy)) {
2996 ret = -EINVAL;
2997 goto out;
2998 }
2999
3000 if (cmd == IPVS_CMD_NEW_DAEMON)
3001 ret = ip_vs_genl_new_daemon(daemon_attrs);
3002 else
3003 ret = ip_vs_genl_del_daemon(daemon_attrs);
3004 goto out;
3005 } else if (cmd == IPVS_CMD_ZERO &&
3006 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3007 ret = ip_vs_zero_all();
3008 goto out;
3009 }
3010
3011 /* All following commands require a service argument, so check if we
3012 * received a valid one. We need a full service specification when
3013 * adding / editing a service. Only identifying members otherwise. */
3014 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3015 need_full_svc = 1;
3016
3017 ret = ip_vs_genl_parse_service(&usvc,
3018 info->attrs[IPVS_CMD_ATTR_SERVICE],
3019 need_full_svc);
3020 if (ret)
3021 goto out;
3022
3023 /* Lookup the exact service by <protocol, addr, port> or fwmark */
3024 if (usvc.fwmark == 0)
b18610de
JV
3025 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3026 &usvc.addr, usvc.port);
9a812198 3027 else
b18610de 3028 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3029
3030 /* Unless we're adding a new service, the service must already exist */
3031 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3032 ret = -ESRCH;
3033 goto out;
3034 }
3035
3036 /* Destination commands require a valid destination argument. For
3037 * adding / editing a destination, we need a full destination
3038 * specification. */
3039 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3040 cmd == IPVS_CMD_DEL_DEST) {
3041 if (cmd != IPVS_CMD_DEL_DEST)
3042 need_full_dest = 1;
3043
3044 ret = ip_vs_genl_parse_dest(&udest,
3045 info->attrs[IPVS_CMD_ATTR_DEST],
3046 need_full_dest);
3047 if (ret)
3048 goto out;
3049 }
3050
3051 switch (cmd) {
3052 case IPVS_CMD_NEW_SERVICE:
3053 if (svc == NULL)
3054 ret = ip_vs_add_service(&usvc, &svc);
3055 else
3056 ret = -EEXIST;
3057 break;
3058 case IPVS_CMD_SET_SERVICE:
3059 ret = ip_vs_edit_service(svc, &usvc);
3060 break;
3061 case IPVS_CMD_DEL_SERVICE:
3062 ret = ip_vs_del_service(svc);
3063 break;
3064 case IPVS_CMD_NEW_DEST:
3065 ret = ip_vs_add_dest(svc, &udest);
3066 break;
3067 case IPVS_CMD_SET_DEST:
3068 ret = ip_vs_edit_dest(svc, &udest);
3069 break;
3070 case IPVS_CMD_DEL_DEST:
3071 ret = ip_vs_del_dest(svc, &udest);
3072 break;
3073 case IPVS_CMD_ZERO:
3074 ret = ip_vs_zero_service(svc);
3075 break;
3076 default:
3077 ret = -EINVAL;
3078 }
3079
3080out:
3081 if (svc)
3082 ip_vs_service_put(svc);
3083 mutex_unlock(&__ip_vs_mutex);
3084
3085 return ret;
3086}
3087
3088static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3089{
3090 struct sk_buff *msg;
3091 void *reply;
3092 int ret, cmd, reply_cmd;
3093
3094 cmd = info->genlhdr->cmd;
3095
3096 if (cmd == IPVS_CMD_GET_SERVICE)
3097 reply_cmd = IPVS_CMD_NEW_SERVICE;
3098 else if (cmd == IPVS_CMD_GET_INFO)
3099 reply_cmd = IPVS_CMD_SET_INFO;
3100 else if (cmd == IPVS_CMD_GET_CONFIG)
3101 reply_cmd = IPVS_CMD_SET_CONFIG;
3102 else {
3103 IP_VS_ERR("unknown Generic Netlink command\n");
3104 return -EINVAL;
3105 }
3106
3107 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3108 if (!msg)
3109 return -ENOMEM;
3110
3111 mutex_lock(&__ip_vs_mutex);
3112
3113 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3114 if (reply == NULL)
3115 goto nla_put_failure;
3116
3117 switch (cmd) {
3118 case IPVS_CMD_GET_SERVICE:
3119 {
3120 struct ip_vs_service *svc;
3121
3122 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3123 if (IS_ERR(svc)) {
3124 ret = PTR_ERR(svc);
3125 goto out_err;
3126 } else if (svc) {
3127 ret = ip_vs_genl_fill_service(msg, svc);
3128 ip_vs_service_put(svc);
3129 if (ret)
3130 goto nla_put_failure;
3131 } else {
3132 ret = -ESRCH;
3133 goto out_err;
3134 }
3135
3136 break;
3137 }
3138
3139 case IPVS_CMD_GET_CONFIG:
3140 {
3141 struct ip_vs_timeout_user t;
3142
3143 __ip_vs_get_timeouts(&t);
3144#ifdef CONFIG_IP_VS_PROTO_TCP
3145 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3146 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3147 t.tcp_fin_timeout);
3148#endif
3149#ifdef CONFIG_IP_VS_PROTO_UDP
3150 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3151#endif
3152
3153 break;
3154 }
3155
3156 case IPVS_CMD_GET_INFO:
3157 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3158 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3159 IP_VS_CONN_TAB_SIZE);
3160 break;
3161 }
3162
3163 genlmsg_end(msg, reply);
3164 ret = genlmsg_unicast(msg, info->snd_pid);
3165 goto out;
3166
3167nla_put_failure:
3168 IP_VS_ERR("not enough space in Netlink message\n");
3169 ret = -EMSGSIZE;
3170
3171out_err:
3172 nlmsg_free(msg);
3173out:
3174 mutex_unlock(&__ip_vs_mutex);
3175
3176 return ret;
3177}
3178
3179
3180static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3181 {
3182 .cmd = IPVS_CMD_NEW_SERVICE,
3183 .flags = GENL_ADMIN_PERM,
3184 .policy = ip_vs_cmd_policy,
3185 .doit = ip_vs_genl_set_cmd,
3186 },
3187 {
3188 .cmd = IPVS_CMD_SET_SERVICE,
3189 .flags = GENL_ADMIN_PERM,
3190 .policy = ip_vs_cmd_policy,
3191 .doit = ip_vs_genl_set_cmd,
3192 },
3193 {
3194 .cmd = IPVS_CMD_DEL_SERVICE,
3195 .flags = GENL_ADMIN_PERM,
3196 .policy = ip_vs_cmd_policy,
3197 .doit = ip_vs_genl_set_cmd,
3198 },
3199 {
3200 .cmd = IPVS_CMD_GET_SERVICE,
3201 .flags = GENL_ADMIN_PERM,
3202 .doit = ip_vs_genl_get_cmd,
3203 .dumpit = ip_vs_genl_dump_services,
3204 .policy = ip_vs_cmd_policy,
3205 },
3206 {
3207 .cmd = IPVS_CMD_NEW_DEST,
3208 .flags = GENL_ADMIN_PERM,
3209 .policy = ip_vs_cmd_policy,
3210 .doit = ip_vs_genl_set_cmd,
3211 },
3212 {
3213 .cmd = IPVS_CMD_SET_DEST,
3214 .flags = GENL_ADMIN_PERM,
3215 .policy = ip_vs_cmd_policy,
3216 .doit = ip_vs_genl_set_cmd,
3217 },
3218 {
3219 .cmd = IPVS_CMD_DEL_DEST,
3220 .flags = GENL_ADMIN_PERM,
3221 .policy = ip_vs_cmd_policy,
3222 .doit = ip_vs_genl_set_cmd,
3223 },
3224 {
3225 .cmd = IPVS_CMD_GET_DEST,
3226 .flags = GENL_ADMIN_PERM,
3227 .policy = ip_vs_cmd_policy,
3228 .dumpit = ip_vs_genl_dump_dests,
3229 },
3230 {
3231 .cmd = IPVS_CMD_NEW_DAEMON,
3232 .flags = GENL_ADMIN_PERM,
3233 .policy = ip_vs_cmd_policy,
3234 .doit = ip_vs_genl_set_cmd,
3235 },
3236 {
3237 .cmd = IPVS_CMD_DEL_DAEMON,
3238 .flags = GENL_ADMIN_PERM,
3239 .policy = ip_vs_cmd_policy,
3240 .doit = ip_vs_genl_set_cmd,
3241 },
3242 {
3243 .cmd = IPVS_CMD_GET_DAEMON,
3244 .flags = GENL_ADMIN_PERM,
3245 .dumpit = ip_vs_genl_dump_daemons,
3246 },
3247 {
3248 .cmd = IPVS_CMD_SET_CONFIG,
3249 .flags = GENL_ADMIN_PERM,
3250 .policy = ip_vs_cmd_policy,
3251 .doit = ip_vs_genl_set_cmd,
3252 },
3253 {
3254 .cmd = IPVS_CMD_GET_CONFIG,
3255 .flags = GENL_ADMIN_PERM,
3256 .doit = ip_vs_genl_get_cmd,
3257 },
3258 {
3259 .cmd = IPVS_CMD_GET_INFO,
3260 .flags = GENL_ADMIN_PERM,
3261 .doit = ip_vs_genl_get_cmd,
3262 },
3263 {
3264 .cmd = IPVS_CMD_ZERO,
3265 .flags = GENL_ADMIN_PERM,
3266 .policy = ip_vs_cmd_policy,
3267 .doit = ip_vs_genl_set_cmd,
3268 },
3269 {
3270 .cmd = IPVS_CMD_FLUSH,
3271 .flags = GENL_ADMIN_PERM,
3272 .doit = ip_vs_genl_set_cmd,
3273 },
3274};
3275
3276static int __init ip_vs_genl_register(void)
3277{
3278 int ret, i;
3279
3280 ret = genl_register_family(&ip_vs_genl_family);
3281 if (ret)
3282 return ret;
3283
3284 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3285 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3286 if (ret)
3287 goto err_out;
3288 }
3289 return 0;
3290
3291err_out:
3292 genl_unregister_family(&ip_vs_genl_family);
3293 return ret;
3294}
3295
3296static void ip_vs_genl_unregister(void)
3297{
3298 genl_unregister_family(&ip_vs_genl_family);
3299}
3300
3301/* End of Generic Netlink interface definitions */
3302
1da177e4 3303
048cf48b 3304int __init ip_vs_control_init(void)
1da177e4
LT
3305{
3306 int ret;
3307 int idx;
3308
3309 EnterFunction(2);
3310
3311 ret = nf_register_sockopt(&ip_vs_sockopts);
3312 if (ret) {
3313 IP_VS_ERR("cannot register sockopt.\n");
3314 return ret;
3315 }
3316
9a812198
JV
3317 ret = ip_vs_genl_register();
3318 if (ret) {
3319 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3320 nf_unregister_sockopt(&ip_vs_sockopts);
3321 return ret;
3322 }
3323
457c4cbc
EB
3324 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3325 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3326
90754f8e 3327 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3328
3329 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3330 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3331 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3332 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3333 }
3334 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3335 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3336 }
3337
1da177e4
LT
3338 ip_vs_new_estimator(&ip_vs_stats);
3339
3340 /* Hook the defense timer */
3341 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3342
3343 LeaveFunction(2);
3344 return 0;
3345}
3346
3347
3348void ip_vs_control_cleanup(void)
3349{
3350 EnterFunction(2);
3351 ip_vs_trash_cleanup();
3352 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3353 cancel_work_sync(&defense_work.work);
1da177e4
LT
3354 ip_vs_kill_estimator(&ip_vs_stats);
3355 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3356 proc_net_remove(&init_net, "ip_vs_stats");
3357 proc_net_remove(&init_net, "ip_vs");
9a812198 3358 ip_vs_genl_unregister();
1da177e4
LT
3359 nf_unregister_sockopt(&ip_vs_sockopts);
3360 LeaveFunction(2);
3361}