]> bbs.cooldavid.org Git - net-next-2.6.git/blame - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Convert real server lookup functions
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
CommitLineData
1da177e4
LT
1/*
2 * IPVS An implementation of the IP virtual server support for the
3 * LINUX operating system. IPVS is now implemented as a module
4 * over the NetFilter framework. IPVS can be used to build a
5 * high-performance and highly available server based on a
6 * cluster of servers.
7 *
1da177e4
LT
8 * Authors: Wensong Zhang <wensong@linuxvirtualserver.org>
9 * Peter Kese <peter.kese@ijs.si>
10 * Julian Anastasov <ja@ssi.bg>
11 *
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
16 *
17 * Changes:
18 *
19 */
20
21#include <linux/module.h>
22#include <linux/init.h>
23#include <linux/types.h>
4fc268d2 24#include <linux/capability.h>
1da177e4
LT
25#include <linux/fs.h>
26#include <linux/sysctl.h>
27#include <linux/proc_fs.h>
28#include <linux/workqueue.h>
29#include <linux/swap.h>
1da177e4
LT
30#include <linux/seq_file.h>
31
32#include <linux/netfilter.h>
33#include <linux/netfilter_ipv4.h>
14cc3e2b 34#include <linux/mutex.h>
1da177e4 35
457c4cbc 36#include <net/net_namespace.h>
1da177e4 37#include <net/ip.h>
14c85021 38#include <net/route.h>
1da177e4 39#include <net/sock.h>
9a812198 40#include <net/genetlink.h>
1da177e4
LT
41
42#include <asm/uaccess.h>
43
44#include <net/ip_vs.h>
45
46/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
14cc3e2b 47static DEFINE_MUTEX(__ip_vs_mutex);
1da177e4
LT
48
49/* lock for service table */
50static DEFINE_RWLOCK(__ip_vs_svc_lock);
51
52/* lock for table with the real services */
53static DEFINE_RWLOCK(__ip_vs_rs_lock);
54
55/* lock for state and timeout tables */
56static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
57
58/* lock for drop entry handling */
59static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
60
61/* lock for drop packet handling */
62static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
63
64/* 1/rate drop and drop-entry variables */
65int ip_vs_drop_rate = 0;
66int ip_vs_drop_counter = 0;
67static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
68
69/* number of virtual services */
70static int ip_vs_num_services = 0;
71
72/* sysctl variables */
73static int sysctl_ip_vs_drop_entry = 0;
74static int sysctl_ip_vs_drop_packet = 0;
75static int sysctl_ip_vs_secure_tcp = 0;
76static int sysctl_ip_vs_amemthresh = 1024;
77static int sysctl_ip_vs_am_droprate = 10;
78int sysctl_ip_vs_cache_bypass = 0;
79int sysctl_ip_vs_expire_nodest_conn = 0;
80int sysctl_ip_vs_expire_quiescent_template = 0;
81int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
82int sysctl_ip_vs_nat_icmp_send = 0;
83
84
#ifdef CONFIG_IP_VS_DEBUG
/* Current debug verbosity; starts at zero. */
static int sysctl_ip_vs_debug_level;

/* Return the current IPVS debug level. */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
93
94/*
af9debd4
JA
95 * update_defense_level is called from keventd and from sysctl,
96 * so it needs to protect itself from softirqs
1da177e4
LT
97 */
98static void update_defense_level(void)
99{
100 struct sysinfo i;
101 static int old_secure_tcp = 0;
102 int availmem;
103 int nomem;
104 int to_change = -1;
105
106 /* we only count free and buffered memory (in pages) */
107 si_meminfo(&i);
108 availmem = i.freeram + i.bufferram;
109 /* however in linux 2.5 the i.bufferram is total page cache size,
110 we need adjust it */
111 /* si_swapinfo(&i); */
112 /* availmem = availmem - (i.totalswap - i.freeswap); */
113
114 nomem = (availmem < sysctl_ip_vs_amemthresh);
115
af9debd4
JA
116 local_bh_disable();
117
1da177e4
LT
118 /* drop_entry */
119 spin_lock(&__ip_vs_dropentry_lock);
120 switch (sysctl_ip_vs_drop_entry) {
121 case 0:
122 atomic_set(&ip_vs_dropentry, 0);
123 break;
124 case 1:
125 if (nomem) {
126 atomic_set(&ip_vs_dropentry, 1);
127 sysctl_ip_vs_drop_entry = 2;
128 } else {
129 atomic_set(&ip_vs_dropentry, 0);
130 }
131 break;
132 case 2:
133 if (nomem) {
134 atomic_set(&ip_vs_dropentry, 1);
135 } else {
136 atomic_set(&ip_vs_dropentry, 0);
137 sysctl_ip_vs_drop_entry = 1;
138 };
139 break;
140 case 3:
141 atomic_set(&ip_vs_dropentry, 1);
142 break;
143 }
144 spin_unlock(&__ip_vs_dropentry_lock);
145
146 /* drop_packet */
147 spin_lock(&__ip_vs_droppacket_lock);
148 switch (sysctl_ip_vs_drop_packet) {
149 case 0:
150 ip_vs_drop_rate = 0;
151 break;
152 case 1:
153 if (nomem) {
154 ip_vs_drop_rate = ip_vs_drop_counter
155 = sysctl_ip_vs_amemthresh /
156 (sysctl_ip_vs_amemthresh-availmem);
157 sysctl_ip_vs_drop_packet = 2;
158 } else {
159 ip_vs_drop_rate = 0;
160 }
161 break;
162 case 2:
163 if (nomem) {
164 ip_vs_drop_rate = ip_vs_drop_counter
165 = sysctl_ip_vs_amemthresh /
166 (sysctl_ip_vs_amemthresh-availmem);
167 } else {
168 ip_vs_drop_rate = 0;
169 sysctl_ip_vs_drop_packet = 1;
170 }
171 break;
172 case 3:
173 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
174 break;
175 }
176 spin_unlock(&__ip_vs_droppacket_lock);
177
178 /* secure_tcp */
179 write_lock(&__ip_vs_securetcp_lock);
180 switch (sysctl_ip_vs_secure_tcp) {
181 case 0:
182 if (old_secure_tcp >= 2)
183 to_change = 0;
184 break;
185 case 1:
186 if (nomem) {
187 if (old_secure_tcp < 2)
188 to_change = 1;
189 sysctl_ip_vs_secure_tcp = 2;
190 } else {
191 if (old_secure_tcp >= 2)
192 to_change = 0;
193 }
194 break;
195 case 2:
196 if (nomem) {
197 if (old_secure_tcp < 2)
198 to_change = 1;
199 } else {
200 if (old_secure_tcp >= 2)
201 to_change = 0;
202 sysctl_ip_vs_secure_tcp = 1;
203 }
204 break;
205 case 3:
206 if (old_secure_tcp < 2)
207 to_change = 1;
208 break;
209 }
210 old_secure_tcp = sysctl_ip_vs_secure_tcp;
211 if (to_change >= 0)
212 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
213 write_unlock(&__ip_vs_securetcp_lock);
af9debd4
JA
214
215 local_bh_enable();
1da177e4
LT
216}
217
218
219/*
220 * Timer for checking the defense
221 */
222#define DEFENSE_TIMER_PERIOD 1*HZ
c4028958
DH
223static void defense_work_handler(struct work_struct *work);
224static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
1da177e4 225
c4028958 226static void defense_work_handler(struct work_struct *work)
1da177e4
LT
227{
228 update_defense_level();
229 if (atomic_read(&ip_vs_dropentry))
230 ip_vs_random_dropentry();
231
232 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
233}
234
235int
236ip_vs_use_count_inc(void)
237{
238 return try_module_get(THIS_MODULE);
239}
240
241void
242ip_vs_use_count_dec(void)
243{
244 module_put(THIS_MODULE);
245}
246
247
248/*
249 * Hash table: for virtual service lookups
250 */
251#define IP_VS_SVC_TAB_BITS 8
252#define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
253#define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
254
255/* the service table hashed by <protocol, addr, port> */
256static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
257/* the service table hashed by fwmark */
258static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
259
260/*
261 * Hash table: for real service lookups
262 */
263#define IP_VS_RTAB_BITS 4
264#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
265#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
266
267static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
268
269/*
270 * Trash for destinations
271 */
272static LIST_HEAD(ip_vs_dest_trash);
273
274/*
275 * FTP & NULL virtual service counters
276 */
277static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
278static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
279
280
281/*
282 * Returns hash value for virtual service
283 */
284static __inline__ unsigned
b18610de
JV
285ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
286 __be16 port)
1da177e4
LT
287{
288 register unsigned porth = ntohs(port);
b18610de 289 __be32 addr_fold = addr->ip;
1da177e4 290
b18610de
JV
291#ifdef CONFIG_IP_VS_IPV6
292 if (af == AF_INET6)
293 addr_fold = addr->ip6[0]^addr->ip6[1]^
294 addr->ip6[2]^addr->ip6[3];
295#endif
296
297 return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
1da177e4
LT
298 & IP_VS_SVC_TAB_MASK;
299}
300
301/*
302 * Returns hash value of fwmark for virtual service lookup
303 */
304static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
305{
306 return fwmark & IP_VS_SVC_TAB_MASK;
307}
308
309/*
310 * Hashes a service in the ip_vs_svc_table by <proto,addr,port>
311 * or in the ip_vs_svc_fwm_table by fwmark.
312 * Should be called with locked tables.
313 */
314static int ip_vs_svc_hash(struct ip_vs_service *svc)
315{
316 unsigned hash;
317
318 if (svc->flags & IP_VS_SVC_F_HASHED) {
319 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
320 "called from %p\n", __builtin_return_address(0));
321 return 0;
322 }
323
324 if (svc->fwmark == 0) {
325 /*
326 * Hash it by <protocol,addr,port> in ip_vs_svc_table
327 */
b18610de 328 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
e7ade46a 329 svc->port);
1da177e4
LT
330 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
331 } else {
332 /*
333 * Hash it by fwmark in ip_vs_svc_fwm_table
334 */
335 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
336 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
337 }
338
339 svc->flags |= IP_VS_SVC_F_HASHED;
340 /* increase its refcnt because it is referenced by the svc table */
341 atomic_inc(&svc->refcnt);
342 return 1;
343}
344
345
346/*
347 * Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
348 * Should be called with locked tables.
349 */
350static int ip_vs_svc_unhash(struct ip_vs_service *svc)
351{
352 if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
353 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
354 "called from %p\n", __builtin_return_address(0));
355 return 0;
356 }
357
358 if (svc->fwmark == 0) {
359 /* Remove it from the ip_vs_svc_table table */
360 list_del(&svc->s_list);
361 } else {
362 /* Remove it from the ip_vs_svc_fwm_table table */
363 list_del(&svc->f_list);
364 }
365
366 svc->flags &= ~IP_VS_SVC_F_HASHED;
367 atomic_dec(&svc->refcnt);
368 return 1;
369}
370
371
372/*
373 * Get service by {proto,addr,port} in the service table.
374 */
b18610de
JV
375static inline struct ip_vs_service *
376__ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
377 __be16 vport)
1da177e4
LT
378{
379 unsigned hash;
380 struct ip_vs_service *svc;
381
382 /* Check for "full" addressed entries */
b18610de 383 hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
1da177e4
LT
384
385 list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
b18610de
JV
386 if ((svc->af == af)
387 && ip_vs_addr_equal(af, &svc->addr, vaddr)
1da177e4
LT
388 && (svc->port == vport)
389 && (svc->protocol == protocol)) {
390 /* HIT */
391 atomic_inc(&svc->usecnt);
392 return svc;
393 }
394 }
395
396 return NULL;
397}
398
399
400/*
401 * Get service by {fwmark} in the service table.
402 */
b18610de
JV
403static inline struct ip_vs_service *
404__ip_vs_svc_fwm_get(int af, __u32 fwmark)
1da177e4
LT
405{
406 unsigned hash;
407 struct ip_vs_service *svc;
408
409 /* Check for fwmark addressed entries */
410 hash = ip_vs_svc_fwm_hashkey(fwmark);
411
412 list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
b18610de 413 if (svc->fwmark == fwmark && svc->af == af) {
1da177e4
LT
414 /* HIT */
415 atomic_inc(&svc->usecnt);
416 return svc;
417 }
418 }
419
420 return NULL;
421}
422
423struct ip_vs_service *
3c2e0505
JV
424ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
425 const union nf_inet_addr *vaddr, __be16 vport)
1da177e4
LT
426{
427 struct ip_vs_service *svc;
3c2e0505 428
1da177e4
LT
429 read_lock(&__ip_vs_svc_lock);
430
431 /*
432 * Check the table hashed by fwmark first
433 */
3c2e0505 434 if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
1da177e4
LT
435 goto out;
436
437 /*
438 * Check the table hashed by <protocol,addr,port>
439 * for "full" addressed entries
440 */
3c2e0505 441 svc = __ip_vs_service_get(af, protocol, vaddr, vport);
1da177e4
LT
442
443 if (svc == NULL
444 && protocol == IPPROTO_TCP
445 && atomic_read(&ip_vs_ftpsvc_counter)
446 && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
447 /*
448 * Check if ftp service entry exists, the packet
449 * might belong to FTP data connections.
450 */
3c2e0505 451 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
1da177e4
LT
452 }
453
454 if (svc == NULL
455 && atomic_read(&ip_vs_nullsvc_counter)) {
456 /*
457 * Check if the catch-all port (port zero) exists
458 */
3c2e0505 459 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
1da177e4
LT
460 }
461
462 out:
463 read_unlock(&__ip_vs_svc_lock);
464
3c2e0505
JV
465 IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
466 fwmark, ip_vs_proto_name(protocol),
467 IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
468 svc ? "hit" : "not hit");
1da177e4
LT
469
470 return svc;
471}
472
473
474static inline void
475__ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
476{
477 atomic_inc(&svc->refcnt);
478 dest->svc = svc;
479}
480
481static inline void
482__ip_vs_unbind_svc(struct ip_vs_dest *dest)
483{
484 struct ip_vs_service *svc = dest->svc;
485
486 dest->svc = NULL;
487 if (atomic_dec_and_test(&svc->refcnt))
488 kfree(svc);
489}
490
491
492/*
493 * Returns hash value for real service
494 */
7937df15
JV
495static inline unsigned ip_vs_rs_hashkey(int af,
496 const union nf_inet_addr *addr,
497 __be16 port)
1da177e4
LT
498{
499 register unsigned porth = ntohs(port);
7937df15
JV
500 __be32 addr_fold = addr->ip;
501
502#ifdef CONFIG_IP_VS_IPV6
503 if (af == AF_INET6)
504 addr_fold = addr->ip6[0]^addr->ip6[1]^
505 addr->ip6[2]^addr->ip6[3];
506#endif
1da177e4 507
7937df15 508 return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
1da177e4
LT
509 & IP_VS_RTAB_MASK;
510}
511
512/*
513 * Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
514 * should be called with locked tables.
515 */
516static int ip_vs_rs_hash(struct ip_vs_dest *dest)
517{
518 unsigned hash;
519
520 if (!list_empty(&dest->d_list)) {
521 return 0;
522 }
523
524 /*
525 * Hash by proto,addr,port,
526 * which are the parameters of the real service.
527 */
7937df15
JV
528 hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
529
1da177e4
LT
530 list_add(&dest->d_list, &ip_vs_rtable[hash]);
531
532 return 1;
533}
534
535/*
536 * UNhashes ip_vs_dest from ip_vs_rtable.
537 * should be called with locked tables.
538 */
539static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
540{
541 /*
542 * Remove it from the ip_vs_rtable table.
543 */
544 if (!list_empty(&dest->d_list)) {
545 list_del(&dest->d_list);
546 INIT_LIST_HEAD(&dest->d_list);
547 }
548
549 return 1;
550}
551
552/*
553 * Lookup real service by <proto,addr,port> in the real service table.
554 */
555struct ip_vs_dest *
7937df15
JV
556ip_vs_lookup_real_service(int af, __u16 protocol,
557 const union nf_inet_addr *daddr,
558 __be16 dport)
1da177e4
LT
559{
560 unsigned hash;
561 struct ip_vs_dest *dest;
562
563 /*
564 * Check for "full" addressed entries
565 * Return the first found entry
566 */
7937df15 567 hash = ip_vs_rs_hashkey(af, daddr, dport);
1da177e4
LT
568
569 read_lock(&__ip_vs_rs_lock);
570 list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
7937df15
JV
571 if ((dest->af == af)
572 && ip_vs_addr_equal(af, &dest->addr, daddr)
1da177e4
LT
573 && (dest->port == dport)
574 && ((dest->protocol == protocol) ||
575 dest->vfwmark)) {
576 /* HIT */
577 read_unlock(&__ip_vs_rs_lock);
578 return dest;
579 }
580 }
581 read_unlock(&__ip_vs_rs_lock);
582
583 return NULL;
584}
585
586/*
587 * Lookup destination by {addr,port} in the given service
588 */
589static struct ip_vs_dest *
7937df15
JV
590ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
591 __be16 dport)
1da177e4
LT
592{
593 struct ip_vs_dest *dest;
594
595 /*
596 * Find the destination for the given service
597 */
598 list_for_each_entry(dest, &svc->destinations, n_list) {
7937df15
JV
599 if ((dest->af == svc->af)
600 && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
601 && (dest->port == dport)) {
1da177e4
LT
602 /* HIT */
603 return dest;
604 }
605 }
606
607 return NULL;
608}
609
1e356f9c
RB
610/*
611 * Find destination by {daddr,dport,vaddr,protocol}
612 * Cretaed to be used in ip_vs_process_message() in
613 * the backup synchronization daemon. It finds the
614 * destination to be bound to the received connection
615 * on the backup.
616 *
617 * ip_vs_lookup_real_service() looked promissing, but
618 * seems not working as expected.
619 */
7937df15
JV
620struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
621 __be16 dport,
622 const union nf_inet_addr *vaddr,
623 __be16 vport, __u16 protocol)
1e356f9c
RB
624{
625 struct ip_vs_dest *dest;
626 struct ip_vs_service *svc;
627
7937df15 628 svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
1e356f9c
RB
629 if (!svc)
630 return NULL;
631 dest = ip_vs_lookup_dest(svc, daddr, dport);
632 if (dest)
633 atomic_inc(&dest->refcnt);
634 ip_vs_service_put(svc);
635 return dest;
636}
1da177e4
LT
637
638/*
639 * Lookup dest by {svc,addr,port} in the destination trash.
640 * The destination trash is used to hold the destinations that are removed
641 * from the service table but are still referenced by some conn entries.
642 * The reason to add the destination trash is when the dest is temporary
643 * down (either by administrator or by monitor program), the dest can be
644 * picked back from the trash, the remaining connections to the dest can
645 * continue, and the counting information of the dest is also useful for
646 * scheduling.
647 */
648static struct ip_vs_dest *
7937df15
JV
649ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
650 __be16 dport)
1da177e4
LT
651{
652 struct ip_vs_dest *dest, *nxt;
653
654 /*
655 * Find the destination in trash
656 */
657 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
7937df15
JV
658 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
659 "dest->refcnt=%d\n",
660 dest->vfwmark,
661 IP_VS_DBG_ADDR(svc->af, &dest->addr),
662 ntohs(dest->port),
663 atomic_read(&dest->refcnt));
664 if (dest->af == svc->af &&
665 ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
1da177e4
LT
666 dest->port == dport &&
667 dest->vfwmark == svc->fwmark &&
668 dest->protocol == svc->protocol &&
669 (svc->fwmark ||
7937df15 670 (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
1da177e4
LT
671 dest->vport == svc->port))) {
672 /* HIT */
673 return dest;
674 }
675
676 /*
677 * Try to purge the destination from trash if not referenced
678 */
679 if (atomic_read(&dest->refcnt) == 1) {
7937df15
JV
680 IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
681 "from trash\n",
682 dest->vfwmark,
683 IP_VS_DBG_ADDR(svc->af, &dest->addr),
684 ntohs(dest->port));
1da177e4
LT
685 list_del(&dest->n_list);
686 ip_vs_dst_reset(dest);
687 __ip_vs_unbind_svc(dest);
688 kfree(dest);
689 }
690 }
691
692 return NULL;
693}
694
695
696/*
697 * Clean up all the destinations in the trash
698 * Called by the ip_vs_control_cleanup()
699 *
700 * When the ip_vs_control_clearup is activated by ipvs module exit,
701 * the service tables must have been flushed and all the connections
702 * are expired, and the refcnt of each destination in the trash must
703 * be 1, so we simply release them here.
704 */
705static void ip_vs_trash_cleanup(void)
706{
707 struct ip_vs_dest *dest, *nxt;
708
709 list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
710 list_del(&dest->n_list);
711 ip_vs_dst_reset(dest);
712 __ip_vs_unbind_svc(dest);
713 kfree(dest);
714 }
715}
716
717
718static void
719ip_vs_zero_stats(struct ip_vs_stats *stats)
720{
721 spin_lock_bh(&stats->lock);
e93615d0
SH
722
723 stats->conns = 0;
724 stats->inpkts = 0;
725 stats->outpkts = 0;
726 stats->inbytes = 0;
727 stats->outbytes = 0;
728
729 stats->cps = 0;
730 stats->inpps = 0;
731 stats->outpps = 0;
732 stats->inbps = 0;
733 stats->outbps = 0;
734
1da177e4 735 ip_vs_zero_estimator(stats);
e93615d0 736
3a14a313 737 spin_unlock_bh(&stats->lock);
1da177e4
LT
738}
739
740/*
741 * Update a destination in the given service
742 */
743static void
744__ip_vs_update_dest(struct ip_vs_service *svc,
c860c6b1 745 struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
746{
747 int conn_flags;
748
749 /* set the weight and the flags */
750 atomic_set(&dest->weight, udest->weight);
751 conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
752
753 /* check if local node and update the flags */
c860c6b1 754 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
1da177e4
LT
755 conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
756 | IP_VS_CONN_F_LOCALNODE;
757 }
758
759 /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
760 if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
761 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
762 } else {
763 /*
764 * Put the real service in ip_vs_rtable if not present.
765 * For now only for NAT!
766 */
767 write_lock_bh(&__ip_vs_rs_lock);
768 ip_vs_rs_hash(dest);
769 write_unlock_bh(&__ip_vs_rs_lock);
770 }
771 atomic_set(&dest->conn_flags, conn_flags);
772
773 /* bind the service */
774 if (!dest->svc) {
775 __ip_vs_bind_svc(dest, svc);
776 } else {
777 if (dest->svc != svc) {
778 __ip_vs_unbind_svc(dest);
779 ip_vs_zero_stats(&dest->stats);
780 __ip_vs_bind_svc(dest, svc);
781 }
782 }
783
784 /* set the dest status flags */
785 dest->flags |= IP_VS_DEST_F_AVAILABLE;
786
787 if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
788 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
789 dest->u_threshold = udest->u_threshold;
790 dest->l_threshold = udest->l_threshold;
791}
792
793
794/*
795 * Create a destination for the given service
796 */
797static int
c860c6b1 798ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
1da177e4
LT
799 struct ip_vs_dest **dest_p)
800{
801 struct ip_vs_dest *dest;
802 unsigned atype;
803
804 EnterFunction(2);
805
c860c6b1 806 atype = inet_addr_type(&init_net, udest->addr.ip);
1da177e4
LT
807 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
808 return -EINVAL;
809
0da974f4 810 dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
1da177e4
LT
811 if (dest == NULL) {
812 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
813 return -ENOMEM;
814 }
1da177e4 815
c860c6b1 816 dest->af = svc->af;
1da177e4 817 dest->protocol = svc->protocol;
c860c6b1 818 dest->vaddr = svc->addr;
1da177e4
LT
819 dest->vport = svc->port;
820 dest->vfwmark = svc->fwmark;
c860c6b1 821 ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
1da177e4
LT
822 dest->port = udest->port;
823
824 atomic_set(&dest->activeconns, 0);
825 atomic_set(&dest->inactconns, 0);
826 atomic_set(&dest->persistconns, 0);
827 atomic_set(&dest->refcnt, 0);
828
829 INIT_LIST_HEAD(&dest->d_list);
830 spin_lock_init(&dest->dst_lock);
831 spin_lock_init(&dest->stats.lock);
832 __ip_vs_update_dest(svc, dest, udest);
833 ip_vs_new_estimator(&dest->stats);
834
835 *dest_p = dest;
836
837 LeaveFunction(2);
838 return 0;
839}
840
841
842/*
843 * Add a destination into an existing service
844 */
845static int
c860c6b1 846ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
847{
848 struct ip_vs_dest *dest;
c860c6b1 849 union nf_inet_addr daddr;
014d730d 850 __be16 dport = udest->port;
1da177e4
LT
851 int ret;
852
853 EnterFunction(2);
854
855 if (udest->weight < 0) {
856 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
857 return -ERANGE;
858 }
859
860 if (udest->l_threshold > udest->u_threshold) {
861 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
862 "upper threshold\n");
863 return -ERANGE;
864 }
865
c860c6b1
JV
866 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
867
1da177e4
LT
868 /*
869 * Check if the dest already exists in the list
870 */
7937df15
JV
871 dest = ip_vs_lookup_dest(svc, &daddr, dport);
872
1da177e4
LT
873 if (dest != NULL) {
874 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
875 return -EEXIST;
876 }
877
878 /*
879 * Check if the dest already exists in the trash and
880 * is from the same service
881 */
7937df15
JV
882 dest = ip_vs_trash_get_dest(svc, &daddr, dport);
883
1da177e4
LT
884 if (dest != NULL) {
885 IP_VS_DBG(3, "Get destination %u.%u.%u.%u:%u from trash, "
4b5bdf5c 886 "dest->refcnt=%d, service %u/%u.%u.%u.%u:%u\n",
1da177e4
LT
887 NIPQUAD(daddr), ntohs(dport),
888 atomic_read(&dest->refcnt),
889 dest->vfwmark,
e7ade46a 890 NIPQUAD(dest->vaddr.ip),
1da177e4
LT
891 ntohs(dest->vport));
892 __ip_vs_update_dest(svc, dest, udest);
893
894 /*
895 * Get the destination from the trash
896 */
897 list_del(&dest->n_list);
898
899 ip_vs_new_estimator(&dest->stats);
900
901 write_lock_bh(&__ip_vs_svc_lock);
902
903 /*
904 * Wait until all other svc users go away.
905 */
906 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
907
908 list_add(&dest->n_list, &svc->destinations);
909 svc->num_dests++;
910
911 /* call the update_service function of its scheduler */
82dfb6f3
SW
912 if (svc->scheduler->update_service)
913 svc->scheduler->update_service(svc);
1da177e4
LT
914
915 write_unlock_bh(&__ip_vs_svc_lock);
916 return 0;
917 }
918
919 /*
920 * Allocate and initialize the dest structure
921 */
922 ret = ip_vs_new_dest(svc, udest, &dest);
923 if (ret) {
924 return ret;
925 }
926
927 /*
928 * Add the dest entry into the list
929 */
930 atomic_inc(&dest->refcnt);
931
932 write_lock_bh(&__ip_vs_svc_lock);
933
934 /*
935 * Wait until all other svc users go away.
936 */
937 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
938
939 list_add(&dest->n_list, &svc->destinations);
940 svc->num_dests++;
941
942 /* call the update_service function of its scheduler */
82dfb6f3
SW
943 if (svc->scheduler->update_service)
944 svc->scheduler->update_service(svc);
1da177e4
LT
945
946 write_unlock_bh(&__ip_vs_svc_lock);
947
948 LeaveFunction(2);
949
950 return 0;
951}
952
953
954/*
955 * Edit a destination in the given service
956 */
957static int
c860c6b1 958ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
959{
960 struct ip_vs_dest *dest;
c860c6b1 961 union nf_inet_addr daddr;
014d730d 962 __be16 dport = udest->port;
1da177e4
LT
963
964 EnterFunction(2);
965
966 if (udest->weight < 0) {
967 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
968 return -ERANGE;
969 }
970
971 if (udest->l_threshold > udest->u_threshold) {
972 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
973 "upper threshold\n");
974 return -ERANGE;
975 }
976
c860c6b1
JV
977 ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
978
1da177e4
LT
979 /*
980 * Lookup the destination list
981 */
7937df15
JV
982 dest = ip_vs_lookup_dest(svc, &daddr, dport);
983
1da177e4
LT
984 if (dest == NULL) {
985 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
986 return -ENOENT;
987 }
988
989 __ip_vs_update_dest(svc, dest, udest);
990
991 write_lock_bh(&__ip_vs_svc_lock);
992
993 /* Wait until all other svc users go away */
cae7ca3d 994 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1da177e4
LT
995
996 /* call the update_service, because server weight may be changed */
82dfb6f3
SW
997 if (svc->scheduler->update_service)
998 svc->scheduler->update_service(svc);
1da177e4
LT
999
1000 write_unlock_bh(&__ip_vs_svc_lock);
1001
1002 LeaveFunction(2);
1003
1004 return 0;
1005}
1006
1007
1008/*
1009 * Delete a destination (must be already unlinked from the service)
1010 */
1011static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1012{
1013 ip_vs_kill_estimator(&dest->stats);
1014
1015 /*
1016 * Remove it from the d-linked list with the real services.
1017 */
1018 write_lock_bh(&__ip_vs_rs_lock);
1019 ip_vs_rs_unhash(dest);
1020 write_unlock_bh(&__ip_vs_rs_lock);
1021
1022 /*
1023 * Decrease the refcnt of the dest, and free the dest
1024 * if nobody refers to it (refcnt=0). Otherwise, throw
1025 * the destination into the trash.
1026 */
1027 if (atomic_dec_and_test(&dest->refcnt)) {
1028 ip_vs_dst_reset(dest);
1029 /* simply decrease svc->refcnt here, let the caller check
1030 and release the service if nobody refers to it.
1031 Only user context can release destination and service,
1032 and only one user context can update virtual service at a
1033 time, so the operation here is OK */
1034 atomic_dec(&dest->svc->refcnt);
1035 kfree(dest);
1036 } else {
4b5bdf5c
RN
1037 IP_VS_DBG(3, "Moving dest %u.%u.%u.%u:%u into trash, "
1038 "dest->refcnt=%d\n",
e7ade46a 1039 NIPQUAD(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1040 atomic_read(&dest->refcnt));
1041 list_add(&dest->n_list, &ip_vs_dest_trash);
1042 atomic_inc(&dest->refcnt);
1043 }
1044}
1045
1046
1047/*
1048 * Unlink a destination from the given service
1049 */
1050static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1051 struct ip_vs_dest *dest,
1052 int svcupd)
1053{
1054 dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1055
1056 /*
1057 * Remove it from the d-linked destination list.
1058 */
1059 list_del(&dest->n_list);
1060 svc->num_dests--;
82dfb6f3
SW
1061
1062 /*
1063 * Call the update_service function of its scheduler
1064 */
1065 if (svcupd && svc->scheduler->update_service)
1066 svc->scheduler->update_service(svc);
1da177e4
LT
1067}
1068
1069
1070/*
1071 * Delete a destination server in the given service
1072 */
1073static int
c860c6b1 1074ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1da177e4
LT
1075{
1076 struct ip_vs_dest *dest;
014d730d 1077 __be16 dport = udest->port;
1da177e4
LT
1078
1079 EnterFunction(2);
1080
7937df15 1081 dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
c860c6b1 1082
1da177e4
LT
1083 if (dest == NULL) {
1084 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1085 return -ENOENT;
1086 }
1087
1088 write_lock_bh(&__ip_vs_svc_lock);
1089
1090 /*
1091 * Wait until all other svc users go away.
1092 */
1093 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1094
1095 /*
1096 * Unlink dest from the service
1097 */
1098 __ip_vs_unlink_dest(svc, dest, 1);
1099
1100 write_unlock_bh(&__ip_vs_svc_lock);
1101
1102 /*
1103 * Delete the destination
1104 */
1105 __ip_vs_del_dest(dest);
1106
1107 LeaveFunction(2);
1108
1109 return 0;
1110}
1111
1112
1113/*
1114 * Add a service into the service hash table
1115 */
1116static int
c860c6b1
JV
1117ip_vs_add_service(struct ip_vs_service_user_kern *u,
1118 struct ip_vs_service **svc_p)
1da177e4
LT
1119{
1120 int ret = 0;
1121 struct ip_vs_scheduler *sched = NULL;
1122 struct ip_vs_service *svc = NULL;
1123
1124 /* increase the module use count */
1125 ip_vs_use_count_inc();
1126
1127 /* Lookup the scheduler by 'u->sched_name' */
1128 sched = ip_vs_scheduler_get(u->sched_name);
1129 if (sched == NULL) {
1130 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1131 u->sched_name);
1132 ret = -ENOENT;
1133 goto out_mod_dec;
1134 }
1135
0da974f4 1136 svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1da177e4
LT
1137 if (svc == NULL) {
1138 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1139 ret = -ENOMEM;
1140 goto out_err;
1141 }
1da177e4
LT
1142
1143 /* I'm the first user of the service */
1144 atomic_set(&svc->usecnt, 1);
1145 atomic_set(&svc->refcnt, 0);
1146
c860c6b1 1147 svc->af = u->af;
1da177e4 1148 svc->protocol = u->protocol;
c860c6b1 1149 ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1da177e4
LT
1150 svc->port = u->port;
1151 svc->fwmark = u->fwmark;
1152 svc->flags = u->flags;
1153 svc->timeout = u->timeout * HZ;
1154 svc->netmask = u->netmask;
1155
1156 INIT_LIST_HEAD(&svc->destinations);
1157 rwlock_init(&svc->sched_lock);
1158 spin_lock_init(&svc->stats.lock);
1159
1160 /* Bind the scheduler */
1161 ret = ip_vs_bind_scheduler(svc, sched);
1162 if (ret)
1163 goto out_err;
1164 sched = NULL;
1165
1166 /* Update the virtual service counters */
1167 if (svc->port == FTPPORT)
1168 atomic_inc(&ip_vs_ftpsvc_counter);
1169 else if (svc->port == 0)
1170 atomic_inc(&ip_vs_nullsvc_counter);
1171
1172 ip_vs_new_estimator(&svc->stats);
1173 ip_vs_num_services++;
1174
1175 /* Hash the service into the service table */
1176 write_lock_bh(&__ip_vs_svc_lock);
1177 ip_vs_svc_hash(svc);
1178 write_unlock_bh(&__ip_vs_svc_lock);
1179
1180 *svc_p = svc;
1181 return 0;
1182
1183 out_err:
1184 if (svc != NULL) {
1185 if (svc->scheduler)
1186 ip_vs_unbind_scheduler(svc);
1187 if (svc->inc) {
1188 local_bh_disable();
1189 ip_vs_app_inc_put(svc->inc);
1190 local_bh_enable();
1191 }
1192 kfree(svc);
1193 }
1194 ip_vs_scheduler_put(sched);
1195
1196 out_mod_dec:
1197 /* decrease the module use count */
1198 ip_vs_use_count_dec();
1199
1200 return ret;
1201}
1202
1203
1204/*
1205 * Edit a service and bind it with a new scheduler
1206 */
1207static int
c860c6b1 1208ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1da177e4
LT
1209{
1210 struct ip_vs_scheduler *sched, *old_sched;
1211 int ret = 0;
1212
1213 /*
1214 * Lookup the scheduler, by 'u->sched_name'
1215 */
1216 sched = ip_vs_scheduler_get(u->sched_name);
1217 if (sched == NULL) {
1218 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1219 u->sched_name);
1220 return -ENOENT;
1221 }
1222 old_sched = sched;
1223
1224 write_lock_bh(&__ip_vs_svc_lock);
1225
1226 /*
1227 * Wait until all other svc users go away.
1228 */
1229 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1230
1231 /*
1232 * Set the flags and timeout value
1233 */
1234 svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1235 svc->timeout = u->timeout * HZ;
1236 svc->netmask = u->netmask;
1237
1238 old_sched = svc->scheduler;
1239 if (sched != old_sched) {
1240 /*
1241 * Unbind the old scheduler
1242 */
1243 if ((ret = ip_vs_unbind_scheduler(svc))) {
1244 old_sched = sched;
1245 goto out;
1246 }
1247
1248 /*
1249 * Bind the new scheduler
1250 */
1251 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1252 /*
1253 * If ip_vs_bind_scheduler fails, restore the old
1254 * scheduler.
1255 * The main reason of failure is out of memory.
1256 *
1257 * The question is if the old scheduler can be
1258 * restored all the time. TODO: if it cannot be
1259 * restored some time, we must delete the service,
1260 * otherwise the system may crash.
1261 */
1262 ip_vs_bind_scheduler(svc, old_sched);
1263 old_sched = sched;
1264 goto out;
1265 }
1266 }
1267
1268 out:
1269 write_unlock_bh(&__ip_vs_svc_lock);
1270
1271 if (old_sched)
1272 ip_vs_scheduler_put(old_sched);
1273
1274 return ret;
1275}
1276
1277
1278/*
1279 * Delete a service from the service list
1280 * - The service must be unlinked, unlocked and not referenced!
1281 * - We are called under _bh lock
1282 */
1283static void __ip_vs_del_service(struct ip_vs_service *svc)
1284{
1285 struct ip_vs_dest *dest, *nxt;
1286 struct ip_vs_scheduler *old_sched;
1287
1288 ip_vs_num_services--;
1289 ip_vs_kill_estimator(&svc->stats);
1290
1291 /* Unbind scheduler */
1292 old_sched = svc->scheduler;
1293 ip_vs_unbind_scheduler(svc);
1294 if (old_sched)
1295 ip_vs_scheduler_put(old_sched);
1296
1297 /* Unbind app inc */
1298 if (svc->inc) {
1299 ip_vs_app_inc_put(svc->inc);
1300 svc->inc = NULL;
1301 }
1302
1303 /*
1304 * Unlink the whole destination list
1305 */
1306 list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1307 __ip_vs_unlink_dest(svc, dest, 0);
1308 __ip_vs_del_dest(dest);
1309 }
1310
1311 /*
1312 * Update the virtual service counters
1313 */
1314 if (svc->port == FTPPORT)
1315 atomic_dec(&ip_vs_ftpsvc_counter);
1316 else if (svc->port == 0)
1317 atomic_dec(&ip_vs_nullsvc_counter);
1318
1319 /*
1320 * Free the service if nobody refers to it
1321 */
1322 if (atomic_read(&svc->refcnt) == 0)
1323 kfree(svc);
1324
1325 /* decrease the module use count */
1326 ip_vs_use_count_dec();
1327}
1328
1329/*
1330 * Delete a service from the service list
1331 */
1332static int ip_vs_del_service(struct ip_vs_service *svc)
1333{
1334 if (svc == NULL)
1335 return -EEXIST;
1336
1337 /*
1338 * Unhash it from the service table
1339 */
1340 write_lock_bh(&__ip_vs_svc_lock);
1341
1342 ip_vs_svc_unhash(svc);
1343
1344 /*
1345 * Wait until all the svc users go away.
1346 */
1347 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1348
1349 __ip_vs_del_service(svc);
1350
1351 write_unlock_bh(&__ip_vs_svc_lock);
1352
1353 return 0;
1354}
1355
1356
1357/*
1358 * Flush all the virtual services
1359 */
1360static int ip_vs_flush(void)
1361{
1362 int idx;
1363 struct ip_vs_service *svc, *nxt;
1364
1365 /*
1366 * Flush the service table hashed by <protocol,addr,port>
1367 */
1368 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1369 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1370 write_lock_bh(&__ip_vs_svc_lock);
1371 ip_vs_svc_unhash(svc);
1372 /*
1373 * Wait until all the svc users go away.
1374 */
1375 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1376 __ip_vs_del_service(svc);
1377 write_unlock_bh(&__ip_vs_svc_lock);
1378 }
1379 }
1380
1381 /*
1382 * Flush the service table hashed by fwmark
1383 */
1384 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1385 list_for_each_entry_safe(svc, nxt,
1386 &ip_vs_svc_fwm_table[idx], f_list) {
1387 write_lock_bh(&__ip_vs_svc_lock);
1388 ip_vs_svc_unhash(svc);
1389 /*
1390 * Wait until all the svc users go away.
1391 */
1392 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1393 __ip_vs_del_service(svc);
1394 write_unlock_bh(&__ip_vs_svc_lock);
1395 }
1396 }
1397
1398 return 0;
1399}
1400
1401
1402/*
1403 * Zero counters in a service or all services
1404 */
1405static int ip_vs_zero_service(struct ip_vs_service *svc)
1406{
1407 struct ip_vs_dest *dest;
1408
1409 write_lock_bh(&__ip_vs_svc_lock);
1410 list_for_each_entry(dest, &svc->destinations, n_list) {
1411 ip_vs_zero_stats(&dest->stats);
1412 }
1413 ip_vs_zero_stats(&svc->stats);
1414 write_unlock_bh(&__ip_vs_svc_lock);
1415 return 0;
1416}
1417
1418static int ip_vs_zero_all(void)
1419{
1420 int idx;
1421 struct ip_vs_service *svc;
1422
1423 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1424 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1425 ip_vs_zero_service(svc);
1426 }
1427 }
1428
1429 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1430 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1431 ip_vs_zero_service(svc);
1432 }
1433 }
1434
1435 ip_vs_zero_stats(&ip_vs_stats);
1436 return 0;
1437}
1438
1439
1440static int
1441proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1442 void __user *buffer, size_t *lenp, loff_t *ppos)
1443{
1444 int *valp = table->data;
1445 int val = *valp;
1446 int rc;
1447
1448 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1449 if (write && (*valp != val)) {
1450 if ((*valp < 0) || (*valp > 3)) {
1451 /* Restore the correct value */
1452 *valp = val;
1453 } else {
1da177e4 1454 update_defense_level();
1da177e4
LT
1455 }
1456 }
1457 return rc;
1458}
1459
1460
1461static int
1462proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1463 void __user *buffer, size_t *lenp, loff_t *ppos)
1464{
1465 int *valp = table->data;
1466 int val[2];
1467 int rc;
1468
1469 /* backup the value first */
1470 memcpy(val, valp, sizeof(val));
1471
1472 rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1473 if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1474 /* Restore the correct value */
1475 memcpy(valp, val, sizeof(val));
1476 }
1477 return rc;
1478}
1479
1480
1481/*
1482 * IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1483 */
1484
1485static struct ctl_table vs_vars[] = {
1486 {
1da177e4
LT
1487 .procname = "amemthresh",
1488 .data = &sysctl_ip_vs_amemthresh,
1489 .maxlen = sizeof(int),
1490 .mode = 0644,
1491 .proc_handler = &proc_dointvec,
1492 },
1493#ifdef CONFIG_IP_VS_DEBUG
1494 {
1da177e4
LT
1495 .procname = "debug_level",
1496 .data = &sysctl_ip_vs_debug_level,
1497 .maxlen = sizeof(int),
1498 .mode = 0644,
1499 .proc_handler = &proc_dointvec,
1500 },
1501#endif
1502 {
1da177e4
LT
1503 .procname = "am_droprate",
1504 .data = &sysctl_ip_vs_am_droprate,
1505 .maxlen = sizeof(int),
1506 .mode = 0644,
1507 .proc_handler = &proc_dointvec,
1508 },
1509 {
1da177e4
LT
1510 .procname = "drop_entry",
1511 .data = &sysctl_ip_vs_drop_entry,
1512 .maxlen = sizeof(int),
1513 .mode = 0644,
1514 .proc_handler = &proc_do_defense_mode,
1515 },
1516 {
1da177e4
LT
1517 .procname = "drop_packet",
1518 .data = &sysctl_ip_vs_drop_packet,
1519 .maxlen = sizeof(int),
1520 .mode = 0644,
1521 .proc_handler = &proc_do_defense_mode,
1522 },
1523 {
1da177e4
LT
1524 .procname = "secure_tcp",
1525 .data = &sysctl_ip_vs_secure_tcp,
1526 .maxlen = sizeof(int),
1527 .mode = 0644,
1528 .proc_handler = &proc_do_defense_mode,
1529 },
1530#if 0
1531 {
1da177e4
LT
1532 .procname = "timeout_established",
1533 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1534 .maxlen = sizeof(int),
1535 .mode = 0644,
1536 .proc_handler = &proc_dointvec_jiffies,
1537 },
1538 {
1da177e4
LT
1539 .procname = "timeout_synsent",
1540 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1541 .maxlen = sizeof(int),
1542 .mode = 0644,
1543 .proc_handler = &proc_dointvec_jiffies,
1544 },
1545 {
1da177e4
LT
1546 .procname = "timeout_synrecv",
1547 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1548 .maxlen = sizeof(int),
1549 .mode = 0644,
1550 .proc_handler = &proc_dointvec_jiffies,
1551 },
1552 {
1da177e4
LT
1553 .procname = "timeout_finwait",
1554 .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1555 .maxlen = sizeof(int),
1556 .mode = 0644,
1557 .proc_handler = &proc_dointvec_jiffies,
1558 },
1559 {
1da177e4
LT
1560 .procname = "timeout_timewait",
1561 .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1562 .maxlen = sizeof(int),
1563 .mode = 0644,
1564 .proc_handler = &proc_dointvec_jiffies,
1565 },
1566 {
1da177e4
LT
1567 .procname = "timeout_close",
1568 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1569 .maxlen = sizeof(int),
1570 .mode = 0644,
1571 .proc_handler = &proc_dointvec_jiffies,
1572 },
1573 {
1da177e4
LT
1574 .procname = "timeout_closewait",
1575 .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1576 .maxlen = sizeof(int),
1577 .mode = 0644,
1578 .proc_handler = &proc_dointvec_jiffies,
1579 },
1580 {
1da177e4
LT
1581 .procname = "timeout_lastack",
1582 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1583 .maxlen = sizeof(int),
1584 .mode = 0644,
1585 .proc_handler = &proc_dointvec_jiffies,
1586 },
1587 {
1da177e4
LT
1588 .procname = "timeout_listen",
1589 .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1590 .maxlen = sizeof(int),
1591 .mode = 0644,
1592 .proc_handler = &proc_dointvec_jiffies,
1593 },
1594 {
1da177e4
LT
1595 .procname = "timeout_synack",
1596 .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1597 .maxlen = sizeof(int),
1598 .mode = 0644,
1599 .proc_handler = &proc_dointvec_jiffies,
1600 },
1601 {
1da177e4
LT
1602 .procname = "timeout_udp",
1603 .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1604 .maxlen = sizeof(int),
1605 .mode = 0644,
1606 .proc_handler = &proc_dointvec_jiffies,
1607 },
1608 {
1da177e4
LT
1609 .procname = "timeout_icmp",
1610 .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1611 .maxlen = sizeof(int),
1612 .mode = 0644,
1613 .proc_handler = &proc_dointvec_jiffies,
1614 },
1615#endif
1616 {
1da177e4
LT
1617 .procname = "cache_bypass",
1618 .data = &sysctl_ip_vs_cache_bypass,
1619 .maxlen = sizeof(int),
1620 .mode = 0644,
1621 .proc_handler = &proc_dointvec,
1622 },
1623 {
1da177e4
LT
1624 .procname = "expire_nodest_conn",
1625 .data = &sysctl_ip_vs_expire_nodest_conn,
1626 .maxlen = sizeof(int),
1627 .mode = 0644,
1628 .proc_handler = &proc_dointvec,
1629 },
1630 {
1da177e4
LT
1631 .procname = "expire_quiescent_template",
1632 .data = &sysctl_ip_vs_expire_quiescent_template,
1633 .maxlen = sizeof(int),
1634 .mode = 0644,
1635 .proc_handler = &proc_dointvec,
1636 },
1637 {
1da177e4
LT
1638 .procname = "sync_threshold",
1639 .data = &sysctl_ip_vs_sync_threshold,
1640 .maxlen = sizeof(sysctl_ip_vs_sync_threshold),
1641 .mode = 0644,
1642 .proc_handler = &proc_do_sync_threshold,
1643 },
1644 {
1da177e4
LT
1645 .procname = "nat_icmp_send",
1646 .data = &sysctl_ip_vs_nat_icmp_send,
1647 .maxlen = sizeof(int),
1648 .mode = 0644,
1649 .proc_handler = &proc_dointvec,
1650 },
1651 { .ctl_name = 0 }
1652};
1653
5587da55 1654const struct ctl_path net_vs_ctl_path[] = {
90754f8e
PE
1655 { .procname = "net", .ctl_name = CTL_NET, },
1656 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1657 { .procname = "vs", },
1658 { }
1da177e4 1659};
90754f8e 1660EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1da177e4
LT
1661
1662static struct ctl_table_header * sysctl_header;
1663
1664#ifdef CONFIG_PROC_FS
1665
/* Position of the /proc service listing iterator */
struct ip_vs_iter {
	/* ip_vs_svc_table or ip_vs_svc_fwm_table, whichever is being walked */
	struct list_head *table;
	/* index of the current bucket within 'table' */
	int bucket;
};
1670
1671/*
1672 * Write the contents of the VS rule table to a PROCfs file.
1673 * (It is kept just for backward compatibility)
1674 */
1675static inline const char *ip_vs_fwd_name(unsigned flags)
1676{
1677 switch (flags & IP_VS_CONN_F_FWD_MASK) {
1678 case IP_VS_CONN_F_LOCALNODE:
1679 return "Local";
1680 case IP_VS_CONN_F_TUNNEL:
1681 return "Tunnel";
1682 case IP_VS_CONN_F_DROUTE:
1683 return "Route";
1684 default:
1685 return "Masq";
1686 }
1687}
1688
1689
1690/* Get the Nth entry in the two lists */
1691static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1692{
1693 struct ip_vs_iter *iter = seq->private;
1694 int idx;
1695 struct ip_vs_service *svc;
1696
1697 /* look in hash by protocol */
1698 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1699 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1700 if (pos-- == 0){
1701 iter->table = ip_vs_svc_table;
1702 iter->bucket = idx;
1703 return svc;
1704 }
1705 }
1706 }
1707
1708 /* keep looking in fwmark */
1709 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1710 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1711 if (pos-- == 0) {
1712 iter->table = ip_vs_svc_fwm_table;
1713 iter->bucket = idx;
1714 return svc;
1715 }
1716 }
1717 }
1718
1719 return NULL;
1720}
1721
1722static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1723{
1724
1725 read_lock_bh(&__ip_vs_svc_lock);
1726 return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1727}
1728
1729
1730static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1731{
1732 struct list_head *e;
1733 struct ip_vs_iter *iter;
1734 struct ip_vs_service *svc;
1735
1736 ++*pos;
1737 if (v == SEQ_START_TOKEN)
1738 return ip_vs_info_array(seq,0);
1739
1740 svc = v;
1741 iter = seq->private;
1742
1743 if (iter->table == ip_vs_svc_table) {
1744 /* next service in table hashed by protocol */
1745 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1746 return list_entry(e, struct ip_vs_service, s_list);
1747
1748
1749 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1750 list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1751 s_list) {
1752 return svc;
1753 }
1754 }
1755
1756 iter->table = ip_vs_svc_fwm_table;
1757 iter->bucket = -1;
1758 goto scan_fwmark;
1759 }
1760
1761 /* next service in hashed by fwmark */
1762 if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1763 return list_entry(e, struct ip_vs_service, f_list);
1764
1765 scan_fwmark:
1766 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1767 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1768 f_list)
1769 return svc;
1770 }
1771
1772 return NULL;
1773}
1774
1775static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1776{
1777 read_unlock_bh(&__ip_vs_svc_lock);
1778}
1779
1780
1781static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1782{
1783 if (v == SEQ_START_TOKEN) {
1784 seq_printf(seq,
1785 "IP Virtual Server version %d.%d.%d (size=%d)\n",
1786 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1787 seq_puts(seq,
1788 "Prot LocalAddress:Port Scheduler Flags\n");
1789 seq_puts(seq,
1790 " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1791 } else {
1792 const struct ip_vs_service *svc = v;
1793 const struct ip_vs_iter *iter = seq->private;
1794 const struct ip_vs_dest *dest;
1795
1796 if (iter->table == ip_vs_svc_table)
1797 seq_printf(seq, "%s %08X:%04X %s ",
1798 ip_vs_proto_name(svc->protocol),
e7ade46a 1799 ntohl(svc->addr.ip),
1da177e4
LT
1800 ntohs(svc->port),
1801 svc->scheduler->name);
1802 else
1803 seq_printf(seq, "FWM %08X %s ",
1804 svc->fwmark, svc->scheduler->name);
1805
1806 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1807 seq_printf(seq, "persistent %d %08X\n",
1808 svc->timeout,
1809 ntohl(svc->netmask));
1810 else
1811 seq_putc(seq, '\n');
1812
1813 list_for_each_entry(dest, &svc->destinations, n_list) {
1814 seq_printf(seq,
1815 " -> %08X:%04X %-7s %-6d %-10d %-10d\n",
e7ade46a 1816 ntohl(dest->addr.ip), ntohs(dest->port),
1da177e4
LT
1817 ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1818 atomic_read(&dest->weight),
1819 atomic_read(&dest->activeconns),
1820 atomic_read(&dest->inactconns));
1821 }
1822 }
1823 return 0;
1824}
1825
56b3d975 1826static const struct seq_operations ip_vs_info_seq_ops = {
1da177e4
LT
1827 .start = ip_vs_info_seq_start,
1828 .next = ip_vs_info_seq_next,
1829 .stop = ip_vs_info_seq_stop,
1830 .show = ip_vs_info_seq_show,
1831};
1832
1833static int ip_vs_info_open(struct inode *inode, struct file *file)
1834{
cf7732e4
PE
1835 return seq_open_private(file, &ip_vs_info_seq_ops,
1836 sizeof(struct ip_vs_iter));
1da177e4
LT
1837}
1838
9a32144e 1839static const struct file_operations ip_vs_info_fops = {
1da177e4
LT
1840 .owner = THIS_MODULE,
1841 .open = ip_vs_info_open,
1842 .read = seq_read,
1843 .llseek = seq_lseek,
1844 .release = seq_release_private,
1845};
1846
1847#endif
1848
519e49e8
SW
1849struct ip_vs_stats ip_vs_stats = {
1850 .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1851};
1da177e4
LT
1852
1853#ifdef CONFIG_PROC_FS
1854static int ip_vs_stats_show(struct seq_file *seq, void *v)
1855{
1856
1857/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1858 seq_puts(seq,
1859 " Total Incoming Outgoing Incoming Outgoing\n");
1860 seq_printf(seq,
1861 " Conns Packets Packets Bytes Bytes\n");
1862
1863 spin_lock_bh(&ip_vs_stats.lock);
1864 seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1865 ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1866 (unsigned long long) ip_vs_stats.inbytes,
1867 (unsigned long long) ip_vs_stats.outbytes);
1868
1869/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1870 seq_puts(seq,
1871 " Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
1872 seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1873 ip_vs_stats.cps,
1874 ip_vs_stats.inpps,
1875 ip_vs_stats.outpps,
1876 ip_vs_stats.inbps,
1877 ip_vs_stats.outbps);
1878 spin_unlock_bh(&ip_vs_stats.lock);
1879
1880 return 0;
1881}
1882
1883static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1884{
1885 return single_open(file, ip_vs_stats_show, NULL);
1886}
1887
9a32144e 1888static const struct file_operations ip_vs_stats_fops = {
1da177e4
LT
1889 .owner = THIS_MODULE,
1890 .open = ip_vs_stats_seq_open,
1891 .read = seq_read,
1892 .llseek = seq_lseek,
1893 .release = single_release,
1894};
1895
1896#endif
1897
1898/*
1899 * Set timeout values for tcp tcpfin udp in the timeout_table.
1900 */
1901static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1902{
1903 IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1904 u->tcp_timeout,
1905 u->tcp_fin_timeout,
1906 u->udp_timeout);
1907
1908#ifdef CONFIG_IP_VS_PROTO_TCP
1909 if (u->tcp_timeout) {
1910 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1911 = u->tcp_timeout * HZ;
1912 }
1913
1914 if (u->tcp_fin_timeout) {
1915 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1916 = u->tcp_fin_timeout * HZ;
1917 }
1918#endif
1919
1920#ifdef CONFIG_IP_VS_PROTO_UDP
1921 if (u->udp_timeout) {
1922 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1923 = u->udp_timeout * HZ;
1924 }
1925#endif
1926 return 0;
1927}
1928
1929
1930#define SET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
1931#define SERVICE_ARG_LEN (sizeof(struct ip_vs_service_user))
1932#define SVCDEST_ARG_LEN (sizeof(struct ip_vs_service_user) + \
1933 sizeof(struct ip_vs_dest_user))
1934#define TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
1935#define DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user))
1936#define MAX_ARG_LEN SVCDEST_ARG_LEN
1937
9b5b5cff 1938static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
1da177e4
LT
1939 [SET_CMDID(IP_VS_SO_SET_ADD)] = SERVICE_ARG_LEN,
1940 [SET_CMDID(IP_VS_SO_SET_EDIT)] = SERVICE_ARG_LEN,
1941 [SET_CMDID(IP_VS_SO_SET_DEL)] = SERVICE_ARG_LEN,
1942 [SET_CMDID(IP_VS_SO_SET_FLUSH)] = 0,
1943 [SET_CMDID(IP_VS_SO_SET_ADDDEST)] = SVCDEST_ARG_LEN,
1944 [SET_CMDID(IP_VS_SO_SET_DELDEST)] = SVCDEST_ARG_LEN,
1945 [SET_CMDID(IP_VS_SO_SET_EDITDEST)] = SVCDEST_ARG_LEN,
1946 [SET_CMDID(IP_VS_SO_SET_TIMEOUT)] = TIMEOUT_ARG_LEN,
1947 [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)] = DAEMON_ARG_LEN,
1948 [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)] = DAEMON_ARG_LEN,
1949 [SET_CMDID(IP_VS_SO_SET_ZERO)] = SERVICE_ARG_LEN,
1950};
1951
c860c6b1
JV
1952static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
1953 struct ip_vs_service_user *usvc_compat)
1954{
1955 usvc->af = AF_INET;
1956 usvc->protocol = usvc_compat->protocol;
1957 usvc->addr.ip = usvc_compat->addr;
1958 usvc->port = usvc_compat->port;
1959 usvc->fwmark = usvc_compat->fwmark;
1960
1961 /* Deep copy of sched_name is not needed here */
1962 usvc->sched_name = usvc_compat->sched_name;
1963
1964 usvc->flags = usvc_compat->flags;
1965 usvc->timeout = usvc_compat->timeout;
1966 usvc->netmask = usvc_compat->netmask;
1967}
1968
1969static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
1970 struct ip_vs_dest_user *udest_compat)
1971{
1972 udest->addr.ip = udest_compat->addr;
1973 udest->port = udest_compat->port;
1974 udest->conn_flags = udest_compat->conn_flags;
1975 udest->weight = udest_compat->weight;
1976 udest->u_threshold = udest_compat->u_threshold;
1977 udest->l_threshold = udest_compat->l_threshold;
1978}
1979
1da177e4
LT
1980static int
1981do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1982{
1983 int ret;
1984 unsigned char arg[MAX_ARG_LEN];
c860c6b1
JV
1985 struct ip_vs_service_user *usvc_compat;
1986 struct ip_vs_service_user_kern usvc;
1da177e4 1987 struct ip_vs_service *svc;
c860c6b1
JV
1988 struct ip_vs_dest_user *udest_compat;
1989 struct ip_vs_dest_user_kern udest;
1da177e4
LT
1990
1991 if (!capable(CAP_NET_ADMIN))
1992 return -EPERM;
1993
1994 if (len != set_arglen[SET_CMDID(cmd)]) {
1995 IP_VS_ERR("set_ctl: len %u != %u\n",
1996 len, set_arglen[SET_CMDID(cmd)]);
1997 return -EINVAL;
1998 }
1999
2000 if (copy_from_user(arg, user, len) != 0)
2001 return -EFAULT;
2002
2003 /* increase the module use count */
2004 ip_vs_use_count_inc();
2005
14cc3e2b 2006 if (mutex_lock_interruptible(&__ip_vs_mutex)) {
1da177e4
LT
2007 ret = -ERESTARTSYS;
2008 goto out_dec;
2009 }
2010
2011 if (cmd == IP_VS_SO_SET_FLUSH) {
2012 /* Flush the virtual service */
2013 ret = ip_vs_flush();
2014 goto out_unlock;
2015 } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2016 /* Set timeout values for (tcp tcpfin udp) */
2017 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2018 goto out_unlock;
2019 } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2020 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2021 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2022 goto out_unlock;
2023 } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2024 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2025 ret = stop_sync_thread(dm->state);
2026 goto out_unlock;
2027 }
2028
c860c6b1
JV
2029 usvc_compat = (struct ip_vs_service_user *)arg;
2030 udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2031
2032 /* We only use the new structs internally, so copy userspace compat
2033 * structs to extended internal versions */
2034 ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2035 ip_vs_copy_udest_compat(&udest, udest_compat);
1da177e4
LT
2036
2037 if (cmd == IP_VS_SO_SET_ZERO) {
2038 /* if no service address is set, zero counters in all */
c860c6b1 2039 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
1da177e4
LT
2040 ret = ip_vs_zero_all();
2041 goto out_unlock;
2042 }
2043 }
2044
2045 /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
c860c6b1 2046 if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
1da177e4 2047 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
c860c6b1
JV
2048 usvc.protocol, NIPQUAD(usvc.addr.ip),
2049 ntohs(usvc.port), usvc.sched_name);
1da177e4
LT
2050 ret = -EFAULT;
2051 goto out_unlock;
2052 }
2053
2054 /* Lookup the exact service by <protocol, addr, port> or fwmark */
c860c6b1 2055 if (usvc.fwmark == 0)
b18610de
JV
2056 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2057 &usvc.addr, usvc.port);
1da177e4 2058 else
b18610de 2059 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
1da177e4
LT
2060
2061 if (cmd != IP_VS_SO_SET_ADD
c860c6b1 2062 && (svc == NULL || svc->protocol != usvc.protocol)) {
1da177e4
LT
2063 ret = -ESRCH;
2064 goto out_unlock;
2065 }
2066
2067 switch (cmd) {
2068 case IP_VS_SO_SET_ADD:
2069 if (svc != NULL)
2070 ret = -EEXIST;
2071 else
c860c6b1 2072 ret = ip_vs_add_service(&usvc, &svc);
1da177e4
LT
2073 break;
2074 case IP_VS_SO_SET_EDIT:
c860c6b1 2075 ret = ip_vs_edit_service(svc, &usvc);
1da177e4
LT
2076 break;
2077 case IP_VS_SO_SET_DEL:
2078 ret = ip_vs_del_service(svc);
2079 if (!ret)
2080 goto out_unlock;
2081 break;
2082 case IP_VS_SO_SET_ZERO:
2083 ret = ip_vs_zero_service(svc);
2084 break;
2085 case IP_VS_SO_SET_ADDDEST:
c860c6b1 2086 ret = ip_vs_add_dest(svc, &udest);
1da177e4
LT
2087 break;
2088 case IP_VS_SO_SET_EDITDEST:
c860c6b1 2089 ret = ip_vs_edit_dest(svc, &udest);
1da177e4
LT
2090 break;
2091 case IP_VS_SO_SET_DELDEST:
c860c6b1 2092 ret = ip_vs_del_dest(svc, &udest);
1da177e4
LT
2093 break;
2094 default:
2095 ret = -EINVAL;
2096 }
2097
2098 if (svc)
2099 ip_vs_service_put(svc);
2100
2101 out_unlock:
14cc3e2b 2102 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2103 out_dec:
2104 /* decrease the module use count */
2105 ip_vs_use_count_dec();
2106
2107 return ret;
2108}
2109
2110
2111static void
2112ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2113{
2114 spin_lock_bh(&src->lock);
2115 memcpy(dst, src, (char*)&src->lock - (char*)src);
2116 spin_unlock_bh(&src->lock);
2117}
2118
2119static void
2120ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2121{
2122 dst->protocol = src->protocol;
e7ade46a 2123 dst->addr = src->addr.ip;
1da177e4
LT
2124 dst->port = src->port;
2125 dst->fwmark = src->fwmark;
4da62fc7 2126 strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
1da177e4
LT
2127 dst->flags = src->flags;
2128 dst->timeout = src->timeout / HZ;
2129 dst->netmask = src->netmask;
2130 dst->num_dests = src->num_dests;
2131 ip_vs_copy_stats(&dst->stats, &src->stats);
2132}
2133
2134static inline int
2135__ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2136 struct ip_vs_get_services __user *uptr)
2137{
2138 int idx, count=0;
2139 struct ip_vs_service *svc;
2140 struct ip_vs_service_entry entry;
2141 int ret = 0;
2142
2143 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2144 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2145 if (count >= get->num_services)
2146 goto out;
4da62fc7 2147 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2148 ip_vs_copy_service(&entry, svc);
2149 if (copy_to_user(&uptr->entrytable[count],
2150 &entry, sizeof(entry))) {
2151 ret = -EFAULT;
2152 goto out;
2153 }
2154 count++;
2155 }
2156 }
2157
2158 for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2159 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2160 if (count >= get->num_services)
2161 goto out;
4da62fc7 2162 memset(&entry, 0, sizeof(entry));
1da177e4
LT
2163 ip_vs_copy_service(&entry, svc);
2164 if (copy_to_user(&uptr->entrytable[count],
2165 &entry, sizeof(entry))) {
2166 ret = -EFAULT;
2167 goto out;
2168 }
2169 count++;
2170 }
2171 }
2172 out:
2173 return ret;
2174}
2175
2176static inline int
2177__ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2178 struct ip_vs_get_dests __user *uptr)
2179{
2180 struct ip_vs_service *svc;
b18610de 2181 union nf_inet_addr addr = { .ip = get->addr };
1da177e4
LT
2182 int ret = 0;
2183
2184 if (get->fwmark)
b18610de 2185 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
1da177e4 2186 else
b18610de
JV
2187 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2188 get->port);
2189
1da177e4
LT
2190 if (svc) {
2191 int count = 0;
2192 struct ip_vs_dest *dest;
2193 struct ip_vs_dest_entry entry;
2194
2195 list_for_each_entry(dest, &svc->destinations, n_list) {
2196 if (count >= get->num_dests)
2197 break;
2198
e7ade46a 2199 entry.addr = dest->addr.ip;
1da177e4
LT
2200 entry.port = dest->port;
2201 entry.conn_flags = atomic_read(&dest->conn_flags);
2202 entry.weight = atomic_read(&dest->weight);
2203 entry.u_threshold = dest->u_threshold;
2204 entry.l_threshold = dest->l_threshold;
2205 entry.activeconns = atomic_read(&dest->activeconns);
2206 entry.inactconns = atomic_read(&dest->inactconns);
2207 entry.persistconns = atomic_read(&dest->persistconns);
2208 ip_vs_copy_stats(&entry.stats, &dest->stats);
2209 if (copy_to_user(&uptr->entrytable[count],
2210 &entry, sizeof(entry))) {
2211 ret = -EFAULT;
2212 break;
2213 }
2214 count++;
2215 }
2216 ip_vs_service_put(svc);
2217 } else
2218 ret = -ESRCH;
2219 return ret;
2220}
2221
2222static inline void
2223__ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2224{
2225#ifdef CONFIG_IP_VS_PROTO_TCP
2226 u->tcp_timeout =
2227 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2228 u->tcp_fin_timeout =
2229 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2230#endif
2231#ifdef CONFIG_IP_VS_PROTO_UDP
2232 u->udp_timeout =
2233 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2234#endif
2235}
2236
2237
2238#define GET_CMDID(cmd) (cmd - IP_VS_BASE_CTL)
2239#define GET_INFO_ARG_LEN (sizeof(struct ip_vs_getinfo))
2240#define GET_SERVICES_ARG_LEN (sizeof(struct ip_vs_get_services))
2241#define GET_SERVICE_ARG_LEN (sizeof(struct ip_vs_service_entry))
2242#define GET_DESTS_ARG_LEN (sizeof(struct ip_vs_get_dests))
2243#define GET_TIMEOUT_ARG_LEN (sizeof(struct ip_vs_timeout_user))
2244#define GET_DAEMON_ARG_LEN (sizeof(struct ip_vs_daemon_user) * 2)
2245
9b5b5cff 2246static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
1da177e4
LT
2247 [GET_CMDID(IP_VS_SO_GET_VERSION)] = 64,
2248 [GET_CMDID(IP_VS_SO_GET_INFO)] = GET_INFO_ARG_LEN,
2249 [GET_CMDID(IP_VS_SO_GET_SERVICES)] = GET_SERVICES_ARG_LEN,
2250 [GET_CMDID(IP_VS_SO_GET_SERVICE)] = GET_SERVICE_ARG_LEN,
2251 [GET_CMDID(IP_VS_SO_GET_DESTS)] = GET_DESTS_ARG_LEN,
2252 [GET_CMDID(IP_VS_SO_GET_TIMEOUT)] = GET_TIMEOUT_ARG_LEN,
2253 [GET_CMDID(IP_VS_SO_GET_DAEMON)] = GET_DAEMON_ARG_LEN,
2254};
2255
2256static int
2257do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2258{
2259 unsigned char arg[128];
2260 int ret = 0;
2261
2262 if (!capable(CAP_NET_ADMIN))
2263 return -EPERM;
2264
2265 if (*len < get_arglen[GET_CMDID(cmd)]) {
2266 IP_VS_ERR("get_ctl: len %u < %u\n",
2267 *len, get_arglen[GET_CMDID(cmd)]);
2268 return -EINVAL;
2269 }
2270
2271 if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2272 return -EFAULT;
2273
14cc3e2b 2274 if (mutex_lock_interruptible(&__ip_vs_mutex))
1da177e4
LT
2275 return -ERESTARTSYS;
2276
2277 switch (cmd) {
2278 case IP_VS_SO_GET_VERSION:
2279 {
2280 char buf[64];
2281
2282 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2283 NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2284 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2285 ret = -EFAULT;
2286 goto out;
2287 }
2288 *len = strlen(buf)+1;
2289 }
2290 break;
2291
2292 case IP_VS_SO_GET_INFO:
2293 {
2294 struct ip_vs_getinfo info;
2295 info.version = IP_VS_VERSION_CODE;
2296 info.size = IP_VS_CONN_TAB_SIZE;
2297 info.num_services = ip_vs_num_services;
2298 if (copy_to_user(user, &info, sizeof(info)) != 0)
2299 ret = -EFAULT;
2300 }
2301 break;
2302
2303 case IP_VS_SO_GET_SERVICES:
2304 {
2305 struct ip_vs_get_services *get;
2306 int size;
2307
2308 get = (struct ip_vs_get_services *)arg;
2309 size = sizeof(*get) +
2310 sizeof(struct ip_vs_service_entry) * get->num_services;
2311 if (*len != size) {
2312 IP_VS_ERR("length: %u != %u\n", *len, size);
2313 ret = -EINVAL;
2314 goto out;
2315 }
2316 ret = __ip_vs_get_service_entries(get, user);
2317 }
2318 break;
2319
2320 case IP_VS_SO_GET_SERVICE:
2321 {
2322 struct ip_vs_service_entry *entry;
2323 struct ip_vs_service *svc;
b18610de 2324 union nf_inet_addr addr;
1da177e4
LT
2325
2326 entry = (struct ip_vs_service_entry *)arg;
b18610de 2327 addr.ip = entry->addr;
1da177e4 2328 if (entry->fwmark)
b18610de 2329 svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
1da177e4 2330 else
b18610de
JV
2331 svc = __ip_vs_service_get(AF_INET, entry->protocol,
2332 &addr, entry->port);
1da177e4
LT
2333 if (svc) {
2334 ip_vs_copy_service(entry, svc);
2335 if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2336 ret = -EFAULT;
2337 ip_vs_service_put(svc);
2338 } else
2339 ret = -ESRCH;
2340 }
2341 break;
2342
2343 case IP_VS_SO_GET_DESTS:
2344 {
2345 struct ip_vs_get_dests *get;
2346 int size;
2347
2348 get = (struct ip_vs_get_dests *)arg;
2349 size = sizeof(*get) +
2350 sizeof(struct ip_vs_dest_entry) * get->num_dests;
2351 if (*len != size) {
2352 IP_VS_ERR("length: %u != %u\n", *len, size);
2353 ret = -EINVAL;
2354 goto out;
2355 }
2356 ret = __ip_vs_get_dest_entries(get, user);
2357 }
2358 break;
2359
2360 case IP_VS_SO_GET_TIMEOUT:
2361 {
2362 struct ip_vs_timeout_user t;
2363
2364 __ip_vs_get_timeouts(&t);
2365 if (copy_to_user(user, &t, sizeof(t)) != 0)
2366 ret = -EFAULT;
2367 }
2368 break;
2369
2370 case IP_VS_SO_GET_DAEMON:
2371 {
2372 struct ip_vs_daemon_user d[2];
2373
2374 memset(&d, 0, sizeof(d));
2375 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2376 d[0].state = IP_VS_STATE_MASTER;
4da62fc7 2377 strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
1da177e4
LT
2378 d[0].syncid = ip_vs_master_syncid;
2379 }
2380 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2381 d[1].state = IP_VS_STATE_BACKUP;
4da62fc7 2382 strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
1da177e4
LT
2383 d[1].syncid = ip_vs_backup_syncid;
2384 }
2385 if (copy_to_user(user, &d, sizeof(d)) != 0)
2386 ret = -EFAULT;
2387 }
2388 break;
2389
2390 default:
2391 ret = -EINVAL;
2392 }
2393
2394 out:
14cc3e2b 2395 mutex_unlock(&__ip_vs_mutex);
1da177e4
LT
2396 return ret;
2397}
2398
2399
2400static struct nf_sockopt_ops ip_vs_sockopts = {
2401 .pf = PF_INET,
2402 .set_optmin = IP_VS_BASE_CTL,
2403 .set_optmax = IP_VS_SO_SET_MAX+1,
2404 .set = do_ip_vs_set_ctl,
2405 .get_optmin = IP_VS_BASE_CTL,
2406 .get_optmax = IP_VS_SO_GET_MAX+1,
2407 .get = do_ip_vs_get_ctl,
16fcec35 2408 .owner = THIS_MODULE,
1da177e4
LT
2409};
2410
9a812198
JV
2411/*
2412 * Generic Netlink interface
2413 */
2414
2415/* IPVS genetlink family */
2416static struct genl_family ip_vs_genl_family = {
2417 .id = GENL_ID_GENERATE,
2418 .hdrsize = 0,
2419 .name = IPVS_GENL_NAME,
2420 .version = IPVS_GENL_VERSION,
2421 .maxattr = IPVS_CMD_MAX,
2422};
2423
2424/* Policy used for first-level command attributes */
2425static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2426 [IPVS_CMD_ATTR_SERVICE] = { .type = NLA_NESTED },
2427 [IPVS_CMD_ATTR_DEST] = { .type = NLA_NESTED },
2428 [IPVS_CMD_ATTR_DAEMON] = { .type = NLA_NESTED },
2429 [IPVS_CMD_ATTR_TIMEOUT_TCP] = { .type = NLA_U32 },
2430 [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2431 [IPVS_CMD_ATTR_TIMEOUT_UDP] = { .type = NLA_U32 },
2432};
2433
2434/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2435static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2436 [IPVS_DAEMON_ATTR_STATE] = { .type = NLA_U32 },
2437 [IPVS_DAEMON_ATTR_MCAST_IFN] = { .type = NLA_NUL_STRING,
2438 .len = IP_VS_IFNAME_MAXLEN },
2439 [IPVS_DAEMON_ATTR_SYNC_ID] = { .type = NLA_U32 },
2440};
2441
2442/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2443static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2444 [IPVS_SVC_ATTR_AF] = { .type = NLA_U16 },
2445 [IPVS_SVC_ATTR_PROTOCOL] = { .type = NLA_U16 },
2446 [IPVS_SVC_ATTR_ADDR] = { .type = NLA_BINARY,
2447 .len = sizeof(union nf_inet_addr) },
2448 [IPVS_SVC_ATTR_PORT] = { .type = NLA_U16 },
2449 [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 },
2450 [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING,
2451 .len = IP_VS_SCHEDNAME_MAXLEN },
2452 [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY,
2453 .len = sizeof(struct ip_vs_flags) },
2454 [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 },
2455 [IPVS_SVC_ATTR_NETMASK] = { .type = NLA_U32 },
2456 [IPVS_SVC_ATTR_STATS] = { .type = NLA_NESTED },
2457};
2458
2459/* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2460static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2461 [IPVS_DEST_ATTR_ADDR] = { .type = NLA_BINARY,
2462 .len = sizeof(union nf_inet_addr) },
2463 [IPVS_DEST_ATTR_PORT] = { .type = NLA_U16 },
2464 [IPVS_DEST_ATTR_FWD_METHOD] = { .type = NLA_U32 },
2465 [IPVS_DEST_ATTR_WEIGHT] = { .type = NLA_U32 },
2466 [IPVS_DEST_ATTR_U_THRESH] = { .type = NLA_U32 },
2467 [IPVS_DEST_ATTR_L_THRESH] = { .type = NLA_U32 },
2468 [IPVS_DEST_ATTR_ACTIVE_CONNS] = { .type = NLA_U32 },
2469 [IPVS_DEST_ATTR_INACT_CONNS] = { .type = NLA_U32 },
2470 [IPVS_DEST_ATTR_PERSIST_CONNS] = { .type = NLA_U32 },
2471 [IPVS_DEST_ATTR_STATS] = { .type = NLA_NESTED },
2472};
2473
2474static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2475 struct ip_vs_stats *stats)
2476{
2477 struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2478 if (!nl_stats)
2479 return -EMSGSIZE;
2480
2481 spin_lock_bh(&stats->lock);
2482
2483 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2484 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2485 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2486 NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2487 NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2488 NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2489 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2490 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2491 NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2492 NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2493
2494 spin_unlock_bh(&stats->lock);
2495
2496 nla_nest_end(skb, nl_stats);
2497
2498 return 0;
2499
2500nla_put_failure:
2501 spin_unlock_bh(&stats->lock);
2502 nla_nest_cancel(skb, nl_stats);
2503 return -EMSGSIZE;
2504}
2505
2506static int ip_vs_genl_fill_service(struct sk_buff *skb,
2507 struct ip_vs_service *svc)
2508{
2509 struct nlattr *nl_service;
2510 struct ip_vs_flags flags = { .flags = svc->flags,
2511 .mask = ~0 };
2512
2513 nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2514 if (!nl_service)
2515 return -EMSGSIZE;
2516
2517 NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2518
2519 if (svc->fwmark) {
2520 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2521 } else {
2522 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2523 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2524 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2525 }
2526
2527 NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2528 NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2529 NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2530 NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2531
2532 if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2533 goto nla_put_failure;
2534
2535 nla_nest_end(skb, nl_service);
2536
2537 return 0;
2538
2539nla_put_failure:
2540 nla_nest_cancel(skb, nl_service);
2541 return -EMSGSIZE;
2542}
2543
2544static int ip_vs_genl_dump_service(struct sk_buff *skb,
2545 struct ip_vs_service *svc,
2546 struct netlink_callback *cb)
2547{
2548 void *hdr;
2549
2550 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2551 &ip_vs_genl_family, NLM_F_MULTI,
2552 IPVS_CMD_NEW_SERVICE);
2553 if (!hdr)
2554 return -EMSGSIZE;
2555
2556 if (ip_vs_genl_fill_service(skb, svc) < 0)
2557 goto nla_put_failure;
2558
2559 return genlmsg_end(skb, hdr);
2560
2561nla_put_failure:
2562 genlmsg_cancel(skb, hdr);
2563 return -EMSGSIZE;
2564}
2565
2566static int ip_vs_genl_dump_services(struct sk_buff *skb,
2567 struct netlink_callback *cb)
2568{
2569 int idx = 0, i;
2570 int start = cb->args[0];
2571 struct ip_vs_service *svc;
2572
2573 mutex_lock(&__ip_vs_mutex);
2574 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2575 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2576 if (++idx <= start)
2577 continue;
2578 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2579 idx--;
2580 goto nla_put_failure;
2581 }
2582 }
2583 }
2584
2585 for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2586 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2587 if (++idx <= start)
2588 continue;
2589 if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2590 idx--;
2591 goto nla_put_failure;
2592 }
2593 }
2594 }
2595
2596nla_put_failure:
2597 mutex_unlock(&__ip_vs_mutex);
2598 cb->args[0] = idx;
2599
2600 return skb->len;
2601}
2602
c860c6b1 2603static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
9a812198
JV
2604 struct nlattr *nla, int full_entry)
2605{
2606 struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2607 struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2608
2609 /* Parse mandatory identifying service fields first */
2610 if (nla == NULL ||
2611 nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2612 return -EINVAL;
2613
2614 nla_af = attrs[IPVS_SVC_ATTR_AF];
2615 nla_protocol = attrs[IPVS_SVC_ATTR_PROTOCOL];
2616 nla_addr = attrs[IPVS_SVC_ATTR_ADDR];
2617 nla_port = attrs[IPVS_SVC_ATTR_PORT];
2618 nla_fwmark = attrs[IPVS_SVC_ATTR_FWMARK];
2619
2620 if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2621 return -EINVAL;
2622
c860c6b1 2623 usvc->af = nla_get_u16(nla_af);
9a812198
JV
2624 /* For now, only support IPv4 */
2625 if (nla_get_u16(nla_af) != AF_INET)
2626 return -EAFNOSUPPORT;
2627
2628 if (nla_fwmark) {
2629 usvc->protocol = IPPROTO_TCP;
2630 usvc->fwmark = nla_get_u32(nla_fwmark);
2631 } else {
2632 usvc->protocol = nla_get_u16(nla_protocol);
2633 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2634 usvc->port = nla_get_u16(nla_port);
2635 usvc->fwmark = 0;
2636 }
2637
2638 /* If a full entry was requested, check for the additional fields */
2639 if (full_entry) {
2640 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2641 *nla_netmask;
2642 struct ip_vs_flags flags;
2643 struct ip_vs_service *svc;
2644
2645 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2646 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2647 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2648 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2649
2650 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2651 return -EINVAL;
2652
2653 nla_memcpy(&flags, nla_flags, sizeof(flags));
2654
2655 /* prefill flags from service if it already exists */
2656 if (usvc->fwmark)
b18610de 2657 svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
9a812198 2658 else
b18610de
JV
2659 svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2660 &usvc->addr, usvc->port);
9a812198
JV
2661 if (svc) {
2662 usvc->flags = svc->flags;
2663 ip_vs_service_put(svc);
2664 } else
2665 usvc->flags = 0;
2666
2667 /* set new flags from userland */
2668 usvc->flags = (usvc->flags & ~flags.mask) |
2669 (flags.flags & flags.mask);
c860c6b1 2670 usvc->sched_name = nla_data(nla_sched);
9a812198
JV
2671 usvc->timeout = nla_get_u32(nla_timeout);
2672 usvc->netmask = nla_get_u32(nla_netmask);
2673 }
2674
2675 return 0;
2676}
2677
2678static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2679{
c860c6b1 2680 struct ip_vs_service_user_kern usvc;
9a812198
JV
2681 int ret;
2682
2683 ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2684 if (ret)
2685 return ERR_PTR(ret);
2686
2687 if (usvc.fwmark)
b18610de 2688 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198 2689 else
b18610de
JV
2690 return __ip_vs_service_get(usvc.af, usvc.protocol,
2691 &usvc.addr, usvc.port);
9a812198
JV
2692}
2693
2694static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2695{
2696 struct nlattr *nl_dest;
2697
2698 nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2699 if (!nl_dest)
2700 return -EMSGSIZE;
2701
2702 NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2703 NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2704
2705 NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2706 atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2707 NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2708 NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2709 NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2710 NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2711 atomic_read(&dest->activeconns));
2712 NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2713 atomic_read(&dest->inactconns));
2714 NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2715 atomic_read(&dest->persistconns));
2716
2717 if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2718 goto nla_put_failure;
2719
2720 nla_nest_end(skb, nl_dest);
2721
2722 return 0;
2723
2724nla_put_failure:
2725 nla_nest_cancel(skb, nl_dest);
2726 return -EMSGSIZE;
2727}
2728
2729static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2730 struct netlink_callback *cb)
2731{
2732 void *hdr;
2733
2734 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2735 &ip_vs_genl_family, NLM_F_MULTI,
2736 IPVS_CMD_NEW_DEST);
2737 if (!hdr)
2738 return -EMSGSIZE;
2739
2740 if (ip_vs_genl_fill_dest(skb, dest) < 0)
2741 goto nla_put_failure;
2742
2743 return genlmsg_end(skb, hdr);
2744
2745nla_put_failure:
2746 genlmsg_cancel(skb, hdr);
2747 return -EMSGSIZE;
2748}
2749
2750static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2751 struct netlink_callback *cb)
2752{
2753 int idx = 0;
2754 int start = cb->args[0];
2755 struct ip_vs_service *svc;
2756 struct ip_vs_dest *dest;
2757 struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2758
2759 mutex_lock(&__ip_vs_mutex);
2760
2761 /* Try to find the service for which to dump destinations */
2762 if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2763 IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2764 goto out_err;
2765
2766 svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2767 if (IS_ERR(svc) || svc == NULL)
2768 goto out_err;
2769
2770 /* Dump the destinations */
2771 list_for_each_entry(dest, &svc->destinations, n_list) {
2772 if (++idx <= start)
2773 continue;
2774 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2775 idx--;
2776 goto nla_put_failure;
2777 }
2778 }
2779
2780nla_put_failure:
2781 cb->args[0] = idx;
2782 ip_vs_service_put(svc);
2783
2784out_err:
2785 mutex_unlock(&__ip_vs_mutex);
2786
2787 return skb->len;
2788}
2789
c860c6b1 2790static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
9a812198
JV
2791 struct nlattr *nla, int full_entry)
2792{
2793 struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2794 struct nlattr *nla_addr, *nla_port;
2795
2796 /* Parse mandatory identifying destination fields first */
2797 if (nla == NULL ||
2798 nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2799 return -EINVAL;
2800
2801 nla_addr = attrs[IPVS_DEST_ATTR_ADDR];
2802 nla_port = attrs[IPVS_DEST_ATTR_PORT];
2803
2804 if (!(nla_addr && nla_port))
2805 return -EINVAL;
2806
2807 nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2808 udest->port = nla_get_u16(nla_port);
2809
2810 /* If a full entry was requested, check for the additional fields */
2811 if (full_entry) {
2812 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2813 *nla_l_thresh;
2814
2815 nla_fwd = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2816 nla_weight = attrs[IPVS_DEST_ATTR_WEIGHT];
2817 nla_u_thresh = attrs[IPVS_DEST_ATTR_U_THRESH];
2818 nla_l_thresh = attrs[IPVS_DEST_ATTR_L_THRESH];
2819
2820 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2821 return -EINVAL;
2822
2823 udest->conn_flags = nla_get_u32(nla_fwd)
2824 & IP_VS_CONN_F_FWD_MASK;
2825 udest->weight = nla_get_u32(nla_weight);
2826 udest->u_threshold = nla_get_u32(nla_u_thresh);
2827 udest->l_threshold = nla_get_u32(nla_l_thresh);
2828 }
2829
2830 return 0;
2831}
2832
2833static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2834 const char *mcast_ifn, __be32 syncid)
2835{
2836 struct nlattr *nl_daemon;
2837
2838 nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2839 if (!nl_daemon)
2840 return -EMSGSIZE;
2841
2842 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2843 NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2844 NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2845
2846 nla_nest_end(skb, nl_daemon);
2847
2848 return 0;
2849
2850nla_put_failure:
2851 nla_nest_cancel(skb, nl_daemon);
2852 return -EMSGSIZE;
2853}
2854
2855static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2856 const char *mcast_ifn, __be32 syncid,
2857 struct netlink_callback *cb)
2858{
2859 void *hdr;
2860 hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2861 &ip_vs_genl_family, NLM_F_MULTI,
2862 IPVS_CMD_NEW_DAEMON);
2863 if (!hdr)
2864 return -EMSGSIZE;
2865
2866 if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2867 goto nla_put_failure;
2868
2869 return genlmsg_end(skb, hdr);
2870
2871nla_put_failure:
2872 genlmsg_cancel(skb, hdr);
2873 return -EMSGSIZE;
2874}
2875
2876static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2877 struct netlink_callback *cb)
2878{
2879 mutex_lock(&__ip_vs_mutex);
2880 if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2881 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2882 ip_vs_master_mcast_ifn,
2883 ip_vs_master_syncid, cb) < 0)
2884 goto nla_put_failure;
2885
2886 cb->args[0] = 1;
2887 }
2888
2889 if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2890 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2891 ip_vs_backup_mcast_ifn,
2892 ip_vs_backup_syncid, cb) < 0)
2893 goto nla_put_failure;
2894
2895 cb->args[1] = 1;
2896 }
2897
2898nla_put_failure:
2899 mutex_unlock(&__ip_vs_mutex);
2900
2901 return skb->len;
2902}
2903
2904static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2905{
2906 if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2907 attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2908 attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2909 return -EINVAL;
2910
2911 return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2912 nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2913 nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2914}
2915
2916static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2917{
2918 if (!attrs[IPVS_DAEMON_ATTR_STATE])
2919 return -EINVAL;
2920
2921 return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2922}
2923
2924static int ip_vs_genl_set_config(struct nlattr **attrs)
2925{
2926 struct ip_vs_timeout_user t;
2927
2928 __ip_vs_get_timeouts(&t);
2929
2930 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
2931 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
2932
2933 if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
2934 t.tcp_fin_timeout =
2935 nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
2936
2937 if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
2938 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
2939
2940 return ip_vs_set_timeout(&t);
2941}
2942
2943static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
2944{
2945 struct ip_vs_service *svc = NULL;
c860c6b1
JV
2946 struct ip_vs_service_user_kern usvc;
2947 struct ip_vs_dest_user_kern udest;
9a812198
JV
2948 int ret = 0, cmd;
2949 int need_full_svc = 0, need_full_dest = 0;
2950
2951 cmd = info->genlhdr->cmd;
2952
2953 mutex_lock(&__ip_vs_mutex);
2954
2955 if (cmd == IPVS_CMD_FLUSH) {
2956 ret = ip_vs_flush();
2957 goto out;
2958 } else if (cmd == IPVS_CMD_SET_CONFIG) {
2959 ret = ip_vs_genl_set_config(info->attrs);
2960 goto out;
2961 } else if (cmd == IPVS_CMD_NEW_DAEMON ||
2962 cmd == IPVS_CMD_DEL_DAEMON) {
2963
2964 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
2965
2966 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
2967 nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
2968 info->attrs[IPVS_CMD_ATTR_DAEMON],
2969 ip_vs_daemon_policy)) {
2970 ret = -EINVAL;
2971 goto out;
2972 }
2973
2974 if (cmd == IPVS_CMD_NEW_DAEMON)
2975 ret = ip_vs_genl_new_daemon(daemon_attrs);
2976 else
2977 ret = ip_vs_genl_del_daemon(daemon_attrs);
2978 goto out;
2979 } else if (cmd == IPVS_CMD_ZERO &&
2980 !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
2981 ret = ip_vs_zero_all();
2982 goto out;
2983 }
2984
2985 /* All following commands require a service argument, so check if we
2986 * received a valid one. We need a full service specification when
2987 * adding / editing a service. Only identifying members otherwise. */
2988 if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
2989 need_full_svc = 1;
2990
2991 ret = ip_vs_genl_parse_service(&usvc,
2992 info->attrs[IPVS_CMD_ATTR_SERVICE],
2993 need_full_svc);
2994 if (ret)
2995 goto out;
2996
2997 /* Lookup the exact service by <protocol, addr, port> or fwmark */
2998 if (usvc.fwmark == 0)
b18610de
JV
2999 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3000 &usvc.addr, usvc.port);
9a812198 3001 else
b18610de 3002 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
9a812198
JV
3003
3004 /* Unless we're adding a new service, the service must already exist */
3005 if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3006 ret = -ESRCH;
3007 goto out;
3008 }
3009
3010 /* Destination commands require a valid destination argument. For
3011 * adding / editing a destination, we need a full destination
3012 * specification. */
3013 if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3014 cmd == IPVS_CMD_DEL_DEST) {
3015 if (cmd != IPVS_CMD_DEL_DEST)
3016 need_full_dest = 1;
3017
3018 ret = ip_vs_genl_parse_dest(&udest,
3019 info->attrs[IPVS_CMD_ATTR_DEST],
3020 need_full_dest);
3021 if (ret)
3022 goto out;
3023 }
3024
3025 switch (cmd) {
3026 case IPVS_CMD_NEW_SERVICE:
3027 if (svc == NULL)
3028 ret = ip_vs_add_service(&usvc, &svc);
3029 else
3030 ret = -EEXIST;
3031 break;
3032 case IPVS_CMD_SET_SERVICE:
3033 ret = ip_vs_edit_service(svc, &usvc);
3034 break;
3035 case IPVS_CMD_DEL_SERVICE:
3036 ret = ip_vs_del_service(svc);
3037 break;
3038 case IPVS_CMD_NEW_DEST:
3039 ret = ip_vs_add_dest(svc, &udest);
3040 break;
3041 case IPVS_CMD_SET_DEST:
3042 ret = ip_vs_edit_dest(svc, &udest);
3043 break;
3044 case IPVS_CMD_DEL_DEST:
3045 ret = ip_vs_del_dest(svc, &udest);
3046 break;
3047 case IPVS_CMD_ZERO:
3048 ret = ip_vs_zero_service(svc);
3049 break;
3050 default:
3051 ret = -EINVAL;
3052 }
3053
3054out:
3055 if (svc)
3056 ip_vs_service_put(svc);
3057 mutex_unlock(&__ip_vs_mutex);
3058
3059 return ret;
3060}
3061
3062static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3063{
3064 struct sk_buff *msg;
3065 void *reply;
3066 int ret, cmd, reply_cmd;
3067
3068 cmd = info->genlhdr->cmd;
3069
3070 if (cmd == IPVS_CMD_GET_SERVICE)
3071 reply_cmd = IPVS_CMD_NEW_SERVICE;
3072 else if (cmd == IPVS_CMD_GET_INFO)
3073 reply_cmd = IPVS_CMD_SET_INFO;
3074 else if (cmd == IPVS_CMD_GET_CONFIG)
3075 reply_cmd = IPVS_CMD_SET_CONFIG;
3076 else {
3077 IP_VS_ERR("unknown Generic Netlink command\n");
3078 return -EINVAL;
3079 }
3080
3081 msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3082 if (!msg)
3083 return -ENOMEM;
3084
3085 mutex_lock(&__ip_vs_mutex);
3086
3087 reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3088 if (reply == NULL)
3089 goto nla_put_failure;
3090
3091 switch (cmd) {
3092 case IPVS_CMD_GET_SERVICE:
3093 {
3094 struct ip_vs_service *svc;
3095
3096 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3097 if (IS_ERR(svc)) {
3098 ret = PTR_ERR(svc);
3099 goto out_err;
3100 } else if (svc) {
3101 ret = ip_vs_genl_fill_service(msg, svc);
3102 ip_vs_service_put(svc);
3103 if (ret)
3104 goto nla_put_failure;
3105 } else {
3106 ret = -ESRCH;
3107 goto out_err;
3108 }
3109
3110 break;
3111 }
3112
3113 case IPVS_CMD_GET_CONFIG:
3114 {
3115 struct ip_vs_timeout_user t;
3116
3117 __ip_vs_get_timeouts(&t);
3118#ifdef CONFIG_IP_VS_PROTO_TCP
3119 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3120 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3121 t.tcp_fin_timeout);
3122#endif
3123#ifdef CONFIG_IP_VS_PROTO_UDP
3124 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3125#endif
3126
3127 break;
3128 }
3129
3130 case IPVS_CMD_GET_INFO:
3131 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3132 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3133 IP_VS_CONN_TAB_SIZE);
3134 break;
3135 }
3136
3137 genlmsg_end(msg, reply);
3138 ret = genlmsg_unicast(msg, info->snd_pid);
3139 goto out;
3140
3141nla_put_failure:
3142 IP_VS_ERR("not enough space in Netlink message\n");
3143 ret = -EMSGSIZE;
3144
3145out_err:
3146 nlmsg_free(msg);
3147out:
3148 mutex_unlock(&__ip_vs_mutex);
3149
3150 return ret;
3151}
3152
3153
3154static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3155 {
3156 .cmd = IPVS_CMD_NEW_SERVICE,
3157 .flags = GENL_ADMIN_PERM,
3158 .policy = ip_vs_cmd_policy,
3159 .doit = ip_vs_genl_set_cmd,
3160 },
3161 {
3162 .cmd = IPVS_CMD_SET_SERVICE,
3163 .flags = GENL_ADMIN_PERM,
3164 .policy = ip_vs_cmd_policy,
3165 .doit = ip_vs_genl_set_cmd,
3166 },
3167 {
3168 .cmd = IPVS_CMD_DEL_SERVICE,
3169 .flags = GENL_ADMIN_PERM,
3170 .policy = ip_vs_cmd_policy,
3171 .doit = ip_vs_genl_set_cmd,
3172 },
3173 {
3174 .cmd = IPVS_CMD_GET_SERVICE,
3175 .flags = GENL_ADMIN_PERM,
3176 .doit = ip_vs_genl_get_cmd,
3177 .dumpit = ip_vs_genl_dump_services,
3178 .policy = ip_vs_cmd_policy,
3179 },
3180 {
3181 .cmd = IPVS_CMD_NEW_DEST,
3182 .flags = GENL_ADMIN_PERM,
3183 .policy = ip_vs_cmd_policy,
3184 .doit = ip_vs_genl_set_cmd,
3185 },
3186 {
3187 .cmd = IPVS_CMD_SET_DEST,
3188 .flags = GENL_ADMIN_PERM,
3189 .policy = ip_vs_cmd_policy,
3190 .doit = ip_vs_genl_set_cmd,
3191 },
3192 {
3193 .cmd = IPVS_CMD_DEL_DEST,
3194 .flags = GENL_ADMIN_PERM,
3195 .policy = ip_vs_cmd_policy,
3196 .doit = ip_vs_genl_set_cmd,
3197 },
3198 {
3199 .cmd = IPVS_CMD_GET_DEST,
3200 .flags = GENL_ADMIN_PERM,
3201 .policy = ip_vs_cmd_policy,
3202 .dumpit = ip_vs_genl_dump_dests,
3203 },
3204 {
3205 .cmd = IPVS_CMD_NEW_DAEMON,
3206 .flags = GENL_ADMIN_PERM,
3207 .policy = ip_vs_cmd_policy,
3208 .doit = ip_vs_genl_set_cmd,
3209 },
3210 {
3211 .cmd = IPVS_CMD_DEL_DAEMON,
3212 .flags = GENL_ADMIN_PERM,
3213 .policy = ip_vs_cmd_policy,
3214 .doit = ip_vs_genl_set_cmd,
3215 },
3216 {
3217 .cmd = IPVS_CMD_GET_DAEMON,
3218 .flags = GENL_ADMIN_PERM,
3219 .dumpit = ip_vs_genl_dump_daemons,
3220 },
3221 {
3222 .cmd = IPVS_CMD_SET_CONFIG,
3223 .flags = GENL_ADMIN_PERM,
3224 .policy = ip_vs_cmd_policy,
3225 .doit = ip_vs_genl_set_cmd,
3226 },
3227 {
3228 .cmd = IPVS_CMD_GET_CONFIG,
3229 .flags = GENL_ADMIN_PERM,
3230 .doit = ip_vs_genl_get_cmd,
3231 },
3232 {
3233 .cmd = IPVS_CMD_GET_INFO,
3234 .flags = GENL_ADMIN_PERM,
3235 .doit = ip_vs_genl_get_cmd,
3236 },
3237 {
3238 .cmd = IPVS_CMD_ZERO,
3239 .flags = GENL_ADMIN_PERM,
3240 .policy = ip_vs_cmd_policy,
3241 .doit = ip_vs_genl_set_cmd,
3242 },
3243 {
3244 .cmd = IPVS_CMD_FLUSH,
3245 .flags = GENL_ADMIN_PERM,
3246 .doit = ip_vs_genl_set_cmd,
3247 },
3248};
3249
3250static int __init ip_vs_genl_register(void)
3251{
3252 int ret, i;
3253
3254 ret = genl_register_family(&ip_vs_genl_family);
3255 if (ret)
3256 return ret;
3257
3258 for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3259 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3260 if (ret)
3261 goto err_out;
3262 }
3263 return 0;
3264
3265err_out:
3266 genl_unregister_family(&ip_vs_genl_family);
3267 return ret;
3268}
3269
3270static void ip_vs_genl_unregister(void)
3271{
3272 genl_unregister_family(&ip_vs_genl_family);
3273}
3274
3275/* End of Generic Netlink interface definitions */
3276
1da177e4 3277
048cf48b 3278int __init ip_vs_control_init(void)
1da177e4
LT
3279{
3280 int ret;
3281 int idx;
3282
3283 EnterFunction(2);
3284
3285 ret = nf_register_sockopt(&ip_vs_sockopts);
3286 if (ret) {
3287 IP_VS_ERR("cannot register sockopt.\n");
3288 return ret;
3289 }
3290
9a812198
JV
3291 ret = ip_vs_genl_register();
3292 if (ret) {
3293 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3294 nf_unregister_sockopt(&ip_vs_sockopts);
3295 return ret;
3296 }
3297
457c4cbc
EB
3298 proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3299 proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
1da177e4 3300
90754f8e 3301 sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
1da177e4
LT
3302
3303 /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3304 for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
3305 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3306 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3307 }
3308 for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++) {
3309 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3310 }
3311
1da177e4
LT
3312 ip_vs_new_estimator(&ip_vs_stats);
3313
3314 /* Hook the defense timer */
3315 schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3316
3317 LeaveFunction(2);
3318 return 0;
3319}
3320
3321
3322void ip_vs_control_cleanup(void)
3323{
3324 EnterFunction(2);
3325 ip_vs_trash_cleanup();
3326 cancel_rearming_delayed_work(&defense_work);
28e53bdd 3327 cancel_work_sync(&defense_work.work);
1da177e4
LT
3328 ip_vs_kill_estimator(&ip_vs_stats);
3329 unregister_sysctl_table(sysctl_header);
457c4cbc
EB
3330 proc_net_remove(&init_net, "ip_vs_stats");
3331 proc_net_remove(&init_net, "ip_vs");
9a812198 3332 ip_vs_genl_unregister();
1da177e4
LT
3333 nf_unregister_sockopt(&ip_vs_sockopts);
3334 LeaveFunction(2);
3335}