]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/ipvs/ip_vs_ctl.c
ipvs: Mark tcp/udp v4 and v6 debug functions static
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
25 #include <linux/fs.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
31
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
35
36 #include <net/net_namespace.h>
37 #include <net/ip.h>
38 #ifdef CONFIG_IP_VS_IPV6
39 #include <net/ipv6.h>
40 #include <net/ip6_route.h>
41 #endif
42 #include <net/route.h>
43 #include <net/sock.h>
44 #include <net/genetlink.h>
45
46 #include <asm/uaccess.h>
47
48 #include <net/ip_vs.h>
49
50 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
51 static DEFINE_MUTEX(__ip_vs_mutex);
52
53 /* lock for service table */
54 static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56 /* lock for table with the real services */
57 static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59 /* lock for state and timeout tables */
60 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62 /* lock for drop entry handling */
63 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65 /* lock for drop packet handling */
66 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68 /* 1/rate drop and drop-entry variables */
69 int ip_vs_drop_rate = 0;
70 int ip_vs_drop_counter = 0;
71 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73 /* number of virtual services */
74 static int ip_vs_num_services = 0;
75
76 /* sysctl variables */
77 static int sysctl_ip_vs_drop_entry = 0;
78 static int sysctl_ip_vs_drop_packet = 0;
79 static int sysctl_ip_vs_secure_tcp = 0;
80 static int sysctl_ip_vs_amemthresh = 1024;
81 static int sysctl_ip_vs_am_droprate = 10;
82 int sysctl_ip_vs_cache_bypass = 0;
83 int sysctl_ip_vs_expire_nodest_conn = 0;
84 int sysctl_ip_vs_expire_quiescent_template = 0;
85 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86 int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* debug verbosity; only built when CONFIG_IP_VS_DEBUG is set */
static int sysctl_ip_vs_debug_level;

/*
 *	Accessor for the file-local debug level.
 */
int ip_vs_get_debug_level(void)
{
	return sysctl_ip_vs_debug_level;
}
#endif
97
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Check whether an IPv6 address is local to this host.
 *
 *	The address is looked up in the init_net routing table with an
 *	unspecified source and no output interface; it is treated as local
 *	when the resulting route points at a loopback device.
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 *	Returns 1 if @addr is local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct dst_entry *dst;
	struct rt6_info *rt;
	int is_local = 0;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	dst = ip6_route_output(&init_net, NULL, &fl);
	rt = (struct rt6_info *)dst;
	if (rt && rt->rt6i_dev && (rt->rt6i_dev->flags & IFF_LOOPBACK))
		is_local = 1;

	/*
	 * The lookup hands back a referenced route; drop that reference
	 * once the device flags have been inspected, otherwise every call
	 * leaks a dst entry.  dst_release() accepts a NULL pointer.
	 */
	dst_release(dst);

	return is_local;
}
#endif
118 /*
119  *      update_defense_level is called from keventd and from sysctl,
120  *      so it needs to protect itself from softirqs
121  */
122 static void update_defense_level(void)
123 {
124         struct sysinfo i;
125         static int old_secure_tcp = 0;
126         int availmem;
127         int nomem;
128         int to_change = -1;
129
130         /* we only count free and buffered memory (in pages) */
131         si_meminfo(&i);
132         availmem = i.freeram + i.bufferram;
133         /* however in linux 2.5 the i.bufferram is total page cache size,
134            we need adjust it */
135         /* si_swapinfo(&i); */
136         /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138         nomem = (availmem < sysctl_ip_vs_amemthresh);
139
140         local_bh_disable();
141
142         /* drop_entry */
143         spin_lock(&__ip_vs_dropentry_lock);
144         switch (sysctl_ip_vs_drop_entry) {
145         case 0:
146                 atomic_set(&ip_vs_dropentry, 0);
147                 break;
148         case 1:
149                 if (nomem) {
150                         atomic_set(&ip_vs_dropentry, 1);
151                         sysctl_ip_vs_drop_entry = 2;
152                 } else {
153                         atomic_set(&ip_vs_dropentry, 0);
154                 }
155                 break;
156         case 2:
157                 if (nomem) {
158                         atomic_set(&ip_vs_dropentry, 1);
159                 } else {
160                         atomic_set(&ip_vs_dropentry, 0);
161                         sysctl_ip_vs_drop_entry = 1;
162                 };
163                 break;
164         case 3:
165                 atomic_set(&ip_vs_dropentry, 1);
166                 break;
167         }
168         spin_unlock(&__ip_vs_dropentry_lock);
169
170         /* drop_packet */
171         spin_lock(&__ip_vs_droppacket_lock);
172         switch (sysctl_ip_vs_drop_packet) {
173         case 0:
174                 ip_vs_drop_rate = 0;
175                 break;
176         case 1:
177                 if (nomem) {
178                         ip_vs_drop_rate = ip_vs_drop_counter
179                                 = sysctl_ip_vs_amemthresh /
180                                 (sysctl_ip_vs_amemthresh-availmem);
181                         sysctl_ip_vs_drop_packet = 2;
182                 } else {
183                         ip_vs_drop_rate = 0;
184                 }
185                 break;
186         case 2:
187                 if (nomem) {
188                         ip_vs_drop_rate = ip_vs_drop_counter
189                                 = sysctl_ip_vs_amemthresh /
190                                 (sysctl_ip_vs_amemthresh-availmem);
191                 } else {
192                         ip_vs_drop_rate = 0;
193                         sysctl_ip_vs_drop_packet = 1;
194                 }
195                 break;
196         case 3:
197                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198                 break;
199         }
200         spin_unlock(&__ip_vs_droppacket_lock);
201
202         /* secure_tcp */
203         write_lock(&__ip_vs_securetcp_lock);
204         switch (sysctl_ip_vs_secure_tcp) {
205         case 0:
206                 if (old_secure_tcp >= 2)
207                         to_change = 0;
208                 break;
209         case 1:
210                 if (nomem) {
211                         if (old_secure_tcp < 2)
212                                 to_change = 1;
213                         sysctl_ip_vs_secure_tcp = 2;
214                 } else {
215                         if (old_secure_tcp >= 2)
216                                 to_change = 0;
217                 }
218                 break;
219         case 2:
220                 if (nomem) {
221                         if (old_secure_tcp < 2)
222                                 to_change = 1;
223                 } else {
224                         if (old_secure_tcp >= 2)
225                                 to_change = 0;
226                         sysctl_ip_vs_secure_tcp = 1;
227                 }
228                 break;
229         case 3:
230                 if (old_secure_tcp < 2)
231                         to_change = 1;
232                 break;
233         }
234         old_secure_tcp = sysctl_ip_vs_secure_tcp;
235         if (to_change >= 0)
236                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237         write_unlock(&__ip_vs_securetcp_lock);
238
239         local_bh_enable();
240 }
241
242
243 /*
244  *      Timer for checking the defense
245  */
246 #define DEFENSE_TIMER_PERIOD    1*HZ
247 static void defense_work_handler(struct work_struct *work);
248 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
249
250 static void defense_work_handler(struct work_struct *work)
251 {
252         update_defense_level();
253         if (atomic_read(&ip_vs_dropentry))
254                 ip_vs_random_dropentry();
255
256         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257 }
258
259 int
260 ip_vs_use_count_inc(void)
261 {
262         return try_module_get(THIS_MODULE);
263 }
264
265 void
266 ip_vs_use_count_dec(void)
267 {
268         module_put(THIS_MODULE);
269 }
270
271
272 /*
273  *      Hash table: for virtual service lookups
274  */
275 #define IP_VS_SVC_TAB_BITS 8
276 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279 /* the service table hashed by <protocol, addr, port> */
280 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281 /* the service table hashed by fwmark */
282 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284 /*
285  *      Hash table: for real service lookups
286  */
287 #define IP_VS_RTAB_BITS 4
288 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293 /*
294  *      Trash for destinations
295  */
296 static LIST_HEAD(ip_vs_dest_trash);
297
298 /*
299  *      FTP & NULL virtual service counters
300  */
301 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305 /*
306  *      Returns hash value for virtual service
307  */
308 static __inline__ unsigned
309 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310                   __be16 port)
311 {
312         register unsigned porth = ntohs(port);
313         __be32 addr_fold = addr->ip;
314
315 #ifdef CONFIG_IP_VS_IPV6
316         if (af == AF_INET6)
317                 addr_fold = addr->ip6[0]^addr->ip6[1]^
318                             addr->ip6[2]^addr->ip6[3];
319 #endif
320
321         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
322                 & IP_VS_SVC_TAB_MASK;
323 }
324
325 /*
326  *      Returns hash value of fwmark for virtual service lookup
327  */
328 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329 {
330         return fwmark & IP_VS_SVC_TAB_MASK;
331 }
332
333 /*
334  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335  *      or in the ip_vs_svc_fwm_table by fwmark.
336  *      Should be called with locked tables.
337  */
338 static int ip_vs_svc_hash(struct ip_vs_service *svc)
339 {
340         unsigned hash;
341
342         if (svc->flags & IP_VS_SVC_F_HASHED) {
343                 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344                           "called from %p\n", __builtin_return_address(0));
345                 return 0;
346         }
347
348         if (svc->fwmark == 0) {
349                 /*
350                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
351                  */
352                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
353                                          svc->port);
354                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355         } else {
356                 /*
357                  *  Hash it by fwmark in ip_vs_svc_fwm_table
358                  */
359                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361         }
362
363         svc->flags |= IP_VS_SVC_F_HASHED;
364         /* increase its refcnt because it is referenced by the svc table */
365         atomic_inc(&svc->refcnt);
366         return 1;
367 }
368
369
370 /*
371  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372  *      Should be called with locked tables.
373  */
374 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375 {
376         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377                 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378                           "called from %p\n", __builtin_return_address(0));
379                 return 0;
380         }
381
382         if (svc->fwmark == 0) {
383                 /* Remove it from the ip_vs_svc_table table */
384                 list_del(&svc->s_list);
385         } else {
386                 /* Remove it from the ip_vs_svc_fwm_table table */
387                 list_del(&svc->f_list);
388         }
389
390         svc->flags &= ~IP_VS_SVC_F_HASHED;
391         atomic_dec(&svc->refcnt);
392         return 1;
393 }
394
395
396 /*
397  *      Get service by {proto,addr,port} in the service table.
398  */
399 static inline struct ip_vs_service *
400 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401                     __be16 vport)
402 {
403         unsigned hash;
404         struct ip_vs_service *svc;
405
406         /* Check for "full" addressed entries */
407         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
408
409         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
410                 if ((svc->af == af)
411                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
412                     && (svc->port == vport)
413                     && (svc->protocol == protocol)) {
414                         /* HIT */
415                         atomic_inc(&svc->usecnt);
416                         return svc;
417                 }
418         }
419
420         return NULL;
421 }
422
423
424 /*
425  *      Get service by {fwmark} in the service table.
426  */
427 static inline struct ip_vs_service *
428 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
429 {
430         unsigned hash;
431         struct ip_vs_service *svc;
432
433         /* Check for fwmark addressed entries */
434         hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
437                 if (svc->fwmark == fwmark && svc->af == af) {
438                         /* HIT */
439                         atomic_inc(&svc->usecnt);
440                         return svc;
441                 }
442         }
443
444         return NULL;
445 }
446
447 struct ip_vs_service *
448 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449                   const union nf_inet_addr *vaddr, __be16 vport)
450 {
451         struct ip_vs_service *svc;
452
453         read_lock(&__ip_vs_svc_lock);
454
455         /*
456          *      Check the table hashed by fwmark first
457          */
458         if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
459                 goto out;
460
461         /*
462          *      Check the table hashed by <protocol,addr,port>
463          *      for "full" addressed entries
464          */
465         svc = __ip_vs_service_get(af, protocol, vaddr, vport);
466
467         if (svc == NULL
468             && protocol == IPPROTO_TCP
469             && atomic_read(&ip_vs_ftpsvc_counter)
470             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471                 /*
472                  * Check if ftp service entry exists, the packet
473                  * might belong to FTP data connections.
474                  */
475                 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
476         }
477
478         if (svc == NULL
479             && atomic_read(&ip_vs_nullsvc_counter)) {
480                 /*
481                  * Check if the catch-all port (port zero) exists
482                  */
483                 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
484         }
485
486   out:
487         read_unlock(&__ip_vs_svc_lock);
488
489         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490                       fwmark, ip_vs_proto_name(protocol),
491                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492                       svc ? "hit" : "not hit");
493
494         return svc;
495 }
496
497
498 static inline void
499 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500 {
501         atomic_inc(&svc->refcnt);
502         dest->svc = svc;
503 }
504
505 static inline void
506 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
507 {
508         struct ip_vs_service *svc = dest->svc;
509
510         dest->svc = NULL;
511         if (atomic_dec_and_test(&svc->refcnt))
512                 kfree(svc);
513 }
514
515
516 /*
517  *      Returns hash value for real service
518  */
519 static inline unsigned ip_vs_rs_hashkey(int af,
520                                             const union nf_inet_addr *addr,
521                                             __be16 port)
522 {
523         register unsigned porth = ntohs(port);
524         __be32 addr_fold = addr->ip;
525
526 #ifdef CONFIG_IP_VS_IPV6
527         if (af == AF_INET6)
528                 addr_fold = addr->ip6[0]^addr->ip6[1]^
529                             addr->ip6[2]^addr->ip6[3];
530 #endif
531
532         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
533                 & IP_VS_RTAB_MASK;
534 }
535
536 /*
537  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538  *      should be called with locked tables.
539  */
540 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541 {
542         unsigned hash;
543
544         if (!list_empty(&dest->d_list)) {
545                 return 0;
546         }
547
548         /*
549          *      Hash by proto,addr,port,
550          *      which are the parameters of the real service.
551          */
552         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
554         list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556         return 1;
557 }
558
559 /*
560  *      UNhashes ip_vs_dest from ip_vs_rtable.
561  *      should be called with locked tables.
562  */
563 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564 {
565         /*
566          * Remove it from the ip_vs_rtable table.
567          */
568         if (!list_empty(&dest->d_list)) {
569                 list_del(&dest->d_list);
570                 INIT_LIST_HEAD(&dest->d_list);
571         }
572
573         return 1;
574 }
575
576 /*
577  *      Lookup real service by <proto,addr,port> in the real service table.
578  */
579 struct ip_vs_dest *
580 ip_vs_lookup_real_service(int af, __u16 protocol,
581                           const union nf_inet_addr *daddr,
582                           __be16 dport)
583 {
584         unsigned hash;
585         struct ip_vs_dest *dest;
586
587         /*
588          *      Check for "full" addressed entries
589          *      Return the first found entry
590          */
591         hash = ip_vs_rs_hashkey(af, daddr, dport);
592
593         read_lock(&__ip_vs_rs_lock);
594         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
595                 if ((dest->af == af)
596                     && ip_vs_addr_equal(af, &dest->addr, daddr)
597                     && (dest->port == dport)
598                     && ((dest->protocol == protocol) ||
599                         dest->vfwmark)) {
600                         /* HIT */
601                         read_unlock(&__ip_vs_rs_lock);
602                         return dest;
603                 }
604         }
605         read_unlock(&__ip_vs_rs_lock);
606
607         return NULL;
608 }
609
610 /*
611  *      Lookup destination by {addr,port} in the given service
612  */
613 static struct ip_vs_dest *
614 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615                   __be16 dport)
616 {
617         struct ip_vs_dest *dest;
618
619         /*
620          * Find the destination for the given service
621          */
622         list_for_each_entry(dest, &svc->destinations, n_list) {
623                 if ((dest->af == svc->af)
624                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625                     && (dest->port == dport)) {
626                         /* HIT */
627                         return dest;
628                 }
629         }
630
631         return NULL;
632 }
633
634 /*
635  * Find destination by {daddr,dport,vaddr,protocol}
636  * Cretaed to be used in ip_vs_process_message() in
637  * the backup synchronization daemon. It finds the
638  * destination to be bound to the received connection
639  * on the backup.
640  *
641  * ip_vs_lookup_real_service() looked promissing, but
642  * seems not working as expected.
643  */
644 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645                                    __be16 dport,
646                                    const union nf_inet_addr *vaddr,
647                                    __be16 vport, __u16 protocol)
648 {
649         struct ip_vs_dest *dest;
650         struct ip_vs_service *svc;
651
652         svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
653         if (!svc)
654                 return NULL;
655         dest = ip_vs_lookup_dest(svc, daddr, dport);
656         if (dest)
657                 atomic_inc(&dest->refcnt);
658         ip_vs_service_put(svc);
659         return dest;
660 }
661
662 /*
663  *  Lookup dest by {svc,addr,port} in the destination trash.
664  *  The destination trash is used to hold the destinations that are removed
665  *  from the service table but are still referenced by some conn entries.
666  *  The reason to add the destination trash is when the dest is temporary
667  *  down (either by administrator or by monitor program), the dest can be
668  *  picked back from the trash, the remaining connections to the dest can
669  *  continue, and the counting information of the dest is also useful for
670  *  scheduling.
671  */
672 static struct ip_vs_dest *
673 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674                      __be16 dport)
675 {
676         struct ip_vs_dest *dest, *nxt;
677
678         /*
679          * Find the destination in trash
680          */
681         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
682                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683                               "dest->refcnt=%d\n",
684                               dest->vfwmark,
685                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
686                               ntohs(dest->port),
687                               atomic_read(&dest->refcnt));
688                 if (dest->af == svc->af &&
689                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
690                     dest->port == dport &&
691                     dest->vfwmark == svc->fwmark &&
692                     dest->protocol == svc->protocol &&
693                     (svc->fwmark ||
694                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
695                       dest->vport == svc->port))) {
696                         /* HIT */
697                         return dest;
698                 }
699
700                 /*
701                  * Try to purge the destination from trash if not referenced
702                  */
703                 if (atomic_read(&dest->refcnt) == 1) {
704                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705                                       "from trash\n",
706                                       dest->vfwmark,
707                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
708                                       ntohs(dest->port));
709                         list_del(&dest->n_list);
710                         ip_vs_dst_reset(dest);
711                         __ip_vs_unbind_svc(dest);
712                         kfree(dest);
713                 }
714         }
715
716         return NULL;
717 }
718
719
720 /*
721  *  Clean up all the destinations in the trash
722  *  Called by the ip_vs_control_cleanup()
723  *
724  *  When the ip_vs_control_clearup is activated by ipvs module exit,
725  *  the service tables must have been flushed and all the connections
726  *  are expired, and the refcnt of each destination in the trash must
727  *  be 1, so we simply release them here.
728  */
729 static void ip_vs_trash_cleanup(void)
730 {
731         struct ip_vs_dest *dest, *nxt;
732
733         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734                 list_del(&dest->n_list);
735                 ip_vs_dst_reset(dest);
736                 __ip_vs_unbind_svc(dest);
737                 kfree(dest);
738         }
739 }
740
741
742 static void
743 ip_vs_zero_stats(struct ip_vs_stats *stats)
744 {
745         spin_lock_bh(&stats->lock);
746
747         stats->conns = 0;
748         stats->inpkts = 0;
749         stats->outpkts = 0;
750         stats->inbytes = 0;
751         stats->outbytes = 0;
752
753         stats->cps = 0;
754         stats->inpps = 0;
755         stats->outpps = 0;
756         stats->inbps = 0;
757         stats->outbps = 0;
758
759         ip_vs_zero_estimator(stats);
760
761         spin_unlock_bh(&stats->lock);
762 }
763
764 /*
765  *      Update a destination in the given service
766  */
767 static void
768 __ip_vs_update_dest(struct ip_vs_service *svc,
769                     struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
770 {
771         int conn_flags;
772
773         /* set the weight and the flags */
774         atomic_set(&dest->weight, udest->weight);
775         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777         /* check if local node and update the flags */
778 #ifdef CONFIG_IP_VS_IPV6
779         if (svc->af == AF_INET6) {
780                 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782                                 | IP_VS_CONN_F_LOCALNODE;
783                 }
784         } else
785 #endif
786                 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788                                 | IP_VS_CONN_F_LOCALNODE;
789                 }
790
791         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794         } else {
795                 /*
796                  *    Put the real service in ip_vs_rtable if not present.
797                  *    For now only for NAT!
798                  */
799                 write_lock_bh(&__ip_vs_rs_lock);
800                 ip_vs_rs_hash(dest);
801                 write_unlock_bh(&__ip_vs_rs_lock);
802         }
803         atomic_set(&dest->conn_flags, conn_flags);
804
805         /* bind the service */
806         if (!dest->svc) {
807                 __ip_vs_bind_svc(dest, svc);
808         } else {
809                 if (dest->svc != svc) {
810                         __ip_vs_unbind_svc(dest);
811                         ip_vs_zero_stats(&dest->stats);
812                         __ip_vs_bind_svc(dest, svc);
813                 }
814         }
815
816         /* set the dest status flags */
817         dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821         dest->u_threshold = udest->u_threshold;
822         dest->l_threshold = udest->l_threshold;
823 }
824
825
826 /*
827  *      Create a destination for the given service
828  */
829 static int
830 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
831                struct ip_vs_dest **dest_p)
832 {
833         struct ip_vs_dest *dest;
834         unsigned atype;
835
836         EnterFunction(2);
837
838 #ifdef CONFIG_IP_VS_IPV6
839         if (svc->af == AF_INET6) {
840                 atype = ipv6_addr_type(&udest->addr.in6);
841                 if (!(atype & IPV6_ADDR_UNICAST) &&
842                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
843                         return -EINVAL;
844         } else
845 #endif
846         {
847                 atype = inet_addr_type(&init_net, udest->addr.ip);
848                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
849                         return -EINVAL;
850         }
851
852         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
853         if (dest == NULL) {
854                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
855                 return -ENOMEM;
856         }
857
858         dest->af = svc->af;
859         dest->protocol = svc->protocol;
860         dest->vaddr = svc->addr;
861         dest->vport = svc->port;
862         dest->vfwmark = svc->fwmark;
863         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
864         dest->port = udest->port;
865
866         atomic_set(&dest->activeconns, 0);
867         atomic_set(&dest->inactconns, 0);
868         atomic_set(&dest->persistconns, 0);
869         atomic_set(&dest->refcnt, 0);
870
871         INIT_LIST_HEAD(&dest->d_list);
872         spin_lock_init(&dest->dst_lock);
873         spin_lock_init(&dest->stats.lock);
874         __ip_vs_update_dest(svc, dest, udest);
875         ip_vs_new_estimator(&dest->stats);
876
877         *dest_p = dest;
878
879         LeaveFunction(2);
880         return 0;
881 }
882
883
884 /*
885  *      Add a destination into an existing service
886  */
887 static int
888 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
889 {
890         struct ip_vs_dest *dest;
891         union nf_inet_addr daddr;
892         __be16 dport = udest->port;
893         int ret;
894
895         EnterFunction(2);
896
897         if (udest->weight < 0) {
898                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
899                 return -ERANGE;
900         }
901
902         if (udest->l_threshold > udest->u_threshold) {
903                 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
904                           "upper threshold\n");
905                 return -ERANGE;
906         }
907
908         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
909
910         /*
911          * Check if the dest already exists in the list
912          */
913         dest = ip_vs_lookup_dest(svc, &daddr, dport);
914
915         if (dest != NULL) {
916                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
917                 return -EEXIST;
918         }
919
920         /*
921          * Check if the dest already exists in the trash and
922          * is from the same service
923          */
924         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
925
926         if (dest != NULL) {
927                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
928                               "dest->refcnt=%d, service %u/%s:%u\n",
929                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
930                               atomic_read(&dest->refcnt),
931                               dest->vfwmark,
932                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
933                               ntohs(dest->vport));
934
935                 __ip_vs_update_dest(svc, dest, udest);
936
937                 /*
938                  * Get the destination from the trash
939                  */
940                 list_del(&dest->n_list);
941
942                 ip_vs_new_estimator(&dest->stats);
943
944                 write_lock_bh(&__ip_vs_svc_lock);
945
946                 /*
947                  * Wait until all other svc users go away.
948                  */
949                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
950
951                 list_add(&dest->n_list, &svc->destinations);
952                 svc->num_dests++;
953
954                 /* call the update_service function of its scheduler */
955                 if (svc->scheduler->update_service)
956                         svc->scheduler->update_service(svc);
957
958                 write_unlock_bh(&__ip_vs_svc_lock);
959                 return 0;
960         }
961
962         /*
963          * Allocate and initialize the dest structure
964          */
965         ret = ip_vs_new_dest(svc, udest, &dest);
966         if (ret) {
967                 return ret;
968         }
969
970         /*
971          * Add the dest entry into the list
972          */
973         atomic_inc(&dest->refcnt);
974
975         write_lock_bh(&__ip_vs_svc_lock);
976
977         /*
978          * Wait until all other svc users go away.
979          */
980         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
981
982         list_add(&dest->n_list, &svc->destinations);
983         svc->num_dests++;
984
985         /* call the update_service function of its scheduler */
986         if (svc->scheduler->update_service)
987                 svc->scheduler->update_service(svc);
988
989         write_unlock_bh(&__ip_vs_svc_lock);
990
991         LeaveFunction(2);
992
993         return 0;
994 }
995
996
997 /*
998  *      Edit a destination in the given service
999  */
1000 static int
1001 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1002 {
1003         struct ip_vs_dest *dest;
1004         union nf_inet_addr daddr;
1005         __be16 dport = udest->port;
1006
1007         EnterFunction(2);
1008
1009         if (udest->weight < 0) {
1010                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1011                 return -ERANGE;
1012         }
1013
1014         if (udest->l_threshold > udest->u_threshold) {
1015                 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1016                           "upper threshold\n");
1017                 return -ERANGE;
1018         }
1019
1020         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1021
1022         /*
1023          *  Lookup the destination list
1024          */
1025         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1026
1027         if (dest == NULL) {
1028                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1029                 return -ENOENT;
1030         }
1031
1032         __ip_vs_update_dest(svc, dest, udest);
1033
1034         write_lock_bh(&__ip_vs_svc_lock);
1035
1036         /* Wait until all other svc users go away */
1037         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1038
1039         /* call the update_service, because server weight may be changed */
1040         if (svc->scheduler->update_service)
1041                 svc->scheduler->update_service(svc);
1042
1043         write_unlock_bh(&__ip_vs_svc_lock);
1044
1045         LeaveFunction(2);
1046
1047         return 0;
1048 }
1049
1050
1051 /*
1052  *      Delete a destination (must be already unlinked from the service)
1053  */
1054 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1055 {
1056         ip_vs_kill_estimator(&dest->stats);
1057
1058         /*
1059          *  Remove it from the d-linked list with the real services.
1060          */
1061         write_lock_bh(&__ip_vs_rs_lock);
1062         ip_vs_rs_unhash(dest);
1063         write_unlock_bh(&__ip_vs_rs_lock);
1064
1065         /*
1066          *  Decrease the refcnt of the dest, and free the dest
1067          *  if nobody refers to it (refcnt=0). Otherwise, throw
1068          *  the destination into the trash.
1069          */
1070         if (atomic_dec_and_test(&dest->refcnt)) {
1071                 ip_vs_dst_reset(dest);
1072                 /* simply decrease svc->refcnt here, let the caller check
1073                    and release the service if nobody refers to it.
1074                    Only user context can release destination and service,
1075                    and only one user context can update virtual service at a
1076                    time, so the operation here is OK */
1077                 atomic_dec(&dest->svc->refcnt);
1078                 kfree(dest);
1079         } else {
1080                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1081                               "dest->refcnt=%d\n",
1082                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1083                               ntohs(dest->port),
1084                               atomic_read(&dest->refcnt));
1085                 list_add(&dest->n_list, &ip_vs_dest_trash);
1086                 atomic_inc(&dest->refcnt);
1087         }
1088 }
1089
1090
1091 /*
1092  *      Unlink a destination from the given service
1093  */
1094 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1095                                 struct ip_vs_dest *dest,
1096                                 int svcupd)
1097 {
1098         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1099
1100         /*
1101          *  Remove it from the d-linked destination list.
1102          */
1103         list_del(&dest->n_list);
1104         svc->num_dests--;
1105
1106         /*
1107          *  Call the update_service function of its scheduler
1108          */
1109         if (svcupd && svc->scheduler->update_service)
1110                         svc->scheduler->update_service(svc);
1111 }
1112
1113
1114 /*
1115  *      Delete a destination server in the given service
1116  */
1117 static int
1118 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1119 {
1120         struct ip_vs_dest *dest;
1121         __be16 dport = udest->port;
1122
1123         EnterFunction(2);
1124
1125         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1126
1127         if (dest == NULL) {
1128                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1129                 return -ENOENT;
1130         }
1131
1132         write_lock_bh(&__ip_vs_svc_lock);
1133
1134         /*
1135          *      Wait until all other svc users go away.
1136          */
1137         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1138
1139         /*
1140          *      Unlink dest from the service
1141          */
1142         __ip_vs_unlink_dest(svc, dest, 1);
1143
1144         write_unlock_bh(&__ip_vs_svc_lock);
1145
1146         /*
1147          *      Delete the destination
1148          */
1149         __ip_vs_del_dest(dest);
1150
1151         LeaveFunction(2);
1152
1153         return 0;
1154 }
1155
1156
1157 /*
1158  *      Add a service into the service hash table
1159  */
1160 static int
1161 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1162                   struct ip_vs_service **svc_p)
1163 {
1164         int ret = 0;
1165         struct ip_vs_scheduler *sched = NULL;
1166         struct ip_vs_service *svc = NULL;
1167
1168         /* increase the module use count */
1169         ip_vs_use_count_inc();
1170
1171         /* Lookup the scheduler by 'u->sched_name' */
1172         sched = ip_vs_scheduler_get(u->sched_name);
1173         if (sched == NULL) {
1174                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1175                            u->sched_name);
1176                 ret = -ENOENT;
1177                 goto out_mod_dec;
1178         }
1179
1180 #ifdef CONFIG_IP_VS_IPV6
1181         if (u->af == AF_INET6) {
1182                 if (!sched->supports_ipv6) {
1183                         ret = -EAFNOSUPPORT;
1184                         goto out_err;
1185                 }
1186                 if ((u->netmask < 1) || (u->netmask > 128)) {
1187                         ret = -EINVAL;
1188                         goto out_err;
1189                 }
1190         }
1191 #endif
1192
1193         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1194         if (svc == NULL) {
1195                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1196                 ret = -ENOMEM;
1197                 goto out_err;
1198         }
1199
1200         /* I'm the first user of the service */
1201         atomic_set(&svc->usecnt, 1);
1202         atomic_set(&svc->refcnt, 0);
1203
1204         svc->af = u->af;
1205         svc->protocol = u->protocol;
1206         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1207         svc->port = u->port;
1208         svc->fwmark = u->fwmark;
1209         svc->flags = u->flags;
1210         svc->timeout = u->timeout * HZ;
1211         svc->netmask = u->netmask;
1212
1213         INIT_LIST_HEAD(&svc->destinations);
1214         rwlock_init(&svc->sched_lock);
1215         spin_lock_init(&svc->stats.lock);
1216
1217         /* Bind the scheduler */
1218         ret = ip_vs_bind_scheduler(svc, sched);
1219         if (ret)
1220                 goto out_err;
1221         sched = NULL;
1222
1223         /* Update the virtual service counters */
1224         if (svc->port == FTPPORT)
1225                 atomic_inc(&ip_vs_ftpsvc_counter);
1226         else if (svc->port == 0)
1227                 atomic_inc(&ip_vs_nullsvc_counter);
1228
1229         ip_vs_new_estimator(&svc->stats);
1230
1231         /* Count only IPv4 services for old get/setsockopt interface */
1232         if (svc->af == AF_INET)
1233                 ip_vs_num_services++;
1234
1235         /* Hash the service into the service table */
1236         write_lock_bh(&__ip_vs_svc_lock);
1237         ip_vs_svc_hash(svc);
1238         write_unlock_bh(&__ip_vs_svc_lock);
1239
1240         *svc_p = svc;
1241         return 0;
1242
1243   out_err:
1244         if (svc != NULL) {
1245                 if (svc->scheduler)
1246                         ip_vs_unbind_scheduler(svc);
1247                 if (svc->inc) {
1248                         local_bh_disable();
1249                         ip_vs_app_inc_put(svc->inc);
1250                         local_bh_enable();
1251                 }
1252                 kfree(svc);
1253         }
1254         ip_vs_scheduler_put(sched);
1255
1256   out_mod_dec:
1257         /* decrease the module use count */
1258         ip_vs_use_count_dec();
1259
1260         return ret;
1261 }
1262
1263
1264 /*
1265  *      Edit a service and bind it with a new scheduler
1266  */
1267 static int
1268 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1269 {
1270         struct ip_vs_scheduler *sched, *old_sched;
1271         int ret = 0;
1272
1273         /*
1274          * Lookup the scheduler, by 'u->sched_name'
1275          */
1276         sched = ip_vs_scheduler_get(u->sched_name);
1277         if (sched == NULL) {
1278                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1279                            u->sched_name);
1280                 return -ENOENT;
1281         }
1282         old_sched = sched;
1283
1284 #ifdef CONFIG_IP_VS_IPV6
1285         if (u->af == AF_INET6) {
1286                 if (!sched->supports_ipv6) {
1287                         ret = -EAFNOSUPPORT;
1288                         goto out;
1289                 }
1290                 if ((u->netmask < 1) || (u->netmask > 128)) {
1291                         ret = -EINVAL;
1292                         goto out;
1293                 }
1294         }
1295 #endif
1296
1297         write_lock_bh(&__ip_vs_svc_lock);
1298
1299         /*
1300          * Wait until all other svc users go away.
1301          */
1302         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1303
1304         /*
1305          * Set the flags and timeout value
1306          */
1307         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1308         svc->timeout = u->timeout * HZ;
1309         svc->netmask = u->netmask;
1310
1311         old_sched = svc->scheduler;
1312         if (sched != old_sched) {
1313                 /*
1314                  * Unbind the old scheduler
1315                  */
1316                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1317                         old_sched = sched;
1318                         goto out;
1319                 }
1320
1321                 /*
1322                  * Bind the new scheduler
1323                  */
1324                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1325                         /*
1326                          * If ip_vs_bind_scheduler fails, restore the old
1327                          * scheduler.
1328                          * The main reason of failure is out of memory.
1329                          *
1330                          * The question is if the old scheduler can be
1331                          * restored all the time. TODO: if it cannot be
1332                          * restored some time, we must delete the service,
1333                          * otherwise the system may crash.
1334                          */
1335                         ip_vs_bind_scheduler(svc, old_sched);
1336                         old_sched = sched;
1337                         goto out;
1338                 }
1339         }
1340
1341   out:
1342         write_unlock_bh(&__ip_vs_svc_lock);
1343
1344         if (old_sched)
1345                 ip_vs_scheduler_put(old_sched);
1346
1347         return ret;
1348 }
1349
1350
1351 /*
1352  *      Delete a service from the service list
1353  *      - The service must be unlinked, unlocked and not referenced!
1354  *      - We are called under _bh lock
1355  */
1356 static void __ip_vs_del_service(struct ip_vs_service *svc)
1357 {
1358         struct ip_vs_dest *dest, *nxt;
1359         struct ip_vs_scheduler *old_sched;
1360
1361         /* Count only IPv4 services for old get/setsockopt interface */
1362         if (svc->af == AF_INET)
1363                 ip_vs_num_services--;
1364
1365         ip_vs_kill_estimator(&svc->stats);
1366
1367         /* Unbind scheduler */
1368         old_sched = svc->scheduler;
1369         ip_vs_unbind_scheduler(svc);
1370         if (old_sched)
1371                 ip_vs_scheduler_put(old_sched);
1372
1373         /* Unbind app inc */
1374         if (svc->inc) {
1375                 ip_vs_app_inc_put(svc->inc);
1376                 svc->inc = NULL;
1377         }
1378
1379         /*
1380          *    Unlink the whole destination list
1381          */
1382         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1383                 __ip_vs_unlink_dest(svc, dest, 0);
1384                 __ip_vs_del_dest(dest);
1385         }
1386
1387         /*
1388          *    Update the virtual service counters
1389          */
1390         if (svc->port == FTPPORT)
1391                 atomic_dec(&ip_vs_ftpsvc_counter);
1392         else if (svc->port == 0)
1393                 atomic_dec(&ip_vs_nullsvc_counter);
1394
1395         /*
1396          *    Free the service if nobody refers to it
1397          */
1398         if (atomic_read(&svc->refcnt) == 0)
1399                 kfree(svc);
1400
1401         /* decrease the module use count */
1402         ip_vs_use_count_dec();
1403 }
1404
1405 /*
1406  *      Delete a service from the service list
1407  */
1408 static int ip_vs_del_service(struct ip_vs_service *svc)
1409 {
1410         if (svc == NULL)
1411                 return -EEXIST;
1412
1413         /*
1414          * Unhash it from the service table
1415          */
1416         write_lock_bh(&__ip_vs_svc_lock);
1417
1418         ip_vs_svc_unhash(svc);
1419
1420         /*
1421          * Wait until all the svc users go away.
1422          */
1423         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1424
1425         __ip_vs_del_service(svc);
1426
1427         write_unlock_bh(&__ip_vs_svc_lock);
1428
1429         return 0;
1430 }
1431
1432
1433 /*
1434  *      Flush all the virtual services
1435  */
1436 static int ip_vs_flush(void)
1437 {
1438         int idx;
1439         struct ip_vs_service *svc, *nxt;
1440
1441         /*
1442          * Flush the service table hashed by <protocol,addr,port>
1443          */
1444         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1445                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1446                         write_lock_bh(&__ip_vs_svc_lock);
1447                         ip_vs_svc_unhash(svc);
1448                         /*
1449                          * Wait until all the svc users go away.
1450                          */
1451                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1452                         __ip_vs_del_service(svc);
1453                         write_unlock_bh(&__ip_vs_svc_lock);
1454                 }
1455         }
1456
1457         /*
1458          * Flush the service table hashed by fwmark
1459          */
1460         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1461                 list_for_each_entry_safe(svc, nxt,
1462                                          &ip_vs_svc_fwm_table[idx], f_list) {
1463                         write_lock_bh(&__ip_vs_svc_lock);
1464                         ip_vs_svc_unhash(svc);
1465                         /*
1466                          * Wait until all the svc users go away.
1467                          */
1468                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1469                         __ip_vs_del_service(svc);
1470                         write_unlock_bh(&__ip_vs_svc_lock);
1471                 }
1472         }
1473
1474         return 0;
1475 }
1476
1477
1478 /*
1479  *      Zero counters in a service or all services
1480  */
1481 static int ip_vs_zero_service(struct ip_vs_service *svc)
1482 {
1483         struct ip_vs_dest *dest;
1484
1485         write_lock_bh(&__ip_vs_svc_lock);
1486         list_for_each_entry(dest, &svc->destinations, n_list) {
1487                 ip_vs_zero_stats(&dest->stats);
1488         }
1489         ip_vs_zero_stats(&svc->stats);
1490         write_unlock_bh(&__ip_vs_svc_lock);
1491         return 0;
1492 }
1493
1494 static int ip_vs_zero_all(void)
1495 {
1496         int idx;
1497         struct ip_vs_service *svc;
1498
1499         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1500                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1501                         ip_vs_zero_service(svc);
1502                 }
1503         }
1504
1505         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1506                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1507                         ip_vs_zero_service(svc);
1508                 }
1509         }
1510
1511         ip_vs_zero_stats(&ip_vs_stats);
1512         return 0;
1513 }
1514
1515
1516 static int
1517 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1518                      void __user *buffer, size_t *lenp, loff_t *ppos)
1519 {
1520         int *valp = table->data;
1521         int val = *valp;
1522         int rc;
1523
1524         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1525         if (write && (*valp != val)) {
1526                 if ((*valp < 0) || (*valp > 3)) {
1527                         /* Restore the correct value */
1528                         *valp = val;
1529                 } else {
1530                         update_defense_level();
1531                 }
1532         }
1533         return rc;
1534 }
1535
1536
1537 static int
1538 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1539                        void __user *buffer, size_t *lenp, loff_t *ppos)
1540 {
1541         int *valp = table->data;
1542         int val[2];
1543         int rc;
1544
1545         /* backup the value first */
1546         memcpy(val, valp, sizeof(val));
1547
1548         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1549         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1550                 /* Restore the correct value */
1551                 memcpy(valp, val, sizeof(val));
1552         }
1553         return rc;
1554 }
1555
1556
1557 /*
1558  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1559  */
1560
1561 static struct ctl_table vs_vars[] = {
1562         {
1563                 .procname       = "amemthresh",
1564                 .data           = &sysctl_ip_vs_amemthresh,
1565                 .maxlen         = sizeof(int),
1566                 .mode           = 0644,
1567                 .proc_handler   = &proc_dointvec,
1568         },
1569 #ifdef CONFIG_IP_VS_DEBUG
1570         {
1571                 .procname       = "debug_level",
1572                 .data           = &sysctl_ip_vs_debug_level,
1573                 .maxlen         = sizeof(int),
1574                 .mode           = 0644,
1575                 .proc_handler   = &proc_dointvec,
1576         },
1577 #endif
1578         {
1579                 .procname       = "am_droprate",
1580                 .data           = &sysctl_ip_vs_am_droprate,
1581                 .maxlen         = sizeof(int),
1582                 .mode           = 0644,
1583                 .proc_handler   = &proc_dointvec,
1584         },
1585         {
1586                 .procname       = "drop_entry",
1587                 .data           = &sysctl_ip_vs_drop_entry,
1588                 .maxlen         = sizeof(int),
1589                 .mode           = 0644,
1590                 .proc_handler   = &proc_do_defense_mode,
1591         },
1592         {
1593                 .procname       = "drop_packet",
1594                 .data           = &sysctl_ip_vs_drop_packet,
1595                 .maxlen         = sizeof(int),
1596                 .mode           = 0644,
1597                 .proc_handler   = &proc_do_defense_mode,
1598         },
1599         {
1600                 .procname       = "secure_tcp",
1601                 .data           = &sysctl_ip_vs_secure_tcp,
1602                 .maxlen         = sizeof(int),
1603                 .mode           = 0644,
1604                 .proc_handler   = &proc_do_defense_mode,
1605         },
1606 #if 0
1607         {
1608                 .procname       = "timeout_established",
1609                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1610                 .maxlen         = sizeof(int),
1611                 .mode           = 0644,
1612                 .proc_handler   = &proc_dointvec_jiffies,
1613         },
1614         {
1615                 .procname       = "timeout_synsent",
1616                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1617                 .maxlen         = sizeof(int),
1618                 .mode           = 0644,
1619                 .proc_handler   = &proc_dointvec_jiffies,
1620         },
1621         {
1622                 .procname       = "timeout_synrecv",
1623                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1624                 .maxlen         = sizeof(int),
1625                 .mode           = 0644,
1626                 .proc_handler   = &proc_dointvec_jiffies,
1627         },
1628         {
1629                 .procname       = "timeout_finwait",
1630                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1631                 .maxlen         = sizeof(int),
1632                 .mode           = 0644,
1633                 .proc_handler   = &proc_dointvec_jiffies,
1634         },
1635         {
1636                 .procname       = "timeout_timewait",
1637                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1638                 .maxlen         = sizeof(int),
1639                 .mode           = 0644,
1640                 .proc_handler   = &proc_dointvec_jiffies,
1641         },
1642         {
1643                 .procname       = "timeout_close",
1644                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1645                 .maxlen         = sizeof(int),
1646                 .mode           = 0644,
1647                 .proc_handler   = &proc_dointvec_jiffies,
1648         },
1649         {
1650                 .procname       = "timeout_closewait",
1651                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1652                 .maxlen         = sizeof(int),
1653                 .mode           = 0644,
1654                 .proc_handler   = &proc_dointvec_jiffies,
1655         },
1656         {
1657                 .procname       = "timeout_lastack",
1658                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1659                 .maxlen         = sizeof(int),
1660                 .mode           = 0644,
1661                 .proc_handler   = &proc_dointvec_jiffies,
1662         },
1663         {
1664                 .procname       = "timeout_listen",
1665                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1666                 .maxlen         = sizeof(int),
1667                 .mode           = 0644,
1668                 .proc_handler   = &proc_dointvec_jiffies,
1669         },
1670         {
1671                 .procname       = "timeout_synack",
1672                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1673                 .maxlen         = sizeof(int),
1674                 .mode           = 0644,
1675                 .proc_handler   = &proc_dointvec_jiffies,
1676         },
1677         {
1678                 .procname       = "timeout_udp",
1679                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1680                 .maxlen         = sizeof(int),
1681                 .mode           = 0644,
1682                 .proc_handler   = &proc_dointvec_jiffies,
1683         },
1684         {
1685                 .procname       = "timeout_icmp",
1686                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1687                 .maxlen         = sizeof(int),
1688                 .mode           = 0644,
1689                 .proc_handler   = &proc_dointvec_jiffies,
1690         },
1691 #endif
1692         {
1693                 .procname       = "cache_bypass",
1694                 .data           = &sysctl_ip_vs_cache_bypass,
1695                 .maxlen         = sizeof(int),
1696                 .mode           = 0644,
1697                 .proc_handler   = &proc_dointvec,
1698         },
1699         {
1700                 .procname       = "expire_nodest_conn",
1701                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1702                 .maxlen         = sizeof(int),
1703                 .mode           = 0644,
1704                 .proc_handler   = &proc_dointvec,
1705         },
1706         {
1707                 .procname       = "expire_quiescent_template",
1708                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1709                 .maxlen         = sizeof(int),
1710                 .mode           = 0644,
1711                 .proc_handler   = &proc_dointvec,
1712         },
1713         {
1714                 .procname       = "sync_threshold",
1715                 .data           = &sysctl_ip_vs_sync_threshold,
1716                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1717                 .mode           = 0644,
1718                 .proc_handler   = &proc_do_sync_threshold,
1719         },
1720         {
1721                 .procname       = "nat_icmp_send",
1722                 .data           = &sysctl_ip_vs_nat_icmp_send,
1723                 .maxlen         = sizeof(int),
1724                 .mode           = 0644,
1725                 .proc_handler   = &proc_dointvec,
1726         },
1727         { .ctl_name = 0 }
1728 };
1729
1730 const struct ctl_path net_vs_ctl_path[] = {
1731         { .procname = "net", .ctl_name = CTL_NET, },
1732         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1733         { .procname = "vs", },
1734         { }
1735 };
1736 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1737
1738 static struct ctl_table_header * sysctl_header;
1739
1740 #ifdef CONFIG_PROC_FS
1741
/*
 *	Cursor kept in seq->private while the service tables are dumped:
 *	which of the two hash tables the current entry came from and the
 *	bucket index inside that table.
 */
struct ip_vs_iter {
	struct list_head *table;	/* ip_vs_svc_table or ip_vs_svc_fwm_table */
	int bucket;			/* index of the current hash bucket */
};
1746
1747 /*
1748  *      Write the contents of the VS rule table to a PROCfs file.
1749  *      (It is kept just for backward compatibility)
1750  */
1751 static inline const char *ip_vs_fwd_name(unsigned flags)
1752 {
1753         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1754         case IP_VS_CONN_F_LOCALNODE:
1755                 return "Local";
1756         case IP_VS_CONN_F_TUNNEL:
1757                 return "Tunnel";
1758         case IP_VS_CONN_F_DROUTE:
1759                 return "Route";
1760         default:
1761                 return "Masq";
1762         }
1763 }
1764
1765
1766 /* Get the Nth entry in the two lists */
1767 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1768 {
1769         struct ip_vs_iter *iter = seq->private;
1770         int idx;
1771         struct ip_vs_service *svc;
1772
1773         /* look in hash by protocol */
1774         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1775                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1776                         if (pos-- == 0){
1777                                 iter->table = ip_vs_svc_table;
1778                                 iter->bucket = idx;
1779                                 return svc;
1780                         }
1781                 }
1782         }
1783
1784         /* keep looking in fwmark */
1785         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1786                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1787                         if (pos-- == 0) {
1788                                 iter->table = ip_vs_svc_fwm_table;
1789                                 iter->bucket = idx;
1790                                 return svc;
1791                         }
1792                 }
1793         }
1794
1795         return NULL;
1796 }
1797
1798 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1799 {
1800
1801         read_lock_bh(&__ip_vs_svc_lock);
1802         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1803 }
1804
1805
1806 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1807 {
1808         struct list_head *e;
1809         struct ip_vs_iter *iter;
1810         struct ip_vs_service *svc;
1811
1812         ++*pos;
1813         if (v == SEQ_START_TOKEN)
1814                 return ip_vs_info_array(seq,0);
1815
1816         svc = v;
1817         iter = seq->private;
1818
1819         if (iter->table == ip_vs_svc_table) {
1820                 /* next service in table hashed by protocol */
1821                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1822                         return list_entry(e, struct ip_vs_service, s_list);
1823
1824
1825                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1826                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1827                                             s_list) {
1828                                 return svc;
1829                         }
1830                 }
1831
1832                 iter->table = ip_vs_svc_fwm_table;
1833                 iter->bucket = -1;
1834                 goto scan_fwmark;
1835         }
1836
1837         /* next service in hashed by fwmark */
1838         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1839                 return list_entry(e, struct ip_vs_service, f_list);
1840
1841  scan_fwmark:
1842         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1843                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1844                                     f_list)
1845                         return svc;
1846         }
1847
1848         return NULL;
1849 }
1850
1851 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1852 {
1853         read_unlock_bh(&__ip_vs_svc_lock);
1854 }
1855
1856
1857 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1858 {
1859         if (v == SEQ_START_TOKEN) {
1860                 seq_printf(seq,
1861                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1862                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1863                 seq_puts(seq,
1864                          "Prot LocalAddress:Port Scheduler Flags\n");
1865                 seq_puts(seq,
1866                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1867         } else {
1868                 const struct ip_vs_service *svc = v;
1869                 const struct ip_vs_iter *iter = seq->private;
1870                 const struct ip_vs_dest *dest;
1871
1872                 if (iter->table == ip_vs_svc_table) {
1873 #ifdef CONFIG_IP_VS_IPV6
1874                         if (svc->af == AF_INET6)
1875                                 seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
1876                                            ip_vs_proto_name(svc->protocol),
1877                                            NIP6(svc->addr.in6),
1878                                            ntohs(svc->port),
1879                                            svc->scheduler->name);
1880                         else
1881 #endif
1882                                 seq_printf(seq, "%s  %08X:%04X %s ",
1883                                            ip_vs_proto_name(svc->protocol),
1884                                            ntohl(svc->addr.ip),
1885                                            ntohs(svc->port),
1886                                            svc->scheduler->name);
1887                 } else {
1888                         seq_printf(seq, "FWM  %08X %s ",
1889                                    svc->fwmark, svc->scheduler->name);
1890                 }
1891
1892                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1893                         seq_printf(seq, "persistent %d %08X\n",
1894                                 svc->timeout,
1895                                 ntohl(svc->netmask));
1896                 else
1897                         seq_putc(seq, '\n');
1898
1899                 list_for_each_entry(dest, &svc->destinations, n_list) {
1900 #ifdef CONFIG_IP_VS_IPV6
1901                         if (dest->af == AF_INET6)
1902                                 seq_printf(seq,
1903                                            "  -> [" NIP6_FMT "]:%04X"
1904                                            "      %-7s %-6d %-10d %-10d\n",
1905                                            NIP6(dest->addr.in6),
1906                                            ntohs(dest->port),
1907                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1908                                            atomic_read(&dest->weight),
1909                                            atomic_read(&dest->activeconns),
1910                                            atomic_read(&dest->inactconns));
1911                         else
1912 #endif
1913                                 seq_printf(seq,
1914                                            "  -> %08X:%04X      "
1915                                            "%-7s %-6d %-10d %-10d\n",
1916                                            ntohl(dest->addr.ip),
1917                                            ntohs(dest->port),
1918                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1919                                            atomic_read(&dest->weight),
1920                                            atomic_read(&dest->activeconns),
1921                                            atomic_read(&dest->inactconns));
1922
1923                 }
1924         }
1925         return 0;
1926 }
1927
1928 static const struct seq_operations ip_vs_info_seq_ops = {
1929         .start = ip_vs_info_seq_start,
1930         .next  = ip_vs_info_seq_next,
1931         .stop  = ip_vs_info_seq_stop,
1932         .show  = ip_vs_info_seq_show,
1933 };
1934
1935 static int ip_vs_info_open(struct inode *inode, struct file *file)
1936 {
1937         return seq_open_private(file, &ip_vs_info_seq_ops,
1938                         sizeof(struct ip_vs_iter));
1939 }
1940
1941 static const struct file_operations ip_vs_info_fops = {
1942         .owner   = THIS_MODULE,
1943         .open    = ip_vs_info_open,
1944         .read    = seq_read,
1945         .llseek  = seq_lseek,
1946         .release = seq_release_private,
1947 };
1948
1949 #endif
1950
1951 struct ip_vs_stats ip_vs_stats = {
1952         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1953 };
1954
1955 #ifdef CONFIG_PROC_FS
1956 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1957 {
1958
1959 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1960         seq_puts(seq,
1961                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1962         seq_printf(seq,
1963                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1964
1965         spin_lock_bh(&ip_vs_stats.lock);
1966         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1967                    ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1968                    (unsigned long long) ip_vs_stats.inbytes,
1969                    (unsigned long long) ip_vs_stats.outbytes);
1970
1971 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1972         seq_puts(seq,
1973                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1974         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1975                         ip_vs_stats.cps,
1976                         ip_vs_stats.inpps,
1977                         ip_vs_stats.outpps,
1978                         ip_vs_stats.inbps,
1979                         ip_vs_stats.outbps);
1980         spin_unlock_bh(&ip_vs_stats.lock);
1981
1982         return 0;
1983 }
1984
1985 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1986 {
1987         return single_open(file, ip_vs_stats_show, NULL);
1988 }
1989
1990 static const struct file_operations ip_vs_stats_fops = {
1991         .owner = THIS_MODULE,
1992         .open = ip_vs_stats_seq_open,
1993         .read = seq_read,
1994         .llseek = seq_lseek,
1995         .release = single_release,
1996 };
1997
1998 #endif
1999
2000 /*
2001  *      Set timeout values for tcp tcpfin udp in the timeout_table.
2002  */
2003 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
2004 {
2005         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
2006                   u->tcp_timeout,
2007                   u->tcp_fin_timeout,
2008                   u->udp_timeout);
2009
2010 #ifdef CONFIG_IP_VS_PROTO_TCP
2011         if (u->tcp_timeout) {
2012                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
2013                         = u->tcp_timeout * HZ;
2014         }
2015
2016         if (u->tcp_fin_timeout) {
2017                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
2018                         = u->tcp_fin_timeout * HZ;
2019         }
2020 #endif
2021
2022 #ifdef CONFIG_IP_VS_PROTO_UDP
2023         if (u->udp_timeout) {
2024                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
2025                         = u->udp_timeout * HZ;
2026         }
2027 #endif
2028         return 0;
2029 }
2030
2031
2032 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2033 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2034 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2035                                  sizeof(struct ip_vs_dest_user))
2036 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2037 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2038 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2039
2040 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2041         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2042         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2043         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2044         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2045         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2046         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2047         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2048         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2049         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2050         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2051         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2052 };
2053
2054 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2055                                   struct ip_vs_service_user *usvc_compat)
2056 {
2057         usvc->af                = AF_INET;
2058         usvc->protocol          = usvc_compat->protocol;
2059         usvc->addr.ip           = usvc_compat->addr;
2060         usvc->port              = usvc_compat->port;
2061         usvc->fwmark            = usvc_compat->fwmark;
2062
2063         /* Deep copy of sched_name is not needed here */
2064         usvc->sched_name        = usvc_compat->sched_name;
2065
2066         usvc->flags             = usvc_compat->flags;
2067         usvc->timeout           = usvc_compat->timeout;
2068         usvc->netmask           = usvc_compat->netmask;
2069 }
2070
2071 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2072                                    struct ip_vs_dest_user *udest_compat)
2073 {
2074         udest->addr.ip          = udest_compat->addr;
2075         udest->port             = udest_compat->port;
2076         udest->conn_flags       = udest_compat->conn_flags;
2077         udest->weight           = udest_compat->weight;
2078         udest->u_threshold      = udest_compat->u_threshold;
2079         udest->l_threshold      = udest_compat->l_threshold;
2080 }
2081
2082 static int
2083 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2084 {
2085         int ret;
2086         unsigned char arg[MAX_ARG_LEN];
2087         struct ip_vs_service_user *usvc_compat;
2088         struct ip_vs_service_user_kern usvc;
2089         struct ip_vs_service *svc;
2090         struct ip_vs_dest_user *udest_compat;
2091         struct ip_vs_dest_user_kern udest;
2092
2093         if (!capable(CAP_NET_ADMIN))
2094                 return -EPERM;
2095
2096         if (len != set_arglen[SET_CMDID(cmd)]) {
2097                 IP_VS_ERR("set_ctl: len %u != %u\n",
2098                           len, set_arglen[SET_CMDID(cmd)]);
2099                 return -EINVAL;
2100         }
2101
2102         if (copy_from_user(arg, user, len) != 0)
2103                 return -EFAULT;
2104
2105         /* increase the module use count */
2106         ip_vs_use_count_inc();
2107
2108         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2109                 ret = -ERESTARTSYS;
2110                 goto out_dec;
2111         }
2112
2113         if (cmd == IP_VS_SO_SET_FLUSH) {
2114                 /* Flush the virtual service */
2115                 ret = ip_vs_flush();
2116                 goto out_unlock;
2117         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2118                 /* Set timeout values for (tcp tcpfin udp) */
2119                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2120                 goto out_unlock;
2121         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2122                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2123                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2124                 goto out_unlock;
2125         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2126                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2127                 ret = stop_sync_thread(dm->state);
2128                 goto out_unlock;
2129         }
2130
2131         usvc_compat = (struct ip_vs_service_user *)arg;
2132         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2133
2134         /* We only use the new structs internally, so copy userspace compat
2135          * structs to extended internal versions */
2136         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2137         ip_vs_copy_udest_compat(&udest, udest_compat);
2138
2139         if (cmd == IP_VS_SO_SET_ZERO) {
2140                 /* if no service address is set, zero counters in all */
2141                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2142                         ret = ip_vs_zero_all();
2143                         goto out_unlock;
2144                 }
2145         }
2146
2147         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2148         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2149                 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
2150                           usvc.protocol, NIPQUAD(usvc.addr.ip),
2151                           ntohs(usvc.port), usvc.sched_name);
2152                 ret = -EFAULT;
2153                 goto out_unlock;
2154         }
2155
2156         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2157         if (usvc.fwmark == 0)
2158                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2159                                           &usvc.addr, usvc.port);
2160         else
2161                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2162
2163         if (cmd != IP_VS_SO_SET_ADD
2164             && (svc == NULL || svc->protocol != usvc.protocol)) {
2165                 ret = -ESRCH;
2166                 goto out_unlock;
2167         }
2168
2169         switch (cmd) {
2170         case IP_VS_SO_SET_ADD:
2171                 if (svc != NULL)
2172                         ret = -EEXIST;
2173                 else
2174                         ret = ip_vs_add_service(&usvc, &svc);
2175                 break;
2176         case IP_VS_SO_SET_EDIT:
2177                 ret = ip_vs_edit_service(svc, &usvc);
2178                 break;
2179         case IP_VS_SO_SET_DEL:
2180                 ret = ip_vs_del_service(svc);
2181                 if (!ret)
2182                         goto out_unlock;
2183                 break;
2184         case IP_VS_SO_SET_ZERO:
2185                 ret = ip_vs_zero_service(svc);
2186                 break;
2187         case IP_VS_SO_SET_ADDDEST:
2188                 ret = ip_vs_add_dest(svc, &udest);
2189                 break;
2190         case IP_VS_SO_SET_EDITDEST:
2191                 ret = ip_vs_edit_dest(svc, &udest);
2192                 break;
2193         case IP_VS_SO_SET_DELDEST:
2194                 ret = ip_vs_del_dest(svc, &udest);
2195                 break;
2196         default:
2197                 ret = -EINVAL;
2198         }
2199
2200         if (svc)
2201                 ip_vs_service_put(svc);
2202
2203   out_unlock:
2204         mutex_unlock(&__ip_vs_mutex);
2205   out_dec:
2206         /* decrease the module use count */
2207         ip_vs_use_count_dec();
2208
2209         return ret;
2210 }
2211
2212
2213 static void
2214 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2215 {
2216         spin_lock_bh(&src->lock);
2217         memcpy(dst, src, (char*)&src->lock - (char*)src);
2218         spin_unlock_bh(&src->lock);
2219 }
2220
2221 static void
2222 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2223 {
2224         dst->protocol = src->protocol;
2225         dst->addr = src->addr.ip;
2226         dst->port = src->port;
2227         dst->fwmark = src->fwmark;
2228         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2229         dst->flags = src->flags;
2230         dst->timeout = src->timeout / HZ;
2231         dst->netmask = src->netmask;
2232         dst->num_dests = src->num_dests;
2233         ip_vs_copy_stats(&dst->stats, &src->stats);
2234 }
2235
2236 static inline int
2237 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2238                             struct ip_vs_get_services __user *uptr)
2239 {
2240         int idx, count=0;
2241         struct ip_vs_service *svc;
2242         struct ip_vs_service_entry entry;
2243         int ret = 0;
2244
2245         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2246                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2247                         /* Only expose IPv4 entries to old interface */
2248                         if (svc->af != AF_INET)
2249                                 continue;
2250
2251                         if (count >= get->num_services)
2252                                 goto out;
2253                         memset(&entry, 0, sizeof(entry));
2254                         ip_vs_copy_service(&entry, svc);
2255                         if (copy_to_user(&uptr->entrytable[count],
2256                                          &entry, sizeof(entry))) {
2257                                 ret = -EFAULT;
2258                                 goto out;
2259                         }
2260                         count++;
2261                 }
2262         }
2263
2264         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2265                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2266                         /* Only expose IPv4 entries to old interface */
2267                         if (svc->af != AF_INET)
2268                                 continue;
2269
2270                         if (count >= get->num_services)
2271                                 goto out;
2272                         memset(&entry, 0, sizeof(entry));
2273                         ip_vs_copy_service(&entry, svc);
2274                         if (copy_to_user(&uptr->entrytable[count],
2275                                          &entry, sizeof(entry))) {
2276                                 ret = -EFAULT;
2277                                 goto out;
2278                         }
2279                         count++;
2280                 }
2281         }
2282   out:
2283         return ret;
2284 }
2285
2286 static inline int
2287 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2288                          struct ip_vs_get_dests __user *uptr)
2289 {
2290         struct ip_vs_service *svc;
2291         union nf_inet_addr addr = { .ip = get->addr };
2292         int ret = 0;
2293
2294         if (get->fwmark)
2295                 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2296         else
2297                 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2298                                           get->port);
2299
2300         if (svc) {
2301                 int count = 0;
2302                 struct ip_vs_dest *dest;
2303                 struct ip_vs_dest_entry entry;
2304
2305                 list_for_each_entry(dest, &svc->destinations, n_list) {
2306                         if (count >= get->num_dests)
2307                                 break;
2308
2309                         entry.addr = dest->addr.ip;
2310                         entry.port = dest->port;
2311                         entry.conn_flags = atomic_read(&dest->conn_flags);
2312                         entry.weight = atomic_read(&dest->weight);
2313                         entry.u_threshold = dest->u_threshold;
2314                         entry.l_threshold = dest->l_threshold;
2315                         entry.activeconns = atomic_read(&dest->activeconns);
2316                         entry.inactconns = atomic_read(&dest->inactconns);
2317                         entry.persistconns = atomic_read(&dest->persistconns);
2318                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2319                         if (copy_to_user(&uptr->entrytable[count],
2320                                          &entry, sizeof(entry))) {
2321                                 ret = -EFAULT;
2322                                 break;
2323                         }
2324                         count++;
2325                 }
2326                 ip_vs_service_put(svc);
2327         } else
2328                 ret = -ESRCH;
2329         return ret;
2330 }
2331
2332 static inline void
2333 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2334 {
2335 #ifdef CONFIG_IP_VS_PROTO_TCP
2336         u->tcp_timeout =
2337                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2338         u->tcp_fin_timeout =
2339                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2340 #endif
2341 #ifdef CONFIG_IP_VS_PROTO_UDP
2342         u->udp_timeout =
2343                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2344 #endif
2345 }
2346
2347
2348 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2349 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2350 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2351 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2352 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2353 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2354 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2355
2356 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2357         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2358         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2359         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2360         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2361         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2362         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2363         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2364 };
2365
2366 static int
2367 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2368 {
2369         unsigned char arg[128];
2370         int ret = 0;
2371
2372         if (!capable(CAP_NET_ADMIN))
2373                 return -EPERM;
2374
2375         if (*len < get_arglen[GET_CMDID(cmd)]) {
2376                 IP_VS_ERR("get_ctl: len %u < %u\n",
2377                           *len, get_arglen[GET_CMDID(cmd)]);
2378                 return -EINVAL;
2379         }
2380
2381         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2382                 return -EFAULT;
2383
2384         if (mutex_lock_interruptible(&__ip_vs_mutex))
2385                 return -ERESTARTSYS;
2386
2387         switch (cmd) {
2388         case IP_VS_SO_GET_VERSION:
2389         {
2390                 char buf[64];
2391
2392                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2393                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2394                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2395                         ret = -EFAULT;
2396                         goto out;
2397                 }
2398                 *len = strlen(buf)+1;
2399         }
2400         break;
2401
2402         case IP_VS_SO_GET_INFO:
2403         {
2404                 struct ip_vs_getinfo info;
2405                 info.version = IP_VS_VERSION_CODE;
2406                 info.size = IP_VS_CONN_TAB_SIZE;
2407                 info.num_services = ip_vs_num_services;
2408                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2409                         ret = -EFAULT;
2410         }
2411         break;
2412
2413         case IP_VS_SO_GET_SERVICES:
2414         {
2415                 struct ip_vs_get_services *get;
2416                 int size;
2417
2418                 get = (struct ip_vs_get_services *)arg;
2419                 size = sizeof(*get) +
2420                         sizeof(struct ip_vs_service_entry) * get->num_services;
2421                 if (*len != size) {
2422                         IP_VS_ERR("length: %u != %u\n", *len, size);
2423                         ret = -EINVAL;
2424                         goto out;
2425                 }
2426                 ret = __ip_vs_get_service_entries(get, user);
2427         }
2428         break;
2429
2430         case IP_VS_SO_GET_SERVICE:
2431         {
2432                 struct ip_vs_service_entry *entry;
2433                 struct ip_vs_service *svc;
2434                 union nf_inet_addr addr;
2435
2436                 entry = (struct ip_vs_service_entry *)arg;
2437                 addr.ip = entry->addr;
2438                 if (entry->fwmark)
2439                         svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2440                 else
2441                         svc = __ip_vs_service_get(AF_INET, entry->protocol,
2442                                                   &addr, entry->port);
2443                 if (svc) {
2444                         ip_vs_copy_service(entry, svc);
2445                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2446                                 ret = -EFAULT;
2447                         ip_vs_service_put(svc);
2448                 } else
2449                         ret = -ESRCH;
2450         }
2451         break;
2452
2453         case IP_VS_SO_GET_DESTS:
2454         {
2455                 struct ip_vs_get_dests *get;
2456                 int size;
2457
2458                 get = (struct ip_vs_get_dests *)arg;
2459                 size = sizeof(*get) +
2460                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2461                 if (*len != size) {
2462                         IP_VS_ERR("length: %u != %u\n", *len, size);
2463                         ret = -EINVAL;
2464                         goto out;
2465                 }
2466                 ret = __ip_vs_get_dest_entries(get, user);
2467         }
2468         break;
2469
2470         case IP_VS_SO_GET_TIMEOUT:
2471         {
2472                 struct ip_vs_timeout_user t;
2473
2474                 __ip_vs_get_timeouts(&t);
2475                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2476                         ret = -EFAULT;
2477         }
2478         break;
2479
2480         case IP_VS_SO_GET_DAEMON:
2481         {
2482                 struct ip_vs_daemon_user d[2];
2483
2484                 memset(&d, 0, sizeof(d));
2485                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2486                         d[0].state = IP_VS_STATE_MASTER;
2487                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2488                         d[0].syncid = ip_vs_master_syncid;
2489                 }
2490                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2491                         d[1].state = IP_VS_STATE_BACKUP;
2492                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2493                         d[1].syncid = ip_vs_backup_syncid;
2494                 }
2495                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2496                         ret = -EFAULT;
2497         }
2498         break;
2499
2500         default:
2501                 ret = -EINVAL;
2502         }
2503
2504   out:
2505         mutex_unlock(&__ip_vs_mutex);
2506         return ret;
2507 }
2508
2509
2510 static struct nf_sockopt_ops ip_vs_sockopts = {
2511         .pf             = PF_INET,
2512         .set_optmin     = IP_VS_BASE_CTL,
2513         .set_optmax     = IP_VS_SO_SET_MAX+1,
2514         .set            = do_ip_vs_set_ctl,
2515         .get_optmin     = IP_VS_BASE_CTL,
2516         .get_optmax     = IP_VS_SO_GET_MAX+1,
2517         .get            = do_ip_vs_get_ctl,
2518         .owner          = THIS_MODULE,
2519 };
2520
2521 /*
2522  * Generic Netlink interface
2523  */
2524
2525 /* IPVS genetlink family */
2526 static struct genl_family ip_vs_genl_family = {
2527         .id             = GENL_ID_GENERATE,
2528         .hdrsize        = 0,
2529         .name           = IPVS_GENL_NAME,
2530         .version        = IPVS_GENL_VERSION,
2531         .maxattr        = IPVS_CMD_MAX,
2532 };
2533
2534 /* Policy used for first-level command attributes */
2535 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2536         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2537         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2538         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2539         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2540         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2541         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2542 };
2543
2544 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
2545 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2546         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2547         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2548                                             .len = IP_VS_IFNAME_MAXLEN },
2549         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2550 };
2551
2552 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
2553 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2554         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2555         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2556         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2557                                             .len = sizeof(union nf_inet_addr) },
2558         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2559         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2560         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2561                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2562         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2563                                             .len = sizeof(struct ip_vs_flags) },
2564         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2565         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2566         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2567 };
2568
2569 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
2570 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2571         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2572                                             .len = sizeof(union nf_inet_addr) },
2573         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2574         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2575         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2576         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2577         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2578         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2579         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2580         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2581         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2582 };
2583
2584 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2585                                  struct ip_vs_stats *stats)
2586 {
2587         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2588         if (!nl_stats)
2589                 return -EMSGSIZE;
2590
2591         spin_lock_bh(&stats->lock);
2592
2593         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2594         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2595         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2596         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2597         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2598         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2599         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2600         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2601         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2602         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2603
2604         spin_unlock_bh(&stats->lock);
2605
2606         nla_nest_end(skb, nl_stats);
2607
2608         return 0;
2609
2610 nla_put_failure:
2611         spin_unlock_bh(&stats->lock);
2612         nla_nest_cancel(skb, nl_stats);
2613         return -EMSGSIZE;
2614 }
2615
2616 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2617                                    struct ip_vs_service *svc)
2618 {
2619         struct nlattr *nl_service;
2620         struct ip_vs_flags flags = { .flags = svc->flags,
2621                                      .mask = ~0 };
2622
2623         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2624         if (!nl_service)
2625                 return -EMSGSIZE;
2626
2627         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, svc->af);
2628
2629         if (svc->fwmark) {
2630                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2631         } else {
2632                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2633                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2634                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2635         }
2636
2637         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2638         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2639         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2640         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2641
2642         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2643                 goto nla_put_failure;
2644
2645         nla_nest_end(skb, nl_service);
2646
2647         return 0;
2648
2649 nla_put_failure:
2650         nla_nest_cancel(skb, nl_service);
2651         return -EMSGSIZE;
2652 }
2653
2654 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2655                                    struct ip_vs_service *svc,
2656                                    struct netlink_callback *cb)
2657 {
2658         void *hdr;
2659
2660         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2661                           &ip_vs_genl_family, NLM_F_MULTI,
2662                           IPVS_CMD_NEW_SERVICE);
2663         if (!hdr)
2664                 return -EMSGSIZE;
2665
2666         if (ip_vs_genl_fill_service(skb, svc) < 0)
2667                 goto nla_put_failure;
2668
2669         return genlmsg_end(skb, hdr);
2670
2671 nla_put_failure:
2672         genlmsg_cancel(skb, hdr);
2673         return -EMSGSIZE;
2674 }
2675
2676 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2677                                     struct netlink_callback *cb)
2678 {
2679         int idx = 0, i;
2680         int start = cb->args[0];
2681         struct ip_vs_service *svc;
2682
2683         mutex_lock(&__ip_vs_mutex);
2684         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2685                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2686                         if (++idx <= start)
2687                                 continue;
2688                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2689                                 idx--;
2690                                 goto nla_put_failure;
2691                         }
2692                 }
2693         }
2694
2695         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2696                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2697                         if (++idx <= start)
2698                                 continue;
2699                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2700                                 idx--;
2701                                 goto nla_put_failure;
2702                         }
2703                 }
2704         }
2705
2706 nla_put_failure:
2707         mutex_unlock(&__ip_vs_mutex);
2708         cb->args[0] = idx;
2709
2710         return skb->len;
2711 }
2712
2713 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2714                                     struct nlattr *nla, int full_entry)
2715 {
2716         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2717         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2718
2719         /* Parse mandatory identifying service fields first */
2720         if (nla == NULL ||
2721             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2722                 return -EINVAL;
2723
2724         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2725         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2726         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2727         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2728         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2729
2730         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2731                 return -EINVAL;
2732
2733         usvc->af = nla_get_u16(nla_af);
2734 #ifdef CONFIG_IP_VS_IPV6
2735         if (usvc->af != AF_INET && usvc->af != AF_INET6)
2736 #else
2737         if (usvc->af != AF_INET)
2738 #endif
2739                 return -EAFNOSUPPORT;
2740
2741         if (nla_fwmark) {
2742                 usvc->protocol = IPPROTO_TCP;
2743                 usvc->fwmark = nla_get_u32(nla_fwmark);
2744         } else {
2745                 usvc->protocol = nla_get_u16(nla_protocol);
2746                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2747                 usvc->port = nla_get_u16(nla_port);
2748                 usvc->fwmark = 0;
2749         }
2750
2751         /* If a full entry was requested, check for the additional fields */
2752         if (full_entry) {
2753                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2754                               *nla_netmask;
2755                 struct ip_vs_flags flags;
2756                 struct ip_vs_service *svc;
2757
2758                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2759                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2760                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2761                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2762
2763                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2764                         return -EINVAL;
2765
2766                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2767
2768                 /* prefill flags from service if it already exists */
2769                 if (usvc->fwmark)
2770                         svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2771                 else
2772                         svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2773                                                   &usvc->addr, usvc->port);
2774                 if (svc) {
2775                         usvc->flags = svc->flags;
2776                         ip_vs_service_put(svc);
2777                 } else
2778                         usvc->flags = 0;
2779
2780                 /* set new flags from userland */
2781                 usvc->flags = (usvc->flags & ~flags.mask) |
2782                               (flags.flags & flags.mask);
2783                 usvc->sched_name = nla_data(nla_sched);
2784                 usvc->timeout = nla_get_u32(nla_timeout);
2785                 usvc->netmask = nla_get_u32(nla_netmask);
2786         }
2787
2788         return 0;
2789 }
2790
2791 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2792 {
2793         struct ip_vs_service_user_kern usvc;
2794         int ret;
2795
2796         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2797         if (ret)
2798                 return ERR_PTR(ret);
2799
2800         if (usvc.fwmark)
2801                 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2802         else
2803                 return __ip_vs_service_get(usvc.af, usvc.protocol,
2804                                            &usvc.addr, usvc.port);
2805 }
2806
2807 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2808 {
2809         struct nlattr *nl_dest;
2810
2811         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2812         if (!nl_dest)
2813                 return -EMSGSIZE;
2814
2815         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2816         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2817
2818         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2819                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2820         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2821         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2822         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2823         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2824                     atomic_read(&dest->activeconns));
2825         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2826                     atomic_read(&dest->inactconns));
2827         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2828                     atomic_read(&dest->persistconns));
2829
2830         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2831                 goto nla_put_failure;
2832
2833         nla_nest_end(skb, nl_dest);
2834
2835         return 0;
2836
2837 nla_put_failure:
2838         nla_nest_cancel(skb, nl_dest);
2839         return -EMSGSIZE;
2840 }
2841
2842 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2843                                 struct netlink_callback *cb)
2844 {
2845         void *hdr;
2846
2847         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2848                           &ip_vs_genl_family, NLM_F_MULTI,
2849                           IPVS_CMD_NEW_DEST);
2850         if (!hdr)
2851                 return -EMSGSIZE;
2852
2853         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2854                 goto nla_put_failure;
2855
2856         return genlmsg_end(skb, hdr);
2857
2858 nla_put_failure:
2859         genlmsg_cancel(skb, hdr);
2860         return -EMSGSIZE;
2861 }
2862
2863 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2864                                  struct netlink_callback *cb)
2865 {
2866         int idx = 0;
2867         int start = cb->args[0];
2868         struct ip_vs_service *svc;
2869         struct ip_vs_dest *dest;
2870         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2871
2872         mutex_lock(&__ip_vs_mutex);
2873
2874         /* Try to find the service for which to dump destinations */
2875         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2876                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2877                 goto out_err;
2878
2879         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2880         if (IS_ERR(svc) || svc == NULL)
2881                 goto out_err;
2882
2883         /* Dump the destinations */
2884         list_for_each_entry(dest, &svc->destinations, n_list) {
2885                 if (++idx <= start)
2886                         continue;
2887                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2888                         idx--;
2889                         goto nla_put_failure;
2890                 }
2891         }
2892
2893 nla_put_failure:
2894         cb->args[0] = idx;
2895         ip_vs_service_put(svc);
2896
2897 out_err:
2898         mutex_unlock(&__ip_vs_mutex);
2899
2900         return skb->len;
2901 }
2902
2903 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2904                                  struct nlattr *nla, int full_entry)
2905 {
2906         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2907         struct nlattr *nla_addr, *nla_port;
2908
2909         /* Parse mandatory identifying destination fields first */
2910         if (nla == NULL ||
2911             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2912                 return -EINVAL;
2913
2914         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2915         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2916
2917         if (!(nla_addr && nla_port))
2918                 return -EINVAL;
2919
2920         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2921         udest->port = nla_get_u16(nla_port);
2922
2923         /* If a full entry was requested, check for the additional fields */
2924         if (full_entry) {
2925                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2926                               *nla_l_thresh;
2927
2928                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2929                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2930                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2931                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2932
2933                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2934                         return -EINVAL;
2935
2936                 udest->conn_flags = nla_get_u32(nla_fwd)
2937                                     & IP_VS_CONN_F_FWD_MASK;
2938                 udest->weight = nla_get_u32(nla_weight);
2939                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2940                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2941         }
2942
2943         return 0;
2944 }
2945
2946 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2947                                   const char *mcast_ifn, __be32 syncid)
2948 {
2949         struct nlattr *nl_daemon;
2950
2951         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2952         if (!nl_daemon)
2953                 return -EMSGSIZE;
2954
2955         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2956         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2957         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2958
2959         nla_nest_end(skb, nl_daemon);
2960
2961         return 0;
2962
2963 nla_put_failure:
2964         nla_nest_cancel(skb, nl_daemon);
2965         return -EMSGSIZE;
2966 }
2967
2968 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2969                                   const char *mcast_ifn, __be32 syncid,
2970                                   struct netlink_callback *cb)
2971 {
2972         void *hdr;
2973         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2974                           &ip_vs_genl_family, NLM_F_MULTI,
2975                           IPVS_CMD_NEW_DAEMON);
2976         if (!hdr)
2977                 return -EMSGSIZE;
2978
2979         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2980                 goto nla_put_failure;
2981
2982         return genlmsg_end(skb, hdr);
2983
2984 nla_put_failure:
2985         genlmsg_cancel(skb, hdr);
2986         return -EMSGSIZE;
2987 }
2988
2989 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2990                                    struct netlink_callback *cb)
2991 {
2992         mutex_lock(&__ip_vs_mutex);
2993         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2994                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2995                                            ip_vs_master_mcast_ifn,
2996                                            ip_vs_master_syncid, cb) < 0)
2997                         goto nla_put_failure;
2998
2999                 cb->args[0] = 1;
3000         }
3001
3002         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
3003                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
3004                                            ip_vs_backup_mcast_ifn,
3005                                            ip_vs_backup_syncid, cb) < 0)
3006                         goto nla_put_failure;
3007
3008                 cb->args[1] = 1;
3009         }
3010
3011 nla_put_failure:
3012         mutex_unlock(&__ip_vs_mutex);
3013
3014         return skb->len;
3015 }
3016
3017 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
3018 {
3019         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
3020               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
3021               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
3022                 return -EINVAL;
3023
3024         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
3025                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
3026                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
3027 }
3028
3029 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
3030 {
3031         if (!attrs[IPVS_DAEMON_ATTR_STATE])
3032                 return -EINVAL;
3033
3034         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
3035 }
3036
3037 static int ip_vs_genl_set_config(struct nlattr **attrs)
3038 {
3039         struct ip_vs_timeout_user t;
3040
3041         __ip_vs_get_timeouts(&t);
3042
3043         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3044                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3045
3046         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3047                 t.tcp_fin_timeout =
3048                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3049
3050         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3051                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3052
3053         return ip_vs_set_timeout(&t);
3054 }
3055
3056 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3057 {
3058         struct ip_vs_service *svc = NULL;
3059         struct ip_vs_service_user_kern usvc;
3060         struct ip_vs_dest_user_kern udest;
3061         int ret = 0, cmd;
3062         int need_full_svc = 0, need_full_dest = 0;
3063
3064         cmd = info->genlhdr->cmd;
3065
3066         mutex_lock(&__ip_vs_mutex);
3067
3068         if (cmd == IPVS_CMD_FLUSH) {
3069                 ret = ip_vs_flush();
3070                 goto out;
3071         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3072                 ret = ip_vs_genl_set_config(info->attrs);
3073                 goto out;
3074         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3075                    cmd == IPVS_CMD_DEL_DAEMON) {
3076
3077                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3078
3079                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3080                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3081                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3082                                      ip_vs_daemon_policy)) {
3083                         ret = -EINVAL;
3084                         goto out;
3085                 }
3086
3087                 if (cmd == IPVS_CMD_NEW_DAEMON)
3088                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3089                 else
3090                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3091                 goto out;
3092         } else if (cmd == IPVS_CMD_ZERO &&
3093                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3094                 ret = ip_vs_zero_all();
3095                 goto out;
3096         }
3097
3098         /* All following commands require a service argument, so check if we
3099          * received a valid one. We need a full service specification when
3100          * adding / editing a service. Only identifying members otherwise. */
3101         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3102                 need_full_svc = 1;
3103
3104         ret = ip_vs_genl_parse_service(&usvc,
3105                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3106                                        need_full_svc);
3107         if (ret)
3108                 goto out;
3109
3110         /* Lookup the exact service by <protocol, addr, port> or fwmark */
3111         if (usvc.fwmark == 0)
3112                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3113                                           &usvc.addr, usvc.port);
3114         else
3115                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3116
3117         /* Unless we're adding a new service, the service must already exist */
3118         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3119                 ret = -ESRCH;
3120                 goto out;
3121         }
3122
3123         /* Destination commands require a valid destination argument. For
3124          * adding / editing a destination, we need a full destination
3125          * specification. */
3126         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3127             cmd == IPVS_CMD_DEL_DEST) {
3128                 if (cmd != IPVS_CMD_DEL_DEST)
3129                         need_full_dest = 1;
3130
3131                 ret = ip_vs_genl_parse_dest(&udest,
3132                                             info->attrs[IPVS_CMD_ATTR_DEST],
3133                                             need_full_dest);
3134                 if (ret)
3135                         goto out;
3136         }
3137
3138         switch (cmd) {
3139         case IPVS_CMD_NEW_SERVICE:
3140                 if (svc == NULL)
3141                         ret = ip_vs_add_service(&usvc, &svc);
3142                 else
3143                         ret = -EEXIST;
3144                 break;
3145         case IPVS_CMD_SET_SERVICE:
3146                 ret = ip_vs_edit_service(svc, &usvc);
3147                 break;
3148         case IPVS_CMD_DEL_SERVICE:
3149                 ret = ip_vs_del_service(svc);
3150                 break;
3151         case IPVS_CMD_NEW_DEST:
3152                 ret = ip_vs_add_dest(svc, &udest);
3153                 break;
3154         case IPVS_CMD_SET_DEST:
3155                 ret = ip_vs_edit_dest(svc, &udest);
3156                 break;
3157         case IPVS_CMD_DEL_DEST:
3158                 ret = ip_vs_del_dest(svc, &udest);
3159                 break;
3160         case IPVS_CMD_ZERO:
3161                 ret = ip_vs_zero_service(svc);
3162                 break;
3163         default:
3164                 ret = -EINVAL;
3165         }
3166
3167 out:
3168         if (svc)
3169                 ip_vs_service_put(svc);
3170         mutex_unlock(&__ip_vs_mutex);
3171
3172         return ret;
3173 }
3174
3175 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3176 {
3177         struct sk_buff *msg;
3178         void *reply;
3179         int ret, cmd, reply_cmd;
3180
3181         cmd = info->genlhdr->cmd;
3182
3183         if (cmd == IPVS_CMD_GET_SERVICE)
3184                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3185         else if (cmd == IPVS_CMD_GET_INFO)
3186                 reply_cmd = IPVS_CMD_SET_INFO;
3187         else if (cmd == IPVS_CMD_GET_CONFIG)
3188                 reply_cmd = IPVS_CMD_SET_CONFIG;
3189         else {
3190                 IP_VS_ERR("unknown Generic Netlink command\n");
3191                 return -EINVAL;
3192         }
3193
3194         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3195         if (!msg)
3196                 return -ENOMEM;
3197
3198         mutex_lock(&__ip_vs_mutex);
3199
3200         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3201         if (reply == NULL)
3202                 goto nla_put_failure;
3203
3204         switch (cmd) {
3205         case IPVS_CMD_GET_SERVICE:
3206         {
3207                 struct ip_vs_service *svc;
3208
3209                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3210                 if (IS_ERR(svc)) {
3211                         ret = PTR_ERR(svc);
3212                         goto out_err;
3213                 } else if (svc) {
3214                         ret = ip_vs_genl_fill_service(msg, svc);
3215                         ip_vs_service_put(svc);
3216                         if (ret)
3217                                 goto nla_put_failure;
3218                 } else {
3219                         ret = -ESRCH;
3220                         goto out_err;
3221                 }
3222
3223                 break;
3224         }
3225
3226         case IPVS_CMD_GET_CONFIG:
3227         {
3228                 struct ip_vs_timeout_user t;
3229
3230                 __ip_vs_get_timeouts(&t);
3231 #ifdef CONFIG_IP_VS_PROTO_TCP
3232                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3233                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3234                             t.tcp_fin_timeout);
3235 #endif
3236 #ifdef CONFIG_IP_VS_PROTO_UDP
3237                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3238 #endif
3239
3240                 break;
3241         }
3242
3243         case IPVS_CMD_GET_INFO:
3244                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3245                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3246                             IP_VS_CONN_TAB_SIZE);
3247                 break;
3248         }
3249
3250         genlmsg_end(msg, reply);
3251         ret = genlmsg_unicast(msg, info->snd_pid);
3252         goto out;
3253
3254 nla_put_failure:
3255         IP_VS_ERR("not enough space in Netlink message\n");
3256         ret = -EMSGSIZE;
3257
3258 out_err:
3259         nlmsg_free(msg);
3260 out:
3261         mutex_unlock(&__ip_vs_mutex);
3262
3263         return ret;
3264 }
3265
3266
3267 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3268         {
3269                 .cmd    = IPVS_CMD_NEW_SERVICE,
3270                 .flags  = GENL_ADMIN_PERM,
3271                 .policy = ip_vs_cmd_policy,
3272                 .doit   = ip_vs_genl_set_cmd,
3273         },
3274         {
3275                 .cmd    = IPVS_CMD_SET_SERVICE,
3276                 .flags  = GENL_ADMIN_PERM,
3277                 .policy = ip_vs_cmd_policy,
3278                 .doit   = ip_vs_genl_set_cmd,
3279         },
3280         {
3281                 .cmd    = IPVS_CMD_DEL_SERVICE,
3282                 .flags  = GENL_ADMIN_PERM,
3283                 .policy = ip_vs_cmd_policy,
3284                 .doit   = ip_vs_genl_set_cmd,
3285         },
3286         {
3287                 .cmd    = IPVS_CMD_GET_SERVICE,
3288                 .flags  = GENL_ADMIN_PERM,
3289                 .doit   = ip_vs_genl_get_cmd,
3290                 .dumpit = ip_vs_genl_dump_services,
3291                 .policy = ip_vs_cmd_policy,
3292         },
3293         {
3294                 .cmd    = IPVS_CMD_NEW_DEST,
3295                 .flags  = GENL_ADMIN_PERM,
3296                 .policy = ip_vs_cmd_policy,
3297                 .doit   = ip_vs_genl_set_cmd,
3298         },
3299         {
3300                 .cmd    = IPVS_CMD_SET_DEST,
3301                 .flags  = GENL_ADMIN_PERM,
3302                 .policy = ip_vs_cmd_policy,
3303                 .doit   = ip_vs_genl_set_cmd,
3304         },
3305         {
3306                 .cmd    = IPVS_CMD_DEL_DEST,
3307                 .flags  = GENL_ADMIN_PERM,
3308                 .policy = ip_vs_cmd_policy,
3309                 .doit   = ip_vs_genl_set_cmd,
3310         },
3311         {
3312                 .cmd    = IPVS_CMD_GET_DEST,
3313                 .flags  = GENL_ADMIN_PERM,
3314                 .policy = ip_vs_cmd_policy,
3315                 .dumpit = ip_vs_genl_dump_dests,
3316         },
3317         {
3318                 .cmd    = IPVS_CMD_NEW_DAEMON,
3319                 .flags  = GENL_ADMIN_PERM,
3320                 .policy = ip_vs_cmd_policy,
3321                 .doit   = ip_vs_genl_set_cmd,
3322         },
3323         {
3324                 .cmd    = IPVS_CMD_DEL_DAEMON,
3325                 .flags  = GENL_ADMIN_PERM,
3326                 .policy = ip_vs_cmd_policy,
3327                 .doit   = ip_vs_genl_set_cmd,
3328         },
3329         {
3330                 .cmd    = IPVS_CMD_GET_DAEMON,
3331                 .flags  = GENL_ADMIN_PERM,
3332                 .dumpit = ip_vs_genl_dump_daemons,
3333         },
3334         {
3335                 .cmd    = IPVS_CMD_SET_CONFIG,
3336                 .flags  = GENL_ADMIN_PERM,
3337                 .policy = ip_vs_cmd_policy,
3338                 .doit   = ip_vs_genl_set_cmd,
3339         },
3340         {
3341                 .cmd    = IPVS_CMD_GET_CONFIG,
3342                 .flags  = GENL_ADMIN_PERM,
3343                 .doit   = ip_vs_genl_get_cmd,
3344         },
3345         {
3346                 .cmd    = IPVS_CMD_GET_INFO,
3347                 .flags  = GENL_ADMIN_PERM,
3348                 .doit   = ip_vs_genl_get_cmd,
3349         },
3350         {
3351                 .cmd    = IPVS_CMD_ZERO,
3352                 .flags  = GENL_ADMIN_PERM,
3353                 .policy = ip_vs_cmd_policy,
3354                 .doit   = ip_vs_genl_set_cmd,
3355         },
3356         {
3357                 .cmd    = IPVS_CMD_FLUSH,
3358                 .flags  = GENL_ADMIN_PERM,
3359                 .doit   = ip_vs_genl_set_cmd,
3360         },
3361 };
3362
3363 static int __init ip_vs_genl_register(void)
3364 {
3365         int ret, i;
3366
3367         ret = genl_register_family(&ip_vs_genl_family);
3368         if (ret)
3369                 return ret;
3370
3371         for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3372                 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3373                 if (ret)
3374                         goto err_out;
3375         }
3376         return 0;
3377
3378 err_out:
3379         genl_unregister_family(&ip_vs_genl_family);
3380         return ret;
3381 }
3382
3383 static void ip_vs_genl_unregister(void)
3384 {
3385         genl_unregister_family(&ip_vs_genl_family);
3386 }
3387
3388 /* End of Generic Netlink interface definitions */
3389
3390
3391 int __init ip_vs_control_init(void)
3392 {
3393         int ret;
3394         int idx;
3395
3396         EnterFunction(2);
3397
3398         ret = nf_register_sockopt(&ip_vs_sockopts);
3399         if (ret) {
3400                 IP_VS_ERR("cannot register sockopt.\n");
3401                 return ret;
3402         }
3403
3404         ret = ip_vs_genl_register();
3405         if (ret) {
3406                 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3407                 nf_unregister_sockopt(&ip_vs_sockopts);
3408                 return ret;
3409         }
3410
3411         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3412         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3413
3414         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3415
3416         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3417         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3418                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3419                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3420         }
3421         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3422                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3423         }
3424
3425         ip_vs_new_estimator(&ip_vs_stats);
3426
3427         /* Hook the defense timer */
3428         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3429
3430         LeaveFunction(2);
3431         return 0;
3432 }
3433
3434
3435 void ip_vs_control_cleanup(void)
3436 {
3437         EnterFunction(2);
3438         ip_vs_trash_cleanup();
3439         cancel_rearming_delayed_work(&defense_work);
3440         cancel_work_sync(&defense_work.work);
3441         ip_vs_kill_estimator(&ip_vs_stats);
3442         unregister_sysctl_table(sysctl_header);
3443         proc_net_remove(&init_net, "ip_vs_stats");
3444         proc_net_remove(&init_net, "ip_vs");
3445         ip_vs_genl_unregister();
3446         nf_unregister_sockopt(&ip_vs_sockopts);
3447         LeaveFunction(2);
3448 }