]> bbs.cooldavid.org Git - net-next-2.6.git/blob - net/ipv4/ipvs/ip_vs_ctl.c
IPVS: Activate IPv6 Netfilter hooks
[net-next-2.6.git] / net / ipv4 / ipvs / ip_vs_ctl.c
1 /*
2  * IPVS         An implementation of the IP virtual server support for the
3  *              LINUX operating system.  IPVS is now implemented as a module
4  *              over the NetFilter framework. IPVS can be used to build a
5  *              high-performance and highly available server based on a
6  *              cluster of servers.
7  *
8  * Authors:     Wensong Zhang <wensong@linuxvirtualserver.org>
9  *              Peter Kese <peter.kese@ijs.si>
10  *              Julian Anastasov <ja@ssi.bg>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  *
17  * Changes:
18  *
19  */
20
21 #include <linux/module.h>
22 #include <linux/init.h>
23 #include <linux/types.h>
24 #include <linux/capability.h>
25 #include <linux/fs.h>
26 #include <linux/sysctl.h>
27 #include <linux/proc_fs.h>
28 #include <linux/workqueue.h>
29 #include <linux/swap.h>
30 #include <linux/seq_file.h>
31
32 #include <linux/netfilter.h>
33 #include <linux/netfilter_ipv4.h>
34 #include <linux/mutex.h>
35
36 #include <net/net_namespace.h>
37 #include <net/ip.h>
38 #ifdef CONFIG_IP_VS_IPV6
39 #include <net/ipv6.h>
40 #include <net/ip6_route.h>
41 #endif
42 #include <net/route.h>
43 #include <net/sock.h>
44 #include <net/genetlink.h>
45
46 #include <asm/uaccess.h>
47
48 #include <net/ip_vs.h>
49
50 /* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
51 static DEFINE_MUTEX(__ip_vs_mutex);
52
53 /* lock for service table */
54 static DEFINE_RWLOCK(__ip_vs_svc_lock);
55
56 /* lock for table with the real services */
57 static DEFINE_RWLOCK(__ip_vs_rs_lock);
58
59 /* lock for state and timeout tables */
60 static DEFINE_RWLOCK(__ip_vs_securetcp_lock);
61
62 /* lock for drop entry handling */
63 static DEFINE_SPINLOCK(__ip_vs_dropentry_lock);
64
65 /* lock for drop packet handling */
66 static DEFINE_SPINLOCK(__ip_vs_droppacket_lock);
67
68 /* 1/rate drop and drop-entry variables */
69 int ip_vs_drop_rate = 0;
70 int ip_vs_drop_counter = 0;
71 static atomic_t ip_vs_dropentry = ATOMIC_INIT(0);
72
73 /* number of virtual services */
74 static int ip_vs_num_services = 0;
75
76 /* sysctl variables */
77 static int sysctl_ip_vs_drop_entry = 0;
78 static int sysctl_ip_vs_drop_packet = 0;
79 static int sysctl_ip_vs_secure_tcp = 0;
80 static int sysctl_ip_vs_amemthresh = 1024;
81 static int sysctl_ip_vs_am_droprate = 10;
82 int sysctl_ip_vs_cache_bypass = 0;
83 int sysctl_ip_vs_expire_nodest_conn = 0;
84 int sysctl_ip_vs_expire_quiescent_template = 0;
85 int sysctl_ip_vs_sync_threshold[2] = { 3, 50 };
86 int sysctl_ip_vs_nat_icmp_send = 0;
87
88
#ifdef CONFIG_IP_VS_DEBUG
/* debug verbosity; only built when CONFIG_IP_VS_DEBUG is set */
static int sysctl_ip_vs_debug_level;

/*
 *	Returns the current value of sysctl_ip_vs_debug_level.
 */
int ip_vs_get_debug_level(void)
{
	const int level = sysctl_ip_vs_debug_level;

	return level;
}
#endif
97
#ifdef CONFIG_IP_VS_IPV6
/*
 *	Check whether an IPv6 address is local to this host by routing
 *	towards it and testing whether the output device is a loopback
 *	device.
 *	Taken from rt6_fill_node() in net/ipv6/route.c, is there a better way?
 *
 *	Returns 1 if the address is considered local, 0 otherwise.
 */
static int __ip_vs_addr_is_local_v6(const struct in6_addr *addr)
{
	struct rt6_info *rt;
	int is_local;
	struct flowi fl = {
		.oif = 0,
		.nl_u = {
			.ip6_u = {
				.daddr = *addr,
				.saddr = { .s6_addr32 = {0, 0, 0, 0} }, } },
	};

	rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl);
	if (!rt)
		return 0;

	/*
	 * A failed lookup is reported through dst.error on the returned
	 * entry rather than through a NULL pointer, so such an entry must
	 * not be mistaken for a real loopback route.
	 */
	is_local = !rt->u.dst.error && rt->rt6i_dev &&
		   (rt->rt6i_dev->flags & IFF_LOOPBACK);

	/* the lookup took a reference on the route; drop it again */
	dst_release(&rt->u.dst);

	return is_local;
}
#endif
118 /*
119  *      update_defense_level is called from keventd and from sysctl,
120  *      so it needs to protect itself from softirqs
121  */
122 static void update_defense_level(void)
123 {
124         struct sysinfo i;
125         static int old_secure_tcp = 0;
126         int availmem;
127         int nomem;
128         int to_change = -1;
129
130         /* we only count free and buffered memory (in pages) */
131         si_meminfo(&i);
132         availmem = i.freeram + i.bufferram;
133         /* however in linux 2.5 the i.bufferram is total page cache size,
134            we need adjust it */
135         /* si_swapinfo(&i); */
136         /* availmem = availmem - (i.totalswap - i.freeswap); */
137
138         nomem = (availmem < sysctl_ip_vs_amemthresh);
139
140         local_bh_disable();
141
142         /* drop_entry */
143         spin_lock(&__ip_vs_dropentry_lock);
144         switch (sysctl_ip_vs_drop_entry) {
145         case 0:
146                 atomic_set(&ip_vs_dropentry, 0);
147                 break;
148         case 1:
149                 if (nomem) {
150                         atomic_set(&ip_vs_dropentry, 1);
151                         sysctl_ip_vs_drop_entry = 2;
152                 } else {
153                         atomic_set(&ip_vs_dropentry, 0);
154                 }
155                 break;
156         case 2:
157                 if (nomem) {
158                         atomic_set(&ip_vs_dropentry, 1);
159                 } else {
160                         atomic_set(&ip_vs_dropentry, 0);
161                         sysctl_ip_vs_drop_entry = 1;
162                 };
163                 break;
164         case 3:
165                 atomic_set(&ip_vs_dropentry, 1);
166                 break;
167         }
168         spin_unlock(&__ip_vs_dropentry_lock);
169
170         /* drop_packet */
171         spin_lock(&__ip_vs_droppacket_lock);
172         switch (sysctl_ip_vs_drop_packet) {
173         case 0:
174                 ip_vs_drop_rate = 0;
175                 break;
176         case 1:
177                 if (nomem) {
178                         ip_vs_drop_rate = ip_vs_drop_counter
179                                 = sysctl_ip_vs_amemthresh /
180                                 (sysctl_ip_vs_amemthresh-availmem);
181                         sysctl_ip_vs_drop_packet = 2;
182                 } else {
183                         ip_vs_drop_rate = 0;
184                 }
185                 break;
186         case 2:
187                 if (nomem) {
188                         ip_vs_drop_rate = ip_vs_drop_counter
189                                 = sysctl_ip_vs_amemthresh /
190                                 (sysctl_ip_vs_amemthresh-availmem);
191                 } else {
192                         ip_vs_drop_rate = 0;
193                         sysctl_ip_vs_drop_packet = 1;
194                 }
195                 break;
196         case 3:
197                 ip_vs_drop_rate = sysctl_ip_vs_am_droprate;
198                 break;
199         }
200         spin_unlock(&__ip_vs_droppacket_lock);
201
202         /* secure_tcp */
203         write_lock(&__ip_vs_securetcp_lock);
204         switch (sysctl_ip_vs_secure_tcp) {
205         case 0:
206                 if (old_secure_tcp >= 2)
207                         to_change = 0;
208                 break;
209         case 1:
210                 if (nomem) {
211                         if (old_secure_tcp < 2)
212                                 to_change = 1;
213                         sysctl_ip_vs_secure_tcp = 2;
214                 } else {
215                         if (old_secure_tcp >= 2)
216                                 to_change = 0;
217                 }
218                 break;
219         case 2:
220                 if (nomem) {
221                         if (old_secure_tcp < 2)
222                                 to_change = 1;
223                 } else {
224                         if (old_secure_tcp >= 2)
225                                 to_change = 0;
226                         sysctl_ip_vs_secure_tcp = 1;
227                 }
228                 break;
229         case 3:
230                 if (old_secure_tcp < 2)
231                         to_change = 1;
232                 break;
233         }
234         old_secure_tcp = sysctl_ip_vs_secure_tcp;
235         if (to_change >= 0)
236                 ip_vs_protocol_timeout_change(sysctl_ip_vs_secure_tcp>1);
237         write_unlock(&__ip_vs_securetcp_lock);
238
239         local_bh_enable();
240 }
241
242
243 /*
244  *      Timer for checking the defense
245  */
246 #define DEFENSE_TIMER_PERIOD    1*HZ
247 static void defense_work_handler(struct work_struct *work);
248 static DECLARE_DELAYED_WORK(defense_work, defense_work_handler);
249
250 static void defense_work_handler(struct work_struct *work)
251 {
252         update_defense_level();
253         if (atomic_read(&ip_vs_dropentry))
254                 ip_vs_random_dropentry();
255
256         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
257 }
258
259 int
260 ip_vs_use_count_inc(void)
261 {
262         return try_module_get(THIS_MODULE);
263 }
264
265 void
266 ip_vs_use_count_dec(void)
267 {
268         module_put(THIS_MODULE);
269 }
270
271
272 /*
273  *      Hash table: for virtual service lookups
274  */
275 #define IP_VS_SVC_TAB_BITS 8
276 #define IP_VS_SVC_TAB_SIZE (1 << IP_VS_SVC_TAB_BITS)
277 #define IP_VS_SVC_TAB_MASK (IP_VS_SVC_TAB_SIZE - 1)
278
279 /* the service table hashed by <protocol, addr, port> */
280 static struct list_head ip_vs_svc_table[IP_VS_SVC_TAB_SIZE];
281 /* the service table hashed by fwmark */
282 static struct list_head ip_vs_svc_fwm_table[IP_VS_SVC_TAB_SIZE];
283
284 /*
285  *      Hash table: for real service lookups
286  */
287 #define IP_VS_RTAB_BITS 4
288 #define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
289 #define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
290
291 static struct list_head ip_vs_rtable[IP_VS_RTAB_SIZE];
292
293 /*
294  *      Trash for destinations
295  */
296 static LIST_HEAD(ip_vs_dest_trash);
297
298 /*
299  *      FTP & NULL virtual service counters
300  */
301 static atomic_t ip_vs_ftpsvc_counter = ATOMIC_INIT(0);
302 static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0);
303
304
305 /*
306  *      Returns hash value for virtual service
307  */
308 static __inline__ unsigned
309 ip_vs_svc_hashkey(int af, unsigned proto, const union nf_inet_addr *addr,
310                   __be16 port)
311 {
312         register unsigned porth = ntohs(port);
313         __be32 addr_fold = addr->ip;
314
315 #ifdef CONFIG_IP_VS_IPV6
316         if (af == AF_INET6)
317                 addr_fold = addr->ip6[0]^addr->ip6[1]^
318                             addr->ip6[2]^addr->ip6[3];
319 #endif
320
321         return (proto^ntohl(addr_fold)^(porth>>IP_VS_SVC_TAB_BITS)^porth)
322                 & IP_VS_SVC_TAB_MASK;
323 }
324
325 /*
326  *      Returns hash value of fwmark for virtual service lookup
327  */
328 static __inline__ unsigned ip_vs_svc_fwm_hashkey(__u32 fwmark)
329 {
330         return fwmark & IP_VS_SVC_TAB_MASK;
331 }
332
333 /*
334  *      Hashes a service in the ip_vs_svc_table by <proto,addr,port>
335  *      or in the ip_vs_svc_fwm_table by fwmark.
336  *      Should be called with locked tables.
337  */
338 static int ip_vs_svc_hash(struct ip_vs_service *svc)
339 {
340         unsigned hash;
341
342         if (svc->flags & IP_VS_SVC_F_HASHED) {
343                 IP_VS_ERR("ip_vs_svc_hash(): request for already hashed, "
344                           "called from %p\n", __builtin_return_address(0));
345                 return 0;
346         }
347
348         if (svc->fwmark == 0) {
349                 /*
350                  *  Hash it by <protocol,addr,port> in ip_vs_svc_table
351                  */
352                 hash = ip_vs_svc_hashkey(svc->af, svc->protocol, &svc->addr,
353                                          svc->port);
354                 list_add(&svc->s_list, &ip_vs_svc_table[hash]);
355         } else {
356                 /*
357                  *  Hash it by fwmark in ip_vs_svc_fwm_table
358                  */
359                 hash = ip_vs_svc_fwm_hashkey(svc->fwmark);
360                 list_add(&svc->f_list, &ip_vs_svc_fwm_table[hash]);
361         }
362
363         svc->flags |= IP_VS_SVC_F_HASHED;
364         /* increase its refcnt because it is referenced by the svc table */
365         atomic_inc(&svc->refcnt);
366         return 1;
367 }
368
369
370 /*
371  *      Unhashes a service from ip_vs_svc_table/ip_vs_svc_fwm_table.
372  *      Should be called with locked tables.
373  */
374 static int ip_vs_svc_unhash(struct ip_vs_service *svc)
375 {
376         if (!(svc->flags & IP_VS_SVC_F_HASHED)) {
377                 IP_VS_ERR("ip_vs_svc_unhash(): request for unhash flagged, "
378                           "called from %p\n", __builtin_return_address(0));
379                 return 0;
380         }
381
382         if (svc->fwmark == 0) {
383                 /* Remove it from the ip_vs_svc_table table */
384                 list_del(&svc->s_list);
385         } else {
386                 /* Remove it from the ip_vs_svc_fwm_table table */
387                 list_del(&svc->f_list);
388         }
389
390         svc->flags &= ~IP_VS_SVC_F_HASHED;
391         atomic_dec(&svc->refcnt);
392         return 1;
393 }
394
395
396 /*
397  *      Get service by {proto,addr,port} in the service table.
398  */
399 static inline struct ip_vs_service *
400 __ip_vs_service_get(int af, __u16 protocol, const union nf_inet_addr *vaddr,
401                     __be16 vport)
402 {
403         unsigned hash;
404         struct ip_vs_service *svc;
405
406         /* Check for "full" addressed entries */
407         hash = ip_vs_svc_hashkey(af, protocol, vaddr, vport);
408
409         list_for_each_entry(svc, &ip_vs_svc_table[hash], s_list){
410                 if ((svc->af == af)
411                     && ip_vs_addr_equal(af, &svc->addr, vaddr)
412                     && (svc->port == vport)
413                     && (svc->protocol == protocol)) {
414                         /* HIT */
415                         atomic_inc(&svc->usecnt);
416                         return svc;
417                 }
418         }
419
420         return NULL;
421 }
422
423
424 /*
425  *      Get service by {fwmark} in the service table.
426  */
427 static inline struct ip_vs_service *
428 __ip_vs_svc_fwm_get(int af, __u32 fwmark)
429 {
430         unsigned hash;
431         struct ip_vs_service *svc;
432
433         /* Check for fwmark addressed entries */
434         hash = ip_vs_svc_fwm_hashkey(fwmark);
435
436         list_for_each_entry(svc, &ip_vs_svc_fwm_table[hash], f_list) {
437                 if (svc->fwmark == fwmark && svc->af == af) {
438                         /* HIT */
439                         atomic_inc(&svc->usecnt);
440                         return svc;
441                 }
442         }
443
444         return NULL;
445 }
446
447 struct ip_vs_service *
448 ip_vs_service_get(int af, __u32 fwmark, __u16 protocol,
449                   const union nf_inet_addr *vaddr, __be16 vport)
450 {
451         struct ip_vs_service *svc;
452
453         read_lock(&__ip_vs_svc_lock);
454
455         /*
456          *      Check the table hashed by fwmark first
457          */
458         if (fwmark && (svc = __ip_vs_svc_fwm_get(af, fwmark)))
459                 goto out;
460
461         /*
462          *      Check the table hashed by <protocol,addr,port>
463          *      for "full" addressed entries
464          */
465         svc = __ip_vs_service_get(af, protocol, vaddr, vport);
466
467         if (svc == NULL
468             && protocol == IPPROTO_TCP
469             && atomic_read(&ip_vs_ftpsvc_counter)
470             && (vport == FTPDATA || ntohs(vport) >= PROT_SOCK)) {
471                 /*
472                  * Check if ftp service entry exists, the packet
473                  * might belong to FTP data connections.
474                  */
475                 svc = __ip_vs_service_get(af, protocol, vaddr, FTPPORT);
476         }
477
478         if (svc == NULL
479             && atomic_read(&ip_vs_nullsvc_counter)) {
480                 /*
481                  * Check if the catch-all port (port zero) exists
482                  */
483                 svc = __ip_vs_service_get(af, protocol, vaddr, 0);
484         }
485
486   out:
487         read_unlock(&__ip_vs_svc_lock);
488
489         IP_VS_DBG_BUF(9, "lookup service: fwm %u %s %s:%u %s\n",
490                       fwmark, ip_vs_proto_name(protocol),
491                       IP_VS_DBG_ADDR(af, vaddr), ntohs(vport),
492                       svc ? "hit" : "not hit");
493
494         return svc;
495 }
496
497
498 static inline void
499 __ip_vs_bind_svc(struct ip_vs_dest *dest, struct ip_vs_service *svc)
500 {
501         atomic_inc(&svc->refcnt);
502         dest->svc = svc;
503 }
504
505 static inline void
506 __ip_vs_unbind_svc(struct ip_vs_dest *dest)
507 {
508         struct ip_vs_service *svc = dest->svc;
509
510         dest->svc = NULL;
511         if (atomic_dec_and_test(&svc->refcnt))
512                 kfree(svc);
513 }
514
515
516 /*
517  *      Returns hash value for real service
518  */
519 static inline unsigned ip_vs_rs_hashkey(int af,
520                                             const union nf_inet_addr *addr,
521                                             __be16 port)
522 {
523         register unsigned porth = ntohs(port);
524         __be32 addr_fold = addr->ip;
525
526 #ifdef CONFIG_IP_VS_IPV6
527         if (af == AF_INET6)
528                 addr_fold = addr->ip6[0]^addr->ip6[1]^
529                             addr->ip6[2]^addr->ip6[3];
530 #endif
531
532         return (ntohl(addr_fold)^(porth>>IP_VS_RTAB_BITS)^porth)
533                 & IP_VS_RTAB_MASK;
534 }
535
536 /*
537  *      Hashes ip_vs_dest in ip_vs_rtable by <proto,addr,port>.
538  *      should be called with locked tables.
539  */
540 static int ip_vs_rs_hash(struct ip_vs_dest *dest)
541 {
542         unsigned hash;
543
544         if (!list_empty(&dest->d_list)) {
545                 return 0;
546         }
547
548         /*
549          *      Hash by proto,addr,port,
550          *      which are the parameters of the real service.
551          */
552         hash = ip_vs_rs_hashkey(dest->af, &dest->addr, dest->port);
553
554         list_add(&dest->d_list, &ip_vs_rtable[hash]);
555
556         return 1;
557 }
558
559 /*
560  *      UNhashes ip_vs_dest from ip_vs_rtable.
561  *      should be called with locked tables.
562  */
563 static int ip_vs_rs_unhash(struct ip_vs_dest *dest)
564 {
565         /*
566          * Remove it from the ip_vs_rtable table.
567          */
568         if (!list_empty(&dest->d_list)) {
569                 list_del(&dest->d_list);
570                 INIT_LIST_HEAD(&dest->d_list);
571         }
572
573         return 1;
574 }
575
576 /*
577  *      Lookup real service by <proto,addr,port> in the real service table.
578  */
579 struct ip_vs_dest *
580 ip_vs_lookup_real_service(int af, __u16 protocol,
581                           const union nf_inet_addr *daddr,
582                           __be16 dport)
583 {
584         unsigned hash;
585         struct ip_vs_dest *dest;
586
587         /*
588          *      Check for "full" addressed entries
589          *      Return the first found entry
590          */
591         hash = ip_vs_rs_hashkey(af, daddr, dport);
592
593         read_lock(&__ip_vs_rs_lock);
594         list_for_each_entry(dest, &ip_vs_rtable[hash], d_list) {
595                 if ((dest->af == af)
596                     && ip_vs_addr_equal(af, &dest->addr, daddr)
597                     && (dest->port == dport)
598                     && ((dest->protocol == protocol) ||
599                         dest->vfwmark)) {
600                         /* HIT */
601                         read_unlock(&__ip_vs_rs_lock);
602                         return dest;
603                 }
604         }
605         read_unlock(&__ip_vs_rs_lock);
606
607         return NULL;
608 }
609
610 /*
611  *      Lookup destination by {addr,port} in the given service
612  */
613 static struct ip_vs_dest *
614 ip_vs_lookup_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
615                   __be16 dport)
616 {
617         struct ip_vs_dest *dest;
618
619         /*
620          * Find the destination for the given service
621          */
622         list_for_each_entry(dest, &svc->destinations, n_list) {
623                 if ((dest->af == svc->af)
624                     && ip_vs_addr_equal(svc->af, &dest->addr, daddr)
625                     && (dest->port == dport)) {
626                         /* HIT */
627                         return dest;
628                 }
629         }
630
631         return NULL;
632 }
633
634 /*
635  * Find destination by {daddr,dport,vaddr,protocol}
636  * Cretaed to be used in ip_vs_process_message() in
637  * the backup synchronization daemon. It finds the
638  * destination to be bound to the received connection
639  * on the backup.
640  *
641  * ip_vs_lookup_real_service() looked promissing, but
642  * seems not working as expected.
643  */
644 struct ip_vs_dest *ip_vs_find_dest(int af, const union nf_inet_addr *daddr,
645                                    __be16 dport,
646                                    const union nf_inet_addr *vaddr,
647                                    __be16 vport, __u16 protocol)
648 {
649         struct ip_vs_dest *dest;
650         struct ip_vs_service *svc;
651
652         svc = ip_vs_service_get(af, 0, protocol, vaddr, vport);
653         if (!svc)
654                 return NULL;
655         dest = ip_vs_lookup_dest(svc, daddr, dport);
656         if (dest)
657                 atomic_inc(&dest->refcnt);
658         ip_vs_service_put(svc);
659         return dest;
660 }
661
662 /*
663  *  Lookup dest by {svc,addr,port} in the destination trash.
664  *  The destination trash is used to hold the destinations that are removed
665  *  from the service table but are still referenced by some conn entries.
666  *  The reason to add the destination trash is when the dest is temporary
667  *  down (either by administrator or by monitor program), the dest can be
668  *  picked back from the trash, the remaining connections to the dest can
669  *  continue, and the counting information of the dest is also useful for
670  *  scheduling.
671  */
672 static struct ip_vs_dest *
673 ip_vs_trash_get_dest(struct ip_vs_service *svc, const union nf_inet_addr *daddr,
674                      __be16 dport)
675 {
676         struct ip_vs_dest *dest, *nxt;
677
678         /*
679          * Find the destination in trash
680          */
681         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
682                 IP_VS_DBG_BUF(3, "Destination %u/%s:%u still in trash, "
683                               "dest->refcnt=%d\n",
684                               dest->vfwmark,
685                               IP_VS_DBG_ADDR(svc->af, &dest->addr),
686                               ntohs(dest->port),
687                               atomic_read(&dest->refcnt));
688                 if (dest->af == svc->af &&
689                     ip_vs_addr_equal(svc->af, &dest->addr, daddr) &&
690                     dest->port == dport &&
691                     dest->vfwmark == svc->fwmark &&
692                     dest->protocol == svc->protocol &&
693                     (svc->fwmark ||
694                      (ip_vs_addr_equal(svc->af, &dest->vaddr, &svc->addr) &&
695                       dest->vport == svc->port))) {
696                         /* HIT */
697                         return dest;
698                 }
699
700                 /*
701                  * Try to purge the destination from trash if not referenced
702                  */
703                 if (atomic_read(&dest->refcnt) == 1) {
704                         IP_VS_DBG_BUF(3, "Removing destination %u/%s:%u "
705                                       "from trash\n",
706                                       dest->vfwmark,
707                                       IP_VS_DBG_ADDR(svc->af, &dest->addr),
708                                       ntohs(dest->port));
709                         list_del(&dest->n_list);
710                         ip_vs_dst_reset(dest);
711                         __ip_vs_unbind_svc(dest);
712                         kfree(dest);
713                 }
714         }
715
716         return NULL;
717 }
718
719
720 /*
721  *  Clean up all the destinations in the trash
722  *  Called by the ip_vs_control_cleanup()
723  *
724  *  When the ip_vs_control_clearup is activated by ipvs module exit,
725  *  the service tables must have been flushed and all the connections
726  *  are expired, and the refcnt of each destination in the trash must
727  *  be 1, so we simply release them here.
728  */
729 static void ip_vs_trash_cleanup(void)
730 {
731         struct ip_vs_dest *dest, *nxt;
732
733         list_for_each_entry_safe(dest, nxt, &ip_vs_dest_trash, n_list) {
734                 list_del(&dest->n_list);
735                 ip_vs_dst_reset(dest);
736                 __ip_vs_unbind_svc(dest);
737                 kfree(dest);
738         }
739 }
740
741
742 static void
743 ip_vs_zero_stats(struct ip_vs_stats *stats)
744 {
745         spin_lock_bh(&stats->lock);
746
747         stats->conns = 0;
748         stats->inpkts = 0;
749         stats->outpkts = 0;
750         stats->inbytes = 0;
751         stats->outbytes = 0;
752
753         stats->cps = 0;
754         stats->inpps = 0;
755         stats->outpps = 0;
756         stats->inbps = 0;
757         stats->outbps = 0;
758
759         ip_vs_zero_estimator(stats);
760
761         spin_unlock_bh(&stats->lock);
762 }
763
764 /*
765  *      Update a destination in the given service
766  */
767 static void
768 __ip_vs_update_dest(struct ip_vs_service *svc,
769                     struct ip_vs_dest *dest, struct ip_vs_dest_user_kern *udest)
770 {
771         int conn_flags;
772
773         /* set the weight and the flags */
774         atomic_set(&dest->weight, udest->weight);
775         conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE;
776
777         /* check if local node and update the flags */
778 #ifdef CONFIG_IP_VS_IPV6
779         if (svc->af == AF_INET6) {
780                 if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) {
781                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
782                                 | IP_VS_CONN_F_LOCALNODE;
783                 }
784         } else
785 #endif
786                 if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) {
787                         conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK)
788                                 | IP_VS_CONN_F_LOCALNODE;
789                 }
790
791         /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */
792         if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) {
793                 conn_flags |= IP_VS_CONN_F_NOOUTPUT;
794         } else {
795                 /*
796                  *    Put the real service in ip_vs_rtable if not present.
797                  *    For now only for NAT!
798                  */
799                 write_lock_bh(&__ip_vs_rs_lock);
800                 ip_vs_rs_hash(dest);
801                 write_unlock_bh(&__ip_vs_rs_lock);
802         }
803         atomic_set(&dest->conn_flags, conn_flags);
804
805         /* bind the service */
806         if (!dest->svc) {
807                 __ip_vs_bind_svc(dest, svc);
808         } else {
809                 if (dest->svc != svc) {
810                         __ip_vs_unbind_svc(dest);
811                         ip_vs_zero_stats(&dest->stats);
812                         __ip_vs_bind_svc(dest, svc);
813                 }
814         }
815
816         /* set the dest status flags */
817         dest->flags |= IP_VS_DEST_F_AVAILABLE;
818
819         if (udest->u_threshold == 0 || udest->u_threshold > dest->u_threshold)
820                 dest->flags &= ~IP_VS_DEST_F_OVERLOAD;
821         dest->u_threshold = udest->u_threshold;
822         dest->l_threshold = udest->l_threshold;
823 }
824
825
826 /*
827  *      Create a destination for the given service
828  */
829 static int
830 ip_vs_new_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest,
831                struct ip_vs_dest **dest_p)
832 {
833         struct ip_vs_dest *dest;
834         unsigned atype;
835
836         EnterFunction(2);
837
838 #ifdef CONFIG_IP_VS_IPV6
839         if (svc->af == AF_INET6) {
840                 atype = ipv6_addr_type(&udest->addr.in6);
841                 if (!(atype & IPV6_ADDR_UNICAST) &&
842                         !__ip_vs_addr_is_local_v6(&udest->addr.in6))
843                         return -EINVAL;
844         } else
845 #endif
846         {
847                 atype = inet_addr_type(&init_net, udest->addr.ip);
848                 if (atype != RTN_LOCAL && atype != RTN_UNICAST)
849                         return -EINVAL;
850         }
851
852         dest = kzalloc(sizeof(struct ip_vs_dest), GFP_ATOMIC);
853         if (dest == NULL) {
854                 IP_VS_ERR("ip_vs_new_dest: kmalloc failed.\n");
855                 return -ENOMEM;
856         }
857
858         dest->af = svc->af;
859         dest->protocol = svc->protocol;
860         dest->vaddr = svc->addr;
861         dest->vport = svc->port;
862         dest->vfwmark = svc->fwmark;
863         ip_vs_addr_copy(svc->af, &dest->addr, &udest->addr);
864         dest->port = udest->port;
865
866         atomic_set(&dest->activeconns, 0);
867         atomic_set(&dest->inactconns, 0);
868         atomic_set(&dest->persistconns, 0);
869         atomic_set(&dest->refcnt, 0);
870
871         INIT_LIST_HEAD(&dest->d_list);
872         spin_lock_init(&dest->dst_lock);
873         spin_lock_init(&dest->stats.lock);
874         __ip_vs_update_dest(svc, dest, udest);
875         ip_vs_new_estimator(&dest->stats);
876
877         *dest_p = dest;
878
879         LeaveFunction(2);
880         return 0;
881 }
882
883
884 /*
885  *      Add a destination into an existing service
886  */
887 static int
888 ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
889 {
890         struct ip_vs_dest *dest;
891         union nf_inet_addr daddr;
892         __be16 dport = udest->port;
893         int ret;
894
895         EnterFunction(2);
896
897         if (udest->weight < 0) {
898                 IP_VS_ERR("ip_vs_add_dest(): server weight less than zero\n");
899                 return -ERANGE;
900         }
901
902         if (udest->l_threshold > udest->u_threshold) {
903                 IP_VS_ERR("ip_vs_add_dest(): lower threshold is higher than "
904                           "upper threshold\n");
905                 return -ERANGE;
906         }
907
908         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
909
910         /*
911          * Check if the dest already exists in the list
912          */
913         dest = ip_vs_lookup_dest(svc, &daddr, dport);
914
915         if (dest != NULL) {
916                 IP_VS_DBG(1, "ip_vs_add_dest(): dest already exists\n");
917                 return -EEXIST;
918         }
919
920         /*
921          * Check if the dest already exists in the trash and
922          * is from the same service
923          */
924         dest = ip_vs_trash_get_dest(svc, &daddr, dport);
925
926         if (dest != NULL) {
927                 IP_VS_DBG_BUF(3, "Get destination %s:%u from trash, "
928                               "dest->refcnt=%d, service %u/%s:%u\n",
929                               IP_VS_DBG_ADDR(svc->af, &daddr), ntohs(dport),
930                               atomic_read(&dest->refcnt),
931                               dest->vfwmark,
932                               IP_VS_DBG_ADDR(svc->af, &dest->vaddr),
933                               ntohs(dest->vport));
934
935                 __ip_vs_update_dest(svc, dest, udest);
936
937                 /*
938                  * Get the destination from the trash
939                  */
940                 list_del(&dest->n_list);
941
942                 ip_vs_new_estimator(&dest->stats);
943
944                 write_lock_bh(&__ip_vs_svc_lock);
945
946                 /*
947                  * Wait until all other svc users go away.
948                  */
949                 IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
950
951                 list_add(&dest->n_list, &svc->destinations);
952                 svc->num_dests++;
953
954                 /* call the update_service function of its scheduler */
955                 if (svc->scheduler->update_service)
956                         svc->scheduler->update_service(svc);
957
958                 write_unlock_bh(&__ip_vs_svc_lock);
959                 return 0;
960         }
961
962         /*
963          * Allocate and initialize the dest structure
964          */
965         ret = ip_vs_new_dest(svc, udest, &dest);
966         if (ret) {
967                 return ret;
968         }
969
970         /*
971          * Add the dest entry into the list
972          */
973         atomic_inc(&dest->refcnt);
974
975         write_lock_bh(&__ip_vs_svc_lock);
976
977         /*
978          * Wait until all other svc users go away.
979          */
980         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
981
982         list_add(&dest->n_list, &svc->destinations);
983         svc->num_dests++;
984
985         /* call the update_service function of its scheduler */
986         if (svc->scheduler->update_service)
987                 svc->scheduler->update_service(svc);
988
989         write_unlock_bh(&__ip_vs_svc_lock);
990
991         LeaveFunction(2);
992
993         return 0;
994 }
995
996
997 /*
998  *      Edit a destination in the given service
999  */
1000 static int
1001 ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1002 {
1003         struct ip_vs_dest *dest;
1004         union nf_inet_addr daddr;
1005         __be16 dport = udest->port;
1006
1007         EnterFunction(2);
1008
1009         if (udest->weight < 0) {
1010                 IP_VS_ERR("ip_vs_edit_dest(): server weight less than zero\n");
1011                 return -ERANGE;
1012         }
1013
1014         if (udest->l_threshold > udest->u_threshold) {
1015                 IP_VS_ERR("ip_vs_edit_dest(): lower threshold is higher than "
1016                           "upper threshold\n");
1017                 return -ERANGE;
1018         }
1019
1020         ip_vs_addr_copy(svc->af, &daddr, &udest->addr);
1021
1022         /*
1023          *  Lookup the destination list
1024          */
1025         dest = ip_vs_lookup_dest(svc, &daddr, dport);
1026
1027         if (dest == NULL) {
1028                 IP_VS_DBG(1, "ip_vs_edit_dest(): dest doesn't exist\n");
1029                 return -ENOENT;
1030         }
1031
1032         __ip_vs_update_dest(svc, dest, udest);
1033
1034         write_lock_bh(&__ip_vs_svc_lock);
1035
1036         /* Wait until all other svc users go away */
1037         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1038
1039         /* call the update_service, because server weight may be changed */
1040         if (svc->scheduler->update_service)
1041                 svc->scheduler->update_service(svc);
1042
1043         write_unlock_bh(&__ip_vs_svc_lock);
1044
1045         LeaveFunction(2);
1046
1047         return 0;
1048 }
1049
1050
1051 /*
1052  *      Delete a destination (must be already unlinked from the service)
1053  */
1054 static void __ip_vs_del_dest(struct ip_vs_dest *dest)
1055 {
1056         ip_vs_kill_estimator(&dest->stats);
1057
1058         /*
1059          *  Remove it from the d-linked list with the real services.
1060          */
1061         write_lock_bh(&__ip_vs_rs_lock);
1062         ip_vs_rs_unhash(dest);
1063         write_unlock_bh(&__ip_vs_rs_lock);
1064
1065         /*
1066          *  Decrease the refcnt of the dest, and free the dest
1067          *  if nobody refers to it (refcnt=0). Otherwise, throw
1068          *  the destination into the trash.
1069          */
1070         if (atomic_dec_and_test(&dest->refcnt)) {
1071                 ip_vs_dst_reset(dest);
1072                 /* simply decrease svc->refcnt here, let the caller check
1073                    and release the service if nobody refers to it.
1074                    Only user context can release destination and service,
1075                    and only one user context can update virtual service at a
1076                    time, so the operation here is OK */
1077                 atomic_dec(&dest->svc->refcnt);
1078                 kfree(dest);
1079         } else {
1080                 IP_VS_DBG_BUF(3, "Moving dest %s:%u into trash, "
1081                               "dest->refcnt=%d\n",
1082                               IP_VS_DBG_ADDR(dest->af, &dest->addr),
1083                               ntohs(dest->port),
1084                               atomic_read(&dest->refcnt));
1085                 list_add(&dest->n_list, &ip_vs_dest_trash);
1086                 atomic_inc(&dest->refcnt);
1087         }
1088 }
1089
1090
1091 /*
1092  *      Unlink a destination from the given service
1093  */
1094 static void __ip_vs_unlink_dest(struct ip_vs_service *svc,
1095                                 struct ip_vs_dest *dest,
1096                                 int svcupd)
1097 {
1098         dest->flags &= ~IP_VS_DEST_F_AVAILABLE;
1099
1100         /*
1101          *  Remove it from the d-linked destination list.
1102          */
1103         list_del(&dest->n_list);
1104         svc->num_dests--;
1105
1106         /*
1107          *  Call the update_service function of its scheduler
1108          */
1109         if (svcupd && svc->scheduler->update_service)
1110                         svc->scheduler->update_service(svc);
1111 }
1112
1113
1114 /*
1115  *      Delete a destination server in the given service
1116  */
1117 static int
1118 ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest)
1119 {
1120         struct ip_vs_dest *dest;
1121         __be16 dport = udest->port;
1122
1123         EnterFunction(2);
1124
1125         dest = ip_vs_lookup_dest(svc, &udest->addr, dport);
1126
1127         if (dest == NULL) {
1128                 IP_VS_DBG(1, "ip_vs_del_dest(): destination not found!\n");
1129                 return -ENOENT;
1130         }
1131
1132         write_lock_bh(&__ip_vs_svc_lock);
1133
1134         /*
1135          *      Wait until all other svc users go away.
1136          */
1137         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1138
1139         /*
1140          *      Unlink dest from the service
1141          */
1142         __ip_vs_unlink_dest(svc, dest, 1);
1143
1144         write_unlock_bh(&__ip_vs_svc_lock);
1145
1146         /*
1147          *      Delete the destination
1148          */
1149         __ip_vs_del_dest(dest);
1150
1151         LeaveFunction(2);
1152
1153         return 0;
1154 }
1155
1156
1157 /*
1158  *      Add a service into the service hash table
1159  */
1160 static int
1161 ip_vs_add_service(struct ip_vs_service_user_kern *u,
1162                   struct ip_vs_service **svc_p)
1163 {
1164         int ret = 0;
1165         struct ip_vs_scheduler *sched = NULL;
1166         struct ip_vs_service *svc = NULL;
1167
1168         /* increase the module use count */
1169         ip_vs_use_count_inc();
1170
1171         /* Lookup the scheduler by 'u->sched_name' */
1172         sched = ip_vs_scheduler_get(u->sched_name);
1173         if (sched == NULL) {
1174                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1175                            u->sched_name);
1176                 ret = -ENOENT;
1177                 goto out_mod_dec;
1178         }
1179
1180         svc = kzalloc(sizeof(struct ip_vs_service), GFP_ATOMIC);
1181         if (svc == NULL) {
1182                 IP_VS_DBG(1, "ip_vs_add_service: kmalloc failed.\n");
1183                 ret = -ENOMEM;
1184                 goto out_err;
1185         }
1186
1187         /* I'm the first user of the service */
1188         atomic_set(&svc->usecnt, 1);
1189         atomic_set(&svc->refcnt, 0);
1190
1191         svc->af = u->af;
1192         svc->protocol = u->protocol;
1193         ip_vs_addr_copy(svc->af, &svc->addr, &u->addr);
1194         svc->port = u->port;
1195         svc->fwmark = u->fwmark;
1196         svc->flags = u->flags;
1197         svc->timeout = u->timeout * HZ;
1198         svc->netmask = u->netmask;
1199
1200         INIT_LIST_HEAD(&svc->destinations);
1201         rwlock_init(&svc->sched_lock);
1202         spin_lock_init(&svc->stats.lock);
1203
1204         /* Bind the scheduler */
1205         ret = ip_vs_bind_scheduler(svc, sched);
1206         if (ret)
1207                 goto out_err;
1208         sched = NULL;
1209
1210         /* Update the virtual service counters */
1211         if (svc->port == FTPPORT)
1212                 atomic_inc(&ip_vs_ftpsvc_counter);
1213         else if (svc->port == 0)
1214                 atomic_inc(&ip_vs_nullsvc_counter);
1215
1216         ip_vs_new_estimator(&svc->stats);
1217         ip_vs_num_services++;
1218
1219         /* Hash the service into the service table */
1220         write_lock_bh(&__ip_vs_svc_lock);
1221         ip_vs_svc_hash(svc);
1222         write_unlock_bh(&__ip_vs_svc_lock);
1223
1224         *svc_p = svc;
1225         return 0;
1226
1227   out_err:
1228         if (svc != NULL) {
1229                 if (svc->scheduler)
1230                         ip_vs_unbind_scheduler(svc);
1231                 if (svc->inc) {
1232                         local_bh_disable();
1233                         ip_vs_app_inc_put(svc->inc);
1234                         local_bh_enable();
1235                 }
1236                 kfree(svc);
1237         }
1238         ip_vs_scheduler_put(sched);
1239
1240   out_mod_dec:
1241         /* decrease the module use count */
1242         ip_vs_use_count_dec();
1243
1244         return ret;
1245 }
1246
1247
1248 /*
1249  *      Edit a service and bind it with a new scheduler
1250  */
1251 static int
1252 ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u)
1253 {
1254         struct ip_vs_scheduler *sched, *old_sched;
1255         int ret = 0;
1256
1257         /*
1258          * Lookup the scheduler, by 'u->sched_name'
1259          */
1260         sched = ip_vs_scheduler_get(u->sched_name);
1261         if (sched == NULL) {
1262                 IP_VS_INFO("Scheduler module ip_vs_%s not found\n",
1263                            u->sched_name);
1264                 return -ENOENT;
1265         }
1266         old_sched = sched;
1267
1268         write_lock_bh(&__ip_vs_svc_lock);
1269
1270         /*
1271          * Wait until all other svc users go away.
1272          */
1273         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1274
1275         /*
1276          * Set the flags and timeout value
1277          */
1278         svc->flags = u->flags | IP_VS_SVC_F_HASHED;
1279         svc->timeout = u->timeout * HZ;
1280         svc->netmask = u->netmask;
1281
1282         old_sched = svc->scheduler;
1283         if (sched != old_sched) {
1284                 /*
1285                  * Unbind the old scheduler
1286                  */
1287                 if ((ret = ip_vs_unbind_scheduler(svc))) {
1288                         old_sched = sched;
1289                         goto out;
1290                 }
1291
1292                 /*
1293                  * Bind the new scheduler
1294                  */
1295                 if ((ret = ip_vs_bind_scheduler(svc, sched))) {
1296                         /*
1297                          * If ip_vs_bind_scheduler fails, restore the old
1298                          * scheduler.
1299                          * The main reason of failure is out of memory.
1300                          *
1301                          * The question is if the old scheduler can be
1302                          * restored all the time. TODO: if it cannot be
1303                          * restored some time, we must delete the service,
1304                          * otherwise the system may crash.
1305                          */
1306                         ip_vs_bind_scheduler(svc, old_sched);
1307                         old_sched = sched;
1308                         goto out;
1309                 }
1310         }
1311
1312   out:
1313         write_unlock_bh(&__ip_vs_svc_lock);
1314
1315         if (old_sched)
1316                 ip_vs_scheduler_put(old_sched);
1317
1318         return ret;
1319 }
1320
1321
1322 /*
1323  *      Delete a service from the service list
1324  *      - The service must be unlinked, unlocked and not referenced!
1325  *      - We are called under _bh lock
1326  */
1327 static void __ip_vs_del_service(struct ip_vs_service *svc)
1328 {
1329         struct ip_vs_dest *dest, *nxt;
1330         struct ip_vs_scheduler *old_sched;
1331
1332         ip_vs_num_services--;
1333         ip_vs_kill_estimator(&svc->stats);
1334
1335         /* Unbind scheduler */
1336         old_sched = svc->scheduler;
1337         ip_vs_unbind_scheduler(svc);
1338         if (old_sched)
1339                 ip_vs_scheduler_put(old_sched);
1340
1341         /* Unbind app inc */
1342         if (svc->inc) {
1343                 ip_vs_app_inc_put(svc->inc);
1344                 svc->inc = NULL;
1345         }
1346
1347         /*
1348          *    Unlink the whole destination list
1349          */
1350         list_for_each_entry_safe(dest, nxt, &svc->destinations, n_list) {
1351                 __ip_vs_unlink_dest(svc, dest, 0);
1352                 __ip_vs_del_dest(dest);
1353         }
1354
1355         /*
1356          *    Update the virtual service counters
1357          */
1358         if (svc->port == FTPPORT)
1359                 atomic_dec(&ip_vs_ftpsvc_counter);
1360         else if (svc->port == 0)
1361                 atomic_dec(&ip_vs_nullsvc_counter);
1362
1363         /*
1364          *    Free the service if nobody refers to it
1365          */
1366         if (atomic_read(&svc->refcnt) == 0)
1367                 kfree(svc);
1368
1369         /* decrease the module use count */
1370         ip_vs_use_count_dec();
1371 }
1372
1373 /*
1374  *      Delete a service from the service list
1375  */
1376 static int ip_vs_del_service(struct ip_vs_service *svc)
1377 {
1378         if (svc == NULL)
1379                 return -EEXIST;
1380
1381         /*
1382          * Unhash it from the service table
1383          */
1384         write_lock_bh(&__ip_vs_svc_lock);
1385
1386         ip_vs_svc_unhash(svc);
1387
1388         /*
1389          * Wait until all the svc users go away.
1390          */
1391         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 1);
1392
1393         __ip_vs_del_service(svc);
1394
1395         write_unlock_bh(&__ip_vs_svc_lock);
1396
1397         return 0;
1398 }
1399
1400
1401 /*
1402  *      Flush all the virtual services
1403  */
1404 static int ip_vs_flush(void)
1405 {
1406         int idx;
1407         struct ip_vs_service *svc, *nxt;
1408
1409         /*
1410          * Flush the service table hashed by <protocol,addr,port>
1411          */
1412         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1413                 list_for_each_entry_safe(svc, nxt, &ip_vs_svc_table[idx], s_list) {
1414                         write_lock_bh(&__ip_vs_svc_lock);
1415                         ip_vs_svc_unhash(svc);
1416                         /*
1417                          * Wait until all the svc users go away.
1418                          */
1419                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1420                         __ip_vs_del_service(svc);
1421                         write_unlock_bh(&__ip_vs_svc_lock);
1422                 }
1423         }
1424
1425         /*
1426          * Flush the service table hashed by fwmark
1427          */
1428         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1429                 list_for_each_entry_safe(svc, nxt,
1430                                          &ip_vs_svc_fwm_table[idx], f_list) {
1431                         write_lock_bh(&__ip_vs_svc_lock);
1432                         ip_vs_svc_unhash(svc);
1433                         /*
1434                          * Wait until all the svc users go away.
1435                          */
1436                         IP_VS_WAIT_WHILE(atomic_read(&svc->usecnt) > 0);
1437                         __ip_vs_del_service(svc);
1438                         write_unlock_bh(&__ip_vs_svc_lock);
1439                 }
1440         }
1441
1442         return 0;
1443 }
1444
1445
1446 /*
1447  *      Zero counters in a service or all services
1448  */
1449 static int ip_vs_zero_service(struct ip_vs_service *svc)
1450 {
1451         struct ip_vs_dest *dest;
1452
1453         write_lock_bh(&__ip_vs_svc_lock);
1454         list_for_each_entry(dest, &svc->destinations, n_list) {
1455                 ip_vs_zero_stats(&dest->stats);
1456         }
1457         ip_vs_zero_stats(&svc->stats);
1458         write_unlock_bh(&__ip_vs_svc_lock);
1459         return 0;
1460 }
1461
1462 static int ip_vs_zero_all(void)
1463 {
1464         int idx;
1465         struct ip_vs_service *svc;
1466
1467         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1468                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1469                         ip_vs_zero_service(svc);
1470                 }
1471         }
1472
1473         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1474                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1475                         ip_vs_zero_service(svc);
1476                 }
1477         }
1478
1479         ip_vs_zero_stats(&ip_vs_stats);
1480         return 0;
1481 }
1482
1483
1484 static int
1485 proc_do_defense_mode(ctl_table *table, int write, struct file * filp,
1486                      void __user *buffer, size_t *lenp, loff_t *ppos)
1487 {
1488         int *valp = table->data;
1489         int val = *valp;
1490         int rc;
1491
1492         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1493         if (write && (*valp != val)) {
1494                 if ((*valp < 0) || (*valp > 3)) {
1495                         /* Restore the correct value */
1496                         *valp = val;
1497                 } else {
1498                         update_defense_level();
1499                 }
1500         }
1501         return rc;
1502 }
1503
1504
1505 static int
1506 proc_do_sync_threshold(ctl_table *table, int write, struct file *filp,
1507                        void __user *buffer, size_t *lenp, loff_t *ppos)
1508 {
1509         int *valp = table->data;
1510         int val[2];
1511         int rc;
1512
1513         /* backup the value first */
1514         memcpy(val, valp, sizeof(val));
1515
1516         rc = proc_dointvec(table, write, filp, buffer, lenp, ppos);
1517         if (write && (valp[0] < 0 || valp[1] < 0 || valp[0] >= valp[1])) {
1518                 /* Restore the correct value */
1519                 memcpy(valp, val, sizeof(val));
1520         }
1521         return rc;
1522 }
1523
1524
1525 /*
1526  *      IPVS sysctl table (under the /proc/sys/net/ipv4/vs/)
1527  */
1528
1529 static struct ctl_table vs_vars[] = {
1530         {
1531                 .procname       = "amemthresh",
1532                 .data           = &sysctl_ip_vs_amemthresh,
1533                 .maxlen         = sizeof(int),
1534                 .mode           = 0644,
1535                 .proc_handler   = &proc_dointvec,
1536         },
1537 #ifdef CONFIG_IP_VS_DEBUG
1538         {
1539                 .procname       = "debug_level",
1540                 .data           = &sysctl_ip_vs_debug_level,
1541                 .maxlen         = sizeof(int),
1542                 .mode           = 0644,
1543                 .proc_handler   = &proc_dointvec,
1544         },
1545 #endif
1546         {
1547                 .procname       = "am_droprate",
1548                 .data           = &sysctl_ip_vs_am_droprate,
1549                 .maxlen         = sizeof(int),
1550                 .mode           = 0644,
1551                 .proc_handler   = &proc_dointvec,
1552         },
1553         {
1554                 .procname       = "drop_entry",
1555                 .data           = &sysctl_ip_vs_drop_entry,
1556                 .maxlen         = sizeof(int),
1557                 .mode           = 0644,
1558                 .proc_handler   = &proc_do_defense_mode,
1559         },
1560         {
1561                 .procname       = "drop_packet",
1562                 .data           = &sysctl_ip_vs_drop_packet,
1563                 .maxlen         = sizeof(int),
1564                 .mode           = 0644,
1565                 .proc_handler   = &proc_do_defense_mode,
1566         },
1567         {
1568                 .procname       = "secure_tcp",
1569                 .data           = &sysctl_ip_vs_secure_tcp,
1570                 .maxlen         = sizeof(int),
1571                 .mode           = 0644,
1572                 .proc_handler   = &proc_do_defense_mode,
1573         },
1574 #if 0
1575         {
1576                 .procname       = "timeout_established",
1577                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED],
1578                 .maxlen         = sizeof(int),
1579                 .mode           = 0644,
1580                 .proc_handler   = &proc_dointvec_jiffies,
1581         },
1582         {
1583                 .procname       = "timeout_synsent",
1584                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT],
1585                 .maxlen         = sizeof(int),
1586                 .mode           = 0644,
1587                 .proc_handler   = &proc_dointvec_jiffies,
1588         },
1589         {
1590                 .procname       = "timeout_synrecv",
1591                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV],
1592                 .maxlen         = sizeof(int),
1593                 .mode           = 0644,
1594                 .proc_handler   = &proc_dointvec_jiffies,
1595         },
1596         {
1597                 .procname       = "timeout_finwait",
1598                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT],
1599                 .maxlen         = sizeof(int),
1600                 .mode           = 0644,
1601                 .proc_handler   = &proc_dointvec_jiffies,
1602         },
1603         {
1604                 .procname       = "timeout_timewait",
1605                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT],
1606                 .maxlen         = sizeof(int),
1607                 .mode           = 0644,
1608                 .proc_handler   = &proc_dointvec_jiffies,
1609         },
1610         {
1611                 .procname       = "timeout_close",
1612                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE],
1613                 .maxlen         = sizeof(int),
1614                 .mode           = 0644,
1615                 .proc_handler   = &proc_dointvec_jiffies,
1616         },
1617         {
1618                 .procname       = "timeout_closewait",
1619                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT],
1620                 .maxlen         = sizeof(int),
1621                 .mode           = 0644,
1622                 .proc_handler   = &proc_dointvec_jiffies,
1623         },
1624         {
1625                 .procname       = "timeout_lastack",
1626                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK],
1627                 .maxlen         = sizeof(int),
1628                 .mode           = 0644,
1629                 .proc_handler   = &proc_dointvec_jiffies,
1630         },
1631         {
1632                 .procname       = "timeout_listen",
1633                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN],
1634                 .maxlen         = sizeof(int),
1635                 .mode           = 0644,
1636                 .proc_handler   = &proc_dointvec_jiffies,
1637         },
1638         {
1639                 .procname       = "timeout_synack",
1640                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK],
1641                 .maxlen         = sizeof(int),
1642                 .mode           = 0644,
1643                 .proc_handler   = &proc_dointvec_jiffies,
1644         },
1645         {
1646                 .procname       = "timeout_udp",
1647                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_UDP],
1648                 .maxlen         = sizeof(int),
1649                 .mode           = 0644,
1650                 .proc_handler   = &proc_dointvec_jiffies,
1651         },
1652         {
1653                 .procname       = "timeout_icmp",
1654                 .data   = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP],
1655                 .maxlen         = sizeof(int),
1656                 .mode           = 0644,
1657                 .proc_handler   = &proc_dointvec_jiffies,
1658         },
1659 #endif
1660         {
1661                 .procname       = "cache_bypass",
1662                 .data           = &sysctl_ip_vs_cache_bypass,
1663                 .maxlen         = sizeof(int),
1664                 .mode           = 0644,
1665                 .proc_handler   = &proc_dointvec,
1666         },
1667         {
1668                 .procname       = "expire_nodest_conn",
1669                 .data           = &sysctl_ip_vs_expire_nodest_conn,
1670                 .maxlen         = sizeof(int),
1671                 .mode           = 0644,
1672                 .proc_handler   = &proc_dointvec,
1673         },
1674         {
1675                 .procname       = "expire_quiescent_template",
1676                 .data           = &sysctl_ip_vs_expire_quiescent_template,
1677                 .maxlen         = sizeof(int),
1678                 .mode           = 0644,
1679                 .proc_handler   = &proc_dointvec,
1680         },
1681         {
1682                 .procname       = "sync_threshold",
1683                 .data           = &sysctl_ip_vs_sync_threshold,
1684                 .maxlen         = sizeof(sysctl_ip_vs_sync_threshold),
1685                 .mode           = 0644,
1686                 .proc_handler   = &proc_do_sync_threshold,
1687         },
1688         {
1689                 .procname       = "nat_icmp_send",
1690                 .data           = &sysctl_ip_vs_nat_icmp_send,
1691                 .maxlen         = sizeof(int),
1692                 .mode           = 0644,
1693                 .proc_handler   = &proc_dointvec,
1694         },
1695         { .ctl_name = 0 }
1696 };
1697
1698 const struct ctl_path net_vs_ctl_path[] = {
1699         { .procname = "net", .ctl_name = CTL_NET, },
1700         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1701         { .procname = "vs", },
1702         { }
1703 };
1704 EXPORT_SYMBOL_GPL(net_vs_ctl_path);
1705
1706 static struct ctl_table_header * sysctl_header;
1707
1708 #ifdef CONFIG_PROC_FS
1709
/*
 *	Iteration state kept in seq->private while walking the services.
 */
struct ip_vs_iter {
	/* table being walked: ip_vs_svc_table or ip_vs_svc_fwm_table */
	struct list_head *table;
	/* index of the current bucket within that table */
	int bucket;
};
1714
1715 /*
1716  *      Write the contents of the VS rule table to a PROCfs file.
1717  *      (It is kept just for backward compatibility)
1718  */
1719 static inline const char *ip_vs_fwd_name(unsigned flags)
1720 {
1721         switch (flags & IP_VS_CONN_F_FWD_MASK) {
1722         case IP_VS_CONN_F_LOCALNODE:
1723                 return "Local";
1724         case IP_VS_CONN_F_TUNNEL:
1725                 return "Tunnel";
1726         case IP_VS_CONN_F_DROUTE:
1727                 return "Route";
1728         default:
1729                 return "Masq";
1730         }
1731 }
1732
1733
1734 /* Get the Nth entry in the two lists */
1735 static struct ip_vs_service *ip_vs_info_array(struct seq_file *seq, loff_t pos)
1736 {
1737         struct ip_vs_iter *iter = seq->private;
1738         int idx;
1739         struct ip_vs_service *svc;
1740
1741         /* look in hash by protocol */
1742         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1743                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
1744                         if (pos-- == 0){
1745                                 iter->table = ip_vs_svc_table;
1746                                 iter->bucket = idx;
1747                                 return svc;
1748                         }
1749                 }
1750         }
1751
1752         /* keep looking in fwmark */
1753         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
1754                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
1755                         if (pos-- == 0) {
1756                                 iter->table = ip_vs_svc_fwm_table;
1757                                 iter->bucket = idx;
1758                                 return svc;
1759                         }
1760                 }
1761         }
1762
1763         return NULL;
1764 }
1765
1766 static void *ip_vs_info_seq_start(struct seq_file *seq, loff_t *pos)
1767 {
1768
1769         read_lock_bh(&__ip_vs_svc_lock);
1770         return *pos ? ip_vs_info_array(seq, *pos - 1) : SEQ_START_TOKEN;
1771 }
1772
1773
1774 static void *ip_vs_info_seq_next(struct seq_file *seq, void *v, loff_t *pos)
1775 {
1776         struct list_head *e;
1777         struct ip_vs_iter *iter;
1778         struct ip_vs_service *svc;
1779
1780         ++*pos;
1781         if (v == SEQ_START_TOKEN)
1782                 return ip_vs_info_array(seq,0);
1783
1784         svc = v;
1785         iter = seq->private;
1786
1787         if (iter->table == ip_vs_svc_table) {
1788                 /* next service in table hashed by protocol */
1789                 if ((e = svc->s_list.next) != &ip_vs_svc_table[iter->bucket])
1790                         return list_entry(e, struct ip_vs_service, s_list);
1791
1792
1793                 while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1794                         list_for_each_entry(svc,&ip_vs_svc_table[iter->bucket],
1795                                             s_list) {
1796                                 return svc;
1797                         }
1798                 }
1799
1800                 iter->table = ip_vs_svc_fwm_table;
1801                 iter->bucket = -1;
1802                 goto scan_fwmark;
1803         }
1804
1805         /* next service in hashed by fwmark */
1806         if ((e = svc->f_list.next) != &ip_vs_svc_fwm_table[iter->bucket])
1807                 return list_entry(e, struct ip_vs_service, f_list);
1808
1809  scan_fwmark:
1810         while (++iter->bucket < IP_VS_SVC_TAB_SIZE) {
1811                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[iter->bucket],
1812                                     f_list)
1813                         return svc;
1814         }
1815
1816         return NULL;
1817 }
1818
1819 static void ip_vs_info_seq_stop(struct seq_file *seq, void *v)
1820 {
1821         read_unlock_bh(&__ip_vs_svc_lock);
1822 }
1823
1824
1825 static int ip_vs_info_seq_show(struct seq_file *seq, void *v)
1826 {
1827         if (v == SEQ_START_TOKEN) {
1828                 seq_printf(seq,
1829                         "IP Virtual Server version %d.%d.%d (size=%d)\n",
1830                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
1831                 seq_puts(seq,
1832                          "Prot LocalAddress:Port Scheduler Flags\n");
1833                 seq_puts(seq,
1834                          "  -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n");
1835         } else {
1836                 const struct ip_vs_service *svc = v;
1837                 const struct ip_vs_iter *iter = seq->private;
1838                 const struct ip_vs_dest *dest;
1839
1840                 if (iter->table == ip_vs_svc_table) {
1841 #ifdef CONFIG_IP_VS_IPV6
1842                         if (svc->af == AF_INET6)
1843                                 seq_printf(seq, "%s  [" NIP6_FMT "]:%04X %s ",
1844                                            ip_vs_proto_name(svc->protocol),
1845                                            NIP6(svc->addr.in6),
1846                                            ntohs(svc->port),
1847                                            svc->scheduler->name);
1848                         else
1849 #endif
1850                                 seq_printf(seq, "%s  %08X:%04X %s ",
1851                                            ip_vs_proto_name(svc->protocol),
1852                                            ntohl(svc->addr.ip),
1853                                            ntohs(svc->port),
1854                                            svc->scheduler->name);
1855                 } else {
1856                         seq_printf(seq, "FWM  %08X %s ",
1857                                    svc->fwmark, svc->scheduler->name);
1858                 }
1859
1860                 if (svc->flags & IP_VS_SVC_F_PERSISTENT)
1861                         seq_printf(seq, "persistent %d %08X\n",
1862                                 svc->timeout,
1863                                 ntohl(svc->netmask));
1864                 else
1865                         seq_putc(seq, '\n');
1866
1867                 list_for_each_entry(dest, &svc->destinations, n_list) {
1868 #ifdef CONFIG_IP_VS_IPV6
1869                         if (dest->af == AF_INET6)
1870                                 seq_printf(seq,
1871                                            "  -> [" NIP6_FMT "]:%04X"
1872                                            "      %-7s %-6d %-10d %-10d\n",
1873                                            NIP6(dest->addr.in6),
1874                                            ntohs(dest->port),
1875                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1876                                            atomic_read(&dest->weight),
1877                                            atomic_read(&dest->activeconns),
1878                                            atomic_read(&dest->inactconns));
1879                         else
1880 #endif
1881                                 seq_printf(seq,
1882                                            "  -> %08X:%04X      "
1883                                            "%-7s %-6d %-10d %-10d\n",
1884                                            ntohl(dest->addr.ip),
1885                                            ntohs(dest->port),
1886                                            ip_vs_fwd_name(atomic_read(&dest->conn_flags)),
1887                                            atomic_read(&dest->weight),
1888                                            atomic_read(&dest->activeconns),
1889                                            atomic_read(&dest->inactconns));
1890
1891                 }
1892         }
1893         return 0;
1894 }
1895
/*
 * seq_file iterator callbacks for the service listing; installed on a file
 * by ip_vs_info_open() through seq_open_private().
 */
1896 static const struct seq_operations ip_vs_info_seq_ops = {
1897         .start = ip_vs_info_seq_start,
1898         .next  = ip_vs_info_seq_next,
1899         .stop  = ip_vs_info_seq_stop,
1900         .show  = ip_vs_info_seq_show,
1901 };
1902
1903 static int ip_vs_info_open(struct inode *inode, struct file *file)
1904 {
1905         return seq_open_private(file, &ip_vs_info_seq_ops,
1906                         sizeof(struct ip_vs_iter));
1907 }
1908
/*
 * File operations for the service listing.  seq_release_private is the
 * counterpart of the seq_open_private() call made in ip_vs_info_open().
 */
1909 static const struct file_operations ip_vs_info_fops = {
1910         .owner   = THIS_MODULE,
1911         .open    = ip_vs_info_open,
1912         .read    = seq_read,
1913         .llseek  = seq_lseek,
1914         .release = seq_release_private,
1915 };
1916
1917 #endif
1918
/*
 * Global IPVS statistics.  Only the embedded spinlock is initialised
 * explicitly (unlocked); the readers in this file take it with
 * spin_lock_bh() before looking at the counters.
 */
1919 struct ip_vs_stats ip_vs_stats = {
1920         .lock = __SPIN_LOCK_UNLOCKED(ip_vs_stats.lock),
1921 };
1922
1923 #ifdef CONFIG_PROC_FS
1924 static int ip_vs_stats_show(struct seq_file *seq, void *v)
1925 {
1926
1927 /*               01234567 01234567 01234567 0123456701234567 0123456701234567 */
1928         seq_puts(seq,
1929                  "   Total Incoming Outgoing         Incoming         Outgoing\n");
1930         seq_printf(seq,
1931                    "   Conns  Packets  Packets            Bytes            Bytes\n");
1932
1933         spin_lock_bh(&ip_vs_stats.lock);
1934         seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", ip_vs_stats.conns,
1935                    ip_vs_stats.inpkts, ip_vs_stats.outpkts,
1936                    (unsigned long long) ip_vs_stats.inbytes,
1937                    (unsigned long long) ip_vs_stats.outbytes);
1938
1939 /*                 01234567 01234567 01234567 0123456701234567 0123456701234567 */
1940         seq_puts(seq,
1941                    " Conns/s   Pkts/s   Pkts/s          Bytes/s          Bytes/s\n");
1942         seq_printf(seq,"%8X %8X %8X %16X %16X\n",
1943                         ip_vs_stats.cps,
1944                         ip_vs_stats.inpps,
1945                         ip_vs_stats.outpps,
1946                         ip_vs_stats.inbps,
1947                         ip_vs_stats.outbps);
1948         spin_unlock_bh(&ip_vs_stats.lock);
1949
1950         return 0;
1951 }
1952
1953 static int ip_vs_stats_seq_open(struct inode *inode, struct file *file)
1954 {
1955         return single_open(file, ip_vs_stats_show, NULL);
1956 }
1957
/*
 * File operations for the global statistics file.  single_release is the
 * counterpart of the single_open() call made in ip_vs_stats_seq_open().
 */
1958 static const struct file_operations ip_vs_stats_fops = {
1959         .owner = THIS_MODULE,
1960         .open = ip_vs_stats_seq_open,
1961         .read = seq_read,
1962         .llseek = seq_lseek,
1963         .release = single_release,
1964 };
1965
1966 #endif
1967
1968 /*
1969  *      Set timeout values for tcp tcpfin udp in the timeout_table.
1970  */
1971 static int ip_vs_set_timeout(struct ip_vs_timeout_user *u)
1972 {
1973         IP_VS_DBG(2, "Setting timeout tcp:%d tcpfin:%d udp:%d\n",
1974                   u->tcp_timeout,
1975                   u->tcp_fin_timeout,
1976                   u->udp_timeout);
1977
1978 #ifdef CONFIG_IP_VS_PROTO_TCP
1979         if (u->tcp_timeout) {
1980                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED]
1981                         = u->tcp_timeout * HZ;
1982         }
1983
1984         if (u->tcp_fin_timeout) {
1985                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT]
1986                         = u->tcp_fin_timeout * HZ;
1987         }
1988 #endif
1989
1990 #ifdef CONFIG_IP_VS_PROTO_UDP
1991         if (u->udp_timeout) {
1992                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL]
1993                         = u->udp_timeout * HZ;
1994         }
1995 #endif
1996         return 0;
1997 }
1998
1999
/*
 * Argument sizes for the setsockopt() commands.  SET_CMDID() turns a
 * command number into a zero-based index into set_arglen[].  MAX_ARG_LEN
 * is the largest payload (service + destination) and sizes the on-stack
 * buffer in do_ip_vs_set_ctl().
 */
2000 #define SET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2001 #define SERVICE_ARG_LEN         (sizeof(struct ip_vs_service_user))
2002 #define SVCDEST_ARG_LEN         (sizeof(struct ip_vs_service_user) +    \
2003                                  sizeof(struct ip_vs_dest_user))
2004 #define TIMEOUT_ARG_LEN         (sizeof(struct ip_vs_timeout_user))
2005 #define DAEMON_ARG_LEN          (sizeof(struct ip_vs_daemon_user))
2006 #define MAX_ARG_LEN             SVCDEST_ARG_LEN
2007
/*
 * Exact payload length required for each set command; do_ip_vs_set_ctl()
 * rejects any other length.  Entries are unsigned char, so every length
 * listed here has to stay below 256 bytes.
 */
2008 static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = {
2009         [SET_CMDID(IP_VS_SO_SET_ADD)]           = SERVICE_ARG_LEN,
2010         [SET_CMDID(IP_VS_SO_SET_EDIT)]          = SERVICE_ARG_LEN,
2011         [SET_CMDID(IP_VS_SO_SET_DEL)]           = SERVICE_ARG_LEN,
2012         [SET_CMDID(IP_VS_SO_SET_FLUSH)]         = 0,
2013         [SET_CMDID(IP_VS_SO_SET_ADDDEST)]       = SVCDEST_ARG_LEN,
2014         [SET_CMDID(IP_VS_SO_SET_DELDEST)]       = SVCDEST_ARG_LEN,
2015         [SET_CMDID(IP_VS_SO_SET_EDITDEST)]      = SVCDEST_ARG_LEN,
2016         [SET_CMDID(IP_VS_SO_SET_TIMEOUT)]       = TIMEOUT_ARG_LEN,
2017         [SET_CMDID(IP_VS_SO_SET_STARTDAEMON)]   = DAEMON_ARG_LEN,
2018         [SET_CMDID(IP_VS_SO_SET_STOPDAEMON)]    = DAEMON_ARG_LEN,
2019         [SET_CMDID(IP_VS_SO_SET_ZERO)]          = SERVICE_ARG_LEN,
2020 };
2021
2022 static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc,
2023                                   struct ip_vs_service_user *usvc_compat)
2024 {
2025         usvc->af                = AF_INET;
2026         usvc->protocol          = usvc_compat->protocol;
2027         usvc->addr.ip           = usvc_compat->addr;
2028         usvc->port              = usvc_compat->port;
2029         usvc->fwmark            = usvc_compat->fwmark;
2030
2031         /* Deep copy of sched_name is not needed here */
2032         usvc->sched_name        = usvc_compat->sched_name;
2033
2034         usvc->flags             = usvc_compat->flags;
2035         usvc->timeout           = usvc_compat->timeout;
2036         usvc->netmask           = usvc_compat->netmask;
2037 }
2038
2039 static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest,
2040                                    struct ip_vs_dest_user *udest_compat)
2041 {
2042         udest->addr.ip          = udest_compat->addr;
2043         udest->port             = udest_compat->port;
2044         udest->conn_flags       = udest_compat->conn_flags;
2045         udest->weight           = udest_compat->weight;
2046         udest->u_threshold      = udest_compat->u_threshold;
2047         udest->l_threshold      = udest_compat->l_threshold;
2048 }
2049
2050 static int
2051 do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2052 {
2053         int ret;
2054         unsigned char arg[MAX_ARG_LEN];
2055         struct ip_vs_service_user *usvc_compat;
2056         struct ip_vs_service_user_kern usvc;
2057         struct ip_vs_service *svc;
2058         struct ip_vs_dest_user *udest_compat;
2059         struct ip_vs_dest_user_kern udest;
2060
2061         if (!capable(CAP_NET_ADMIN))
2062                 return -EPERM;
2063
2064         if (len != set_arglen[SET_CMDID(cmd)]) {
2065                 IP_VS_ERR("set_ctl: len %u != %u\n",
2066                           len, set_arglen[SET_CMDID(cmd)]);
2067                 return -EINVAL;
2068         }
2069
2070         if (copy_from_user(arg, user, len) != 0)
2071                 return -EFAULT;
2072
2073         /* increase the module use count */
2074         ip_vs_use_count_inc();
2075
2076         if (mutex_lock_interruptible(&__ip_vs_mutex)) {
2077                 ret = -ERESTARTSYS;
2078                 goto out_dec;
2079         }
2080
2081         if (cmd == IP_VS_SO_SET_FLUSH) {
2082                 /* Flush the virtual service */
2083                 ret = ip_vs_flush();
2084                 goto out_unlock;
2085         } else if (cmd == IP_VS_SO_SET_TIMEOUT) {
2086                 /* Set timeout values for (tcp tcpfin udp) */
2087                 ret = ip_vs_set_timeout((struct ip_vs_timeout_user *)arg);
2088                 goto out_unlock;
2089         } else if (cmd == IP_VS_SO_SET_STARTDAEMON) {
2090                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2091                 ret = start_sync_thread(dm->state, dm->mcast_ifn, dm->syncid);
2092                 goto out_unlock;
2093         } else if (cmd == IP_VS_SO_SET_STOPDAEMON) {
2094                 struct ip_vs_daemon_user *dm = (struct ip_vs_daemon_user *)arg;
2095                 ret = stop_sync_thread(dm->state);
2096                 goto out_unlock;
2097         }
2098
2099         usvc_compat = (struct ip_vs_service_user *)arg;
2100         udest_compat = (struct ip_vs_dest_user *)(usvc_compat + 1);
2101
2102         /* We only use the new structs internally, so copy userspace compat
2103          * structs to extended internal versions */
2104         ip_vs_copy_usvc_compat(&usvc, usvc_compat);
2105         ip_vs_copy_udest_compat(&udest, udest_compat);
2106
2107         if (cmd == IP_VS_SO_SET_ZERO) {
2108                 /* if no service address is set, zero counters in all */
2109                 if (!usvc.fwmark && !usvc.addr.ip && !usvc.port) {
2110                         ret = ip_vs_zero_all();
2111                         goto out_unlock;
2112                 }
2113         }
2114
2115         /* Check for valid protocol: TCP or UDP, even for fwmark!=0 */
2116         if (usvc.protocol != IPPROTO_TCP && usvc.protocol != IPPROTO_UDP) {
2117                 IP_VS_ERR("set_ctl: invalid protocol: %d %d.%d.%d.%d:%d %s\n",
2118                           usvc.protocol, NIPQUAD(usvc.addr.ip),
2119                           ntohs(usvc.port), usvc.sched_name);
2120                 ret = -EFAULT;
2121                 goto out_unlock;
2122         }
2123
2124         /* Lookup the exact service by <protocol, addr, port> or fwmark */
2125         if (usvc.fwmark == 0)
2126                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
2127                                           &usvc.addr, usvc.port);
2128         else
2129                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2130
2131         if (cmd != IP_VS_SO_SET_ADD
2132             && (svc == NULL || svc->protocol != usvc.protocol)) {
2133                 ret = -ESRCH;
2134                 goto out_unlock;
2135         }
2136
2137         switch (cmd) {
2138         case IP_VS_SO_SET_ADD:
2139                 if (svc != NULL)
2140                         ret = -EEXIST;
2141                 else
2142                         ret = ip_vs_add_service(&usvc, &svc);
2143                 break;
2144         case IP_VS_SO_SET_EDIT:
2145                 ret = ip_vs_edit_service(svc, &usvc);
2146                 break;
2147         case IP_VS_SO_SET_DEL:
2148                 ret = ip_vs_del_service(svc);
2149                 if (!ret)
2150                         goto out_unlock;
2151                 break;
2152         case IP_VS_SO_SET_ZERO:
2153                 ret = ip_vs_zero_service(svc);
2154                 break;
2155         case IP_VS_SO_SET_ADDDEST:
2156                 ret = ip_vs_add_dest(svc, &udest);
2157                 break;
2158         case IP_VS_SO_SET_EDITDEST:
2159                 ret = ip_vs_edit_dest(svc, &udest);
2160                 break;
2161         case IP_VS_SO_SET_DELDEST:
2162                 ret = ip_vs_del_dest(svc, &udest);
2163                 break;
2164         default:
2165                 ret = -EINVAL;
2166         }
2167
2168         if (svc)
2169                 ip_vs_service_put(svc);
2170
2171   out_unlock:
2172         mutex_unlock(&__ip_vs_mutex);
2173   out_dec:
2174         /* decrease the module use count */
2175         ip_vs_use_count_dec();
2176
2177         return ret;
2178 }
2179
2180
2181 static void
2182 ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
2183 {
2184         spin_lock_bh(&src->lock);
2185         memcpy(dst, src, (char*)&src->lock - (char*)src);
2186         spin_unlock_bh(&src->lock);
2187 }
2188
2189 static void
2190 ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
2191 {
2192         dst->protocol = src->protocol;
2193         dst->addr = src->addr.ip;
2194         dst->port = src->port;
2195         dst->fwmark = src->fwmark;
2196         strlcpy(dst->sched_name, src->scheduler->name, sizeof(dst->sched_name));
2197         dst->flags = src->flags;
2198         dst->timeout = src->timeout / HZ;
2199         dst->netmask = src->netmask;
2200         dst->num_dests = src->num_dests;
2201         ip_vs_copy_stats(&dst->stats, &src->stats);
2202 }
2203
2204 static inline int
2205 __ip_vs_get_service_entries(const struct ip_vs_get_services *get,
2206                             struct ip_vs_get_services __user *uptr)
2207 {
2208         int idx, count=0;
2209         struct ip_vs_service *svc;
2210         struct ip_vs_service_entry entry;
2211         int ret = 0;
2212
2213         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2214                 list_for_each_entry(svc, &ip_vs_svc_table[idx], s_list) {
2215                         if (count >= get->num_services)
2216                                 goto out;
2217                         memset(&entry, 0, sizeof(entry));
2218                         ip_vs_copy_service(&entry, svc);
2219                         if (copy_to_user(&uptr->entrytable[count],
2220                                          &entry, sizeof(entry))) {
2221                                 ret = -EFAULT;
2222                                 goto out;
2223                         }
2224                         count++;
2225                 }
2226         }
2227
2228         for (idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++) {
2229                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[idx], f_list) {
2230                         if (count >= get->num_services)
2231                                 goto out;
2232                         memset(&entry, 0, sizeof(entry));
2233                         ip_vs_copy_service(&entry, svc);
2234                         if (copy_to_user(&uptr->entrytable[count],
2235                                          &entry, sizeof(entry))) {
2236                                 ret = -EFAULT;
2237                                 goto out;
2238                         }
2239                         count++;
2240                 }
2241         }
2242   out:
2243         return ret;
2244 }
2245
2246 static inline int
2247 __ip_vs_get_dest_entries(const struct ip_vs_get_dests *get,
2248                          struct ip_vs_get_dests __user *uptr)
2249 {
2250         struct ip_vs_service *svc;
2251         union nf_inet_addr addr = { .ip = get->addr };
2252         int ret = 0;
2253
2254         if (get->fwmark)
2255                 svc = __ip_vs_svc_fwm_get(AF_INET, get->fwmark);
2256         else
2257                 svc = __ip_vs_service_get(AF_INET, get->protocol, &addr,
2258                                           get->port);
2259
2260         if (svc) {
2261                 int count = 0;
2262                 struct ip_vs_dest *dest;
2263                 struct ip_vs_dest_entry entry;
2264
2265                 list_for_each_entry(dest, &svc->destinations, n_list) {
2266                         if (count >= get->num_dests)
2267                                 break;
2268
2269                         entry.addr = dest->addr.ip;
2270                         entry.port = dest->port;
2271                         entry.conn_flags = atomic_read(&dest->conn_flags);
2272                         entry.weight = atomic_read(&dest->weight);
2273                         entry.u_threshold = dest->u_threshold;
2274                         entry.l_threshold = dest->l_threshold;
2275                         entry.activeconns = atomic_read(&dest->activeconns);
2276                         entry.inactconns = atomic_read(&dest->inactconns);
2277                         entry.persistconns = atomic_read(&dest->persistconns);
2278                         ip_vs_copy_stats(&entry.stats, &dest->stats);
2279                         if (copy_to_user(&uptr->entrytable[count],
2280                                          &entry, sizeof(entry))) {
2281                                 ret = -EFAULT;
2282                                 break;
2283                         }
2284                         count++;
2285                 }
2286                 ip_vs_service_put(svc);
2287         } else
2288                 ret = -ESRCH;
2289         return ret;
2290 }
2291
2292 static inline void
2293 __ip_vs_get_timeouts(struct ip_vs_timeout_user *u)
2294 {
2295 #ifdef CONFIG_IP_VS_PROTO_TCP
2296         u->tcp_timeout =
2297                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_ESTABLISHED] / HZ;
2298         u->tcp_fin_timeout =
2299                 ip_vs_protocol_tcp.timeout_table[IP_VS_TCP_S_FIN_WAIT] / HZ;
2300 #endif
2301 #ifdef CONFIG_IP_VS_PROTO_UDP
2302         u->udp_timeout =
2303                 ip_vs_protocol_udp.timeout_table[IP_VS_UDP_S_NORMAL] / HZ;
2304 #endif
2305 }
2306
2307
/*
 * Argument sizes for the getsockopt() commands.  GET_CMDID() turns a
 * command number into a zero-based index into get_arglen[].
 */
2308 #define GET_CMDID(cmd)          (cmd - IP_VS_BASE_CTL)
2309 #define GET_INFO_ARG_LEN        (sizeof(struct ip_vs_getinfo))
2310 #define GET_SERVICES_ARG_LEN    (sizeof(struct ip_vs_get_services))
2311 #define GET_SERVICE_ARG_LEN     (sizeof(struct ip_vs_service_entry))
2312 #define GET_DESTS_ARG_LEN       (sizeof(struct ip_vs_get_dests))
2313 #define GET_TIMEOUT_ARG_LEN     (sizeof(struct ip_vs_timeout_user))
2314 #define GET_DAEMON_ARG_LEN      (sizeof(struct ip_vs_daemon_user) * 2)
2315
/*
 * Minimum user buffer length for each get command, and the number of bytes
 * do_ip_vs_get_ctl() copies in from user space into its 128-byte argument
 * buffer.  The 64 for GET_VERSION matches the size of the version string
 * buffer used there.  Entries are unsigned char, so every length listed
 * here has to stay below 256 bytes.
 */
2316 static const unsigned char get_arglen[GET_CMDID(IP_VS_SO_GET_MAX)+1] = {
2317         [GET_CMDID(IP_VS_SO_GET_VERSION)]       = 64,
2318         [GET_CMDID(IP_VS_SO_GET_INFO)]          = GET_INFO_ARG_LEN,
2319         [GET_CMDID(IP_VS_SO_GET_SERVICES)]      = GET_SERVICES_ARG_LEN,
2320         [GET_CMDID(IP_VS_SO_GET_SERVICE)]       = GET_SERVICE_ARG_LEN,
2321         [GET_CMDID(IP_VS_SO_GET_DESTS)]         = GET_DESTS_ARG_LEN,
2322         [GET_CMDID(IP_VS_SO_GET_TIMEOUT)]       = GET_TIMEOUT_ARG_LEN,
2323         [GET_CMDID(IP_VS_SO_GET_DAEMON)]        = GET_DAEMON_ARG_LEN,
2324 };
2325
2326 static int
2327 do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2328 {
2329         unsigned char arg[128];
2330         int ret = 0;
2331
2332         if (!capable(CAP_NET_ADMIN))
2333                 return -EPERM;
2334
2335         if (*len < get_arglen[GET_CMDID(cmd)]) {
2336                 IP_VS_ERR("get_ctl: len %u < %u\n",
2337                           *len, get_arglen[GET_CMDID(cmd)]);
2338                 return -EINVAL;
2339         }
2340
2341         if (copy_from_user(arg, user, get_arglen[GET_CMDID(cmd)]) != 0)
2342                 return -EFAULT;
2343
2344         if (mutex_lock_interruptible(&__ip_vs_mutex))
2345                 return -ERESTARTSYS;
2346
2347         switch (cmd) {
2348         case IP_VS_SO_GET_VERSION:
2349         {
2350                 char buf[64];
2351
2352                 sprintf(buf, "IP Virtual Server version %d.%d.%d (size=%d)",
2353                         NVERSION(IP_VS_VERSION_CODE), IP_VS_CONN_TAB_SIZE);
2354                 if (copy_to_user(user, buf, strlen(buf)+1) != 0) {
2355                         ret = -EFAULT;
2356                         goto out;
2357                 }
2358                 *len = strlen(buf)+1;
2359         }
2360         break;
2361
2362         case IP_VS_SO_GET_INFO:
2363         {
2364                 struct ip_vs_getinfo info;
2365                 info.version = IP_VS_VERSION_CODE;
2366                 info.size = IP_VS_CONN_TAB_SIZE;
2367                 info.num_services = ip_vs_num_services;
2368                 if (copy_to_user(user, &info, sizeof(info)) != 0)
2369                         ret = -EFAULT;
2370         }
2371         break;
2372
2373         case IP_VS_SO_GET_SERVICES:
2374         {
2375                 struct ip_vs_get_services *get;
2376                 int size;
2377
2378                 get = (struct ip_vs_get_services *)arg;
2379                 size = sizeof(*get) +
2380                         sizeof(struct ip_vs_service_entry) * get->num_services;
2381                 if (*len != size) {
2382                         IP_VS_ERR("length: %u != %u\n", *len, size);
2383                         ret = -EINVAL;
2384                         goto out;
2385                 }
2386                 ret = __ip_vs_get_service_entries(get, user);
2387         }
2388         break;
2389
2390         case IP_VS_SO_GET_SERVICE:
2391         {
2392                 struct ip_vs_service_entry *entry;
2393                 struct ip_vs_service *svc;
2394                 union nf_inet_addr addr;
2395
2396                 entry = (struct ip_vs_service_entry *)arg;
2397                 addr.ip = entry->addr;
2398                 if (entry->fwmark)
2399                         svc = __ip_vs_svc_fwm_get(AF_INET, entry->fwmark);
2400                 else
2401                         svc = __ip_vs_service_get(AF_INET, entry->protocol,
2402                                                   &addr, entry->port);
2403                 if (svc) {
2404                         ip_vs_copy_service(entry, svc);
2405                         if (copy_to_user(user, entry, sizeof(*entry)) != 0)
2406                                 ret = -EFAULT;
2407                         ip_vs_service_put(svc);
2408                 } else
2409                         ret = -ESRCH;
2410         }
2411         break;
2412
2413         case IP_VS_SO_GET_DESTS:
2414         {
2415                 struct ip_vs_get_dests *get;
2416                 int size;
2417
2418                 get = (struct ip_vs_get_dests *)arg;
2419                 size = sizeof(*get) +
2420                         sizeof(struct ip_vs_dest_entry) * get->num_dests;
2421                 if (*len != size) {
2422                         IP_VS_ERR("length: %u != %u\n", *len, size);
2423                         ret = -EINVAL;
2424                         goto out;
2425                 }
2426                 ret = __ip_vs_get_dest_entries(get, user);
2427         }
2428         break;
2429
2430         case IP_VS_SO_GET_TIMEOUT:
2431         {
2432                 struct ip_vs_timeout_user t;
2433
2434                 __ip_vs_get_timeouts(&t);
2435                 if (copy_to_user(user, &t, sizeof(t)) != 0)
2436                         ret = -EFAULT;
2437         }
2438         break;
2439
2440         case IP_VS_SO_GET_DAEMON:
2441         {
2442                 struct ip_vs_daemon_user d[2];
2443
2444                 memset(&d, 0, sizeof(d));
2445                 if (ip_vs_sync_state & IP_VS_STATE_MASTER) {
2446                         d[0].state = IP_VS_STATE_MASTER;
2447                         strlcpy(d[0].mcast_ifn, ip_vs_master_mcast_ifn, sizeof(d[0].mcast_ifn));
2448                         d[0].syncid = ip_vs_master_syncid;
2449                 }
2450                 if (ip_vs_sync_state & IP_VS_STATE_BACKUP) {
2451                         d[1].state = IP_VS_STATE_BACKUP;
2452                         strlcpy(d[1].mcast_ifn, ip_vs_backup_mcast_ifn, sizeof(d[1].mcast_ifn));
2453                         d[1].syncid = ip_vs_backup_syncid;
2454                 }
2455                 if (copy_to_user(user, &d, sizeof(d)) != 0)
2456                         ret = -EFAULT;
2457         }
2458         break;
2459
2460         default:
2461                 ret = -EINVAL;
2462         }
2463
2464   out:
2465         mutex_unlock(&__ip_vs_mutex);
2466         return ret;
2467 }
2468
2469
/*
 * Netfilter sockopt registration for the legacy IPVS control interface on
 * PF_INET sockets: set commands go to do_ip_vs_set_ctl(), get commands to
 * do_ip_vs_get_ctl().
 * NOTE(review): the "+1" on both optmax values suggests the upper bound is
 * exclusive -- confirm against struct nf_sockopt_ops.
 */
2470 static struct nf_sockopt_ops ip_vs_sockopts = {
2471         .pf             = PF_INET,
2472         .set_optmin     = IP_VS_BASE_CTL,
2473         .set_optmax     = IP_VS_SO_SET_MAX+1,
2474         .set            = do_ip_vs_set_ctl,
2475         .get_optmin     = IP_VS_BASE_CTL,
2476         .get_optmax     = IP_VS_SO_GET_MAX+1,
2477         .get            = do_ip_vs_get_ctl,
2478         .owner          = THIS_MODULE,
2479 };
2480
2481 /*
2482  * Generic Netlink interface
2483  */
2484
2485 /* IPVS genetlink family */
2486 static struct genl_family ip_vs_genl_family = {
2487         .id             = GENL_ID_GENERATE,
2488         .hdrsize        = 0,
2489         .name           = IPVS_GENL_NAME,
2490         .version        = IPVS_GENL_VERSION,
2491         .maxattr        = IPVS_CMD_MAX,
2492 };
2493
2494 /* Policy used for first-level command attributes */
/*
 * SERVICE, DEST and DAEMON are containers for nested attributes (see the
 * per-container policies in this file); the three timeouts are plain
 * 32-bit values.
 */
2495 static const struct nla_policy ip_vs_cmd_policy[IPVS_CMD_ATTR_MAX + 1] = {
2496         [IPVS_CMD_ATTR_SERVICE]         = { .type = NLA_NESTED },
2497         [IPVS_CMD_ATTR_DEST]            = { .type = NLA_NESTED },
2498         [IPVS_CMD_ATTR_DAEMON]          = { .type = NLA_NESTED },
2499         [IPVS_CMD_ATTR_TIMEOUT_TCP]     = { .type = NLA_U32 },
2500         [IPVS_CMD_ATTR_TIMEOUT_TCP_FIN] = { .type = NLA_U32 },
2501         [IPVS_CMD_ATTR_TIMEOUT_UDP]     = { .type = NLA_U32 },
2502 };
2503
2504 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DAEMON */
/* The interface name is a NUL-terminated string limited by IP_VS_IFNAME_MAXLEN. */
2505 static const struct nla_policy ip_vs_daemon_policy[IPVS_DAEMON_ATTR_MAX + 1] = {
2506         [IPVS_DAEMON_ATTR_STATE]        = { .type = NLA_U32 },
2507         [IPVS_DAEMON_ATTR_MCAST_IFN]    = { .type = NLA_NUL_STRING,
2508                                             .len = IP_VS_IFNAME_MAXLEN },
2509         [IPVS_DAEMON_ATTR_SYNC_ID]      = { .type = NLA_U32 },
2510 };
2511
2512 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_SERVICE */
/*
 * ADDR and FLAGS are binary blobs bounded by the size of union
 * nf_inet_addr and struct ip_vs_flags respectively; SCHED_NAME is a
 * NUL-terminated string limited by IP_VS_SCHEDNAME_MAXLEN; STATS is a
 * nested container.
 */
2513 static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = {
2514         [IPVS_SVC_ATTR_AF]              = { .type = NLA_U16 },
2515         [IPVS_SVC_ATTR_PROTOCOL]        = { .type = NLA_U16 },
2516         [IPVS_SVC_ATTR_ADDR]            = { .type = NLA_BINARY,
2517                                             .len = sizeof(union nf_inet_addr) },
2518         [IPVS_SVC_ATTR_PORT]            = { .type = NLA_U16 },
2519         [IPVS_SVC_ATTR_FWMARK]          = { .type = NLA_U32 },
2520         [IPVS_SVC_ATTR_SCHED_NAME]      = { .type = NLA_NUL_STRING,
2521                                             .len = IP_VS_SCHEDNAME_MAXLEN },
2522         [IPVS_SVC_ATTR_FLAGS]           = { .type = NLA_BINARY,
2523                                             .len = sizeof(struct ip_vs_flags) },
2524         [IPVS_SVC_ATTR_TIMEOUT]         = { .type = NLA_U32 },
2525         [IPVS_SVC_ATTR_NETMASK]         = { .type = NLA_U32 },
2526         [IPVS_SVC_ATTR_STATS]           = { .type = NLA_NESTED },
2527 };
2528
2529 /* Policy used for attributes in nested attribute IPVS_CMD_ATTR_DEST */
/*
 * ADDR is a binary blob bounded by the size of union nf_inet_addr; STATS
 * is a nested container; all remaining attributes are fixed-size integers.
 */
2530 static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
2531         [IPVS_DEST_ATTR_ADDR]           = { .type = NLA_BINARY,
2532                                             .len = sizeof(union nf_inet_addr) },
2533         [IPVS_DEST_ATTR_PORT]           = { .type = NLA_U16 },
2534         [IPVS_DEST_ATTR_FWD_METHOD]     = { .type = NLA_U32 },
2535         [IPVS_DEST_ATTR_WEIGHT]         = { .type = NLA_U32 },
2536         [IPVS_DEST_ATTR_U_THRESH]       = { .type = NLA_U32 },
2537         [IPVS_DEST_ATTR_L_THRESH]       = { .type = NLA_U32 },
2538         [IPVS_DEST_ATTR_ACTIVE_CONNS]   = { .type = NLA_U32 },
2539         [IPVS_DEST_ATTR_INACT_CONNS]    = { .type = NLA_U32 },
2540         [IPVS_DEST_ATTR_PERSIST_CONNS]  = { .type = NLA_U32 },
2541         [IPVS_DEST_ATTR_STATS]          = { .type = NLA_NESTED },
2542 };
2543
2544 static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
2545                                  struct ip_vs_stats *stats)
2546 {
2547         struct nlattr *nl_stats = nla_nest_start(skb, container_type);
2548         if (!nl_stats)
2549                 return -EMSGSIZE;
2550
2551         spin_lock_bh(&stats->lock);
2552
2553         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->conns);
2554         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->inpkts);
2555         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->outpkts);
2556         NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->inbytes);
2557         NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->outbytes);
2558         NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->cps);
2559         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->inpps);
2560         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->outpps);
2561         NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->inbps);
2562         NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->outbps);
2563
2564         spin_unlock_bh(&stats->lock);
2565
2566         nla_nest_end(skb, nl_stats);
2567
2568         return 0;
2569
2570 nla_put_failure:
2571         spin_unlock_bh(&stats->lock);
2572         nla_nest_cancel(skb, nl_stats);
2573         return -EMSGSIZE;
2574 }
2575
2576 static int ip_vs_genl_fill_service(struct sk_buff *skb,
2577                                    struct ip_vs_service *svc)
2578 {
2579         struct nlattr *nl_service;
2580         struct ip_vs_flags flags = { .flags = svc->flags,
2581                                      .mask = ~0 };
2582
2583         nl_service = nla_nest_start(skb, IPVS_CMD_ATTR_SERVICE);
2584         if (!nl_service)
2585                 return -EMSGSIZE;
2586
2587         NLA_PUT_U16(skb, IPVS_SVC_ATTR_AF, AF_INET);
2588
2589         if (svc->fwmark) {
2590                 NLA_PUT_U32(skb, IPVS_SVC_ATTR_FWMARK, svc->fwmark);
2591         } else {
2592                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PROTOCOL, svc->protocol);
2593                 NLA_PUT(skb, IPVS_SVC_ATTR_ADDR, sizeof(svc->addr), &svc->addr);
2594                 NLA_PUT_U16(skb, IPVS_SVC_ATTR_PORT, svc->port);
2595         }
2596
2597         NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name);
2598         NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags);
2599         NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ);
2600         NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask);
2601
2602         if (ip_vs_genl_fill_stats(skb, IPVS_SVC_ATTR_STATS, &svc->stats))
2603                 goto nla_put_failure;
2604
2605         nla_nest_end(skb, nl_service);
2606
2607         return 0;
2608
2609 nla_put_failure:
2610         nla_nest_cancel(skb, nl_service);
2611         return -EMSGSIZE;
2612 }
2613
2614 static int ip_vs_genl_dump_service(struct sk_buff *skb,
2615                                    struct ip_vs_service *svc,
2616                                    struct netlink_callback *cb)
2617 {
2618         void *hdr;
2619
2620         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2621                           &ip_vs_genl_family, NLM_F_MULTI,
2622                           IPVS_CMD_NEW_SERVICE);
2623         if (!hdr)
2624                 return -EMSGSIZE;
2625
2626         if (ip_vs_genl_fill_service(skb, svc) < 0)
2627                 goto nla_put_failure;
2628
2629         return genlmsg_end(skb, hdr);
2630
2631 nla_put_failure:
2632         genlmsg_cancel(skb, hdr);
2633         return -EMSGSIZE;
2634 }
2635
2636 static int ip_vs_genl_dump_services(struct sk_buff *skb,
2637                                     struct netlink_callback *cb)
2638 {
2639         int idx = 0, i;
2640         int start = cb->args[0];
2641         struct ip_vs_service *svc;
2642
2643         mutex_lock(&__ip_vs_mutex);
2644         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2645                 list_for_each_entry(svc, &ip_vs_svc_table[i], s_list) {
2646                         if (++idx <= start)
2647                                 continue;
2648                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2649                                 idx--;
2650                                 goto nla_put_failure;
2651                         }
2652                 }
2653         }
2654
2655         for (i = 0; i < IP_VS_SVC_TAB_SIZE; i++) {
2656                 list_for_each_entry(svc, &ip_vs_svc_fwm_table[i], f_list) {
2657                         if (++idx <= start)
2658                                 continue;
2659                         if (ip_vs_genl_dump_service(skb, svc, cb) < 0) {
2660                                 idx--;
2661                                 goto nla_put_failure;
2662                         }
2663                 }
2664         }
2665
2666 nla_put_failure:
2667         mutex_unlock(&__ip_vs_mutex);
2668         cb->args[0] = idx;
2669
2670         return skb->len;
2671 }
2672
2673 static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc,
2674                                     struct nlattr *nla, int full_entry)
2675 {
2676         struct nlattr *attrs[IPVS_SVC_ATTR_MAX + 1];
2677         struct nlattr *nla_af, *nla_port, *nla_fwmark, *nla_protocol, *nla_addr;
2678
2679         /* Parse mandatory identifying service fields first */
2680         if (nla == NULL ||
2681             nla_parse_nested(attrs, IPVS_SVC_ATTR_MAX, nla, ip_vs_svc_policy))
2682                 return -EINVAL;
2683
2684         nla_af          = attrs[IPVS_SVC_ATTR_AF];
2685         nla_protocol    = attrs[IPVS_SVC_ATTR_PROTOCOL];
2686         nla_addr        = attrs[IPVS_SVC_ATTR_ADDR];
2687         nla_port        = attrs[IPVS_SVC_ATTR_PORT];
2688         nla_fwmark      = attrs[IPVS_SVC_ATTR_FWMARK];
2689
2690         if (!(nla_af && (nla_fwmark || (nla_port && nla_protocol && nla_addr))))
2691                 return -EINVAL;
2692
2693         usvc->af = nla_get_u16(nla_af);
2694         /* For now, only support IPv4 */
2695         if (nla_get_u16(nla_af) != AF_INET)
2696                 return -EAFNOSUPPORT;
2697
2698         if (nla_fwmark) {
2699                 usvc->protocol = IPPROTO_TCP;
2700                 usvc->fwmark = nla_get_u32(nla_fwmark);
2701         } else {
2702                 usvc->protocol = nla_get_u16(nla_protocol);
2703                 nla_memcpy(&usvc->addr, nla_addr, sizeof(usvc->addr));
2704                 usvc->port = nla_get_u16(nla_port);
2705                 usvc->fwmark = 0;
2706         }
2707
2708         /* If a full entry was requested, check for the additional fields */
2709         if (full_entry) {
2710                 struct nlattr *nla_sched, *nla_flags, *nla_timeout,
2711                               *nla_netmask;
2712                 struct ip_vs_flags flags;
2713                 struct ip_vs_service *svc;
2714
2715                 nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME];
2716                 nla_flags = attrs[IPVS_SVC_ATTR_FLAGS];
2717                 nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT];
2718                 nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK];
2719
2720                 if (!(nla_sched && nla_flags && nla_timeout && nla_netmask))
2721                         return -EINVAL;
2722
2723                 nla_memcpy(&flags, nla_flags, sizeof(flags));
2724
2725                 /* prefill flags from service if it already exists */
2726                 if (usvc->fwmark)
2727                         svc = __ip_vs_svc_fwm_get(usvc->af, usvc->fwmark);
2728                 else
2729                         svc = __ip_vs_service_get(usvc->af, usvc->protocol,
2730                                                   &usvc->addr, usvc->port);
2731                 if (svc) {
2732                         usvc->flags = svc->flags;
2733                         ip_vs_service_put(svc);
2734                 } else
2735                         usvc->flags = 0;
2736
2737                 /* set new flags from userland */
2738                 usvc->flags = (usvc->flags & ~flags.mask) |
2739                               (flags.flags & flags.mask);
2740                 usvc->sched_name = nla_data(nla_sched);
2741                 usvc->timeout = nla_get_u32(nla_timeout);
2742                 usvc->netmask = nla_get_u32(nla_netmask);
2743         }
2744
2745         return 0;
2746 }
2747
2748 static struct ip_vs_service *ip_vs_genl_find_service(struct nlattr *nla)
2749 {
2750         struct ip_vs_service_user_kern usvc;
2751         int ret;
2752
2753         ret = ip_vs_genl_parse_service(&usvc, nla, 0);
2754         if (ret)
2755                 return ERR_PTR(ret);
2756
2757         if (usvc.fwmark)
2758                 return __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
2759         else
2760                 return __ip_vs_service_get(usvc.af, usvc.protocol,
2761                                            &usvc.addr, usvc.port);
2762 }
2763
2764 static int ip_vs_genl_fill_dest(struct sk_buff *skb, struct ip_vs_dest *dest)
2765 {
2766         struct nlattr *nl_dest;
2767
2768         nl_dest = nla_nest_start(skb, IPVS_CMD_ATTR_DEST);
2769         if (!nl_dest)
2770                 return -EMSGSIZE;
2771
2772         NLA_PUT(skb, IPVS_DEST_ATTR_ADDR, sizeof(dest->addr), &dest->addr);
2773         NLA_PUT_U16(skb, IPVS_DEST_ATTR_PORT, dest->port);
2774
2775         NLA_PUT_U32(skb, IPVS_DEST_ATTR_FWD_METHOD,
2776                     atomic_read(&dest->conn_flags) & IP_VS_CONN_F_FWD_MASK);
2777         NLA_PUT_U32(skb, IPVS_DEST_ATTR_WEIGHT, atomic_read(&dest->weight));
2778         NLA_PUT_U32(skb, IPVS_DEST_ATTR_U_THRESH, dest->u_threshold);
2779         NLA_PUT_U32(skb, IPVS_DEST_ATTR_L_THRESH, dest->l_threshold);
2780         NLA_PUT_U32(skb, IPVS_DEST_ATTR_ACTIVE_CONNS,
2781                     atomic_read(&dest->activeconns));
2782         NLA_PUT_U32(skb, IPVS_DEST_ATTR_INACT_CONNS,
2783                     atomic_read(&dest->inactconns));
2784         NLA_PUT_U32(skb, IPVS_DEST_ATTR_PERSIST_CONNS,
2785                     atomic_read(&dest->persistconns));
2786
2787         if (ip_vs_genl_fill_stats(skb, IPVS_DEST_ATTR_STATS, &dest->stats))
2788                 goto nla_put_failure;
2789
2790         nla_nest_end(skb, nl_dest);
2791
2792         return 0;
2793
2794 nla_put_failure:
2795         nla_nest_cancel(skb, nl_dest);
2796         return -EMSGSIZE;
2797 }
2798
2799 static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest,
2800                                 struct netlink_callback *cb)
2801 {
2802         void *hdr;
2803
2804         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2805                           &ip_vs_genl_family, NLM_F_MULTI,
2806                           IPVS_CMD_NEW_DEST);
2807         if (!hdr)
2808                 return -EMSGSIZE;
2809
2810         if (ip_vs_genl_fill_dest(skb, dest) < 0)
2811                 goto nla_put_failure;
2812
2813         return genlmsg_end(skb, hdr);
2814
2815 nla_put_failure:
2816         genlmsg_cancel(skb, hdr);
2817         return -EMSGSIZE;
2818 }
2819
2820 static int ip_vs_genl_dump_dests(struct sk_buff *skb,
2821                                  struct netlink_callback *cb)
2822 {
2823         int idx = 0;
2824         int start = cb->args[0];
2825         struct ip_vs_service *svc;
2826         struct ip_vs_dest *dest;
2827         struct nlattr *attrs[IPVS_CMD_ATTR_MAX + 1];
2828
2829         mutex_lock(&__ip_vs_mutex);
2830
2831         /* Try to find the service for which to dump destinations */
2832         if (nlmsg_parse(cb->nlh, GENL_HDRLEN, attrs,
2833                         IPVS_CMD_ATTR_MAX, ip_vs_cmd_policy))
2834                 goto out_err;
2835
2836         svc = ip_vs_genl_find_service(attrs[IPVS_CMD_ATTR_SERVICE]);
2837         if (IS_ERR(svc) || svc == NULL)
2838                 goto out_err;
2839
2840         /* Dump the destinations */
2841         list_for_each_entry(dest, &svc->destinations, n_list) {
2842                 if (++idx <= start)
2843                         continue;
2844                 if (ip_vs_genl_dump_dest(skb, dest, cb) < 0) {
2845                         idx--;
2846                         goto nla_put_failure;
2847                 }
2848         }
2849
2850 nla_put_failure:
2851         cb->args[0] = idx;
2852         ip_vs_service_put(svc);
2853
2854 out_err:
2855         mutex_unlock(&__ip_vs_mutex);
2856
2857         return skb->len;
2858 }
2859
2860 static int ip_vs_genl_parse_dest(struct ip_vs_dest_user_kern *udest,
2861                                  struct nlattr *nla, int full_entry)
2862 {
2863         struct nlattr *attrs[IPVS_DEST_ATTR_MAX + 1];
2864         struct nlattr *nla_addr, *nla_port;
2865
2866         /* Parse mandatory identifying destination fields first */
2867         if (nla == NULL ||
2868             nla_parse_nested(attrs, IPVS_DEST_ATTR_MAX, nla, ip_vs_dest_policy))
2869                 return -EINVAL;
2870
2871         nla_addr        = attrs[IPVS_DEST_ATTR_ADDR];
2872         nla_port        = attrs[IPVS_DEST_ATTR_PORT];
2873
2874         if (!(nla_addr && nla_port))
2875                 return -EINVAL;
2876
2877         nla_memcpy(&udest->addr, nla_addr, sizeof(udest->addr));
2878         udest->port = nla_get_u16(nla_port);
2879
2880         /* If a full entry was requested, check for the additional fields */
2881         if (full_entry) {
2882                 struct nlattr *nla_fwd, *nla_weight, *nla_u_thresh,
2883                               *nla_l_thresh;
2884
2885                 nla_fwd         = attrs[IPVS_DEST_ATTR_FWD_METHOD];
2886                 nla_weight      = attrs[IPVS_DEST_ATTR_WEIGHT];
2887                 nla_u_thresh    = attrs[IPVS_DEST_ATTR_U_THRESH];
2888                 nla_l_thresh    = attrs[IPVS_DEST_ATTR_L_THRESH];
2889
2890                 if (!(nla_fwd && nla_weight && nla_u_thresh && nla_l_thresh))
2891                         return -EINVAL;
2892
2893                 udest->conn_flags = nla_get_u32(nla_fwd)
2894                                     & IP_VS_CONN_F_FWD_MASK;
2895                 udest->weight = nla_get_u32(nla_weight);
2896                 udest->u_threshold = nla_get_u32(nla_u_thresh);
2897                 udest->l_threshold = nla_get_u32(nla_l_thresh);
2898         }
2899
2900         return 0;
2901 }
2902
2903 static int ip_vs_genl_fill_daemon(struct sk_buff *skb, __be32 state,
2904                                   const char *mcast_ifn, __be32 syncid)
2905 {
2906         struct nlattr *nl_daemon;
2907
2908         nl_daemon = nla_nest_start(skb, IPVS_CMD_ATTR_DAEMON);
2909         if (!nl_daemon)
2910                 return -EMSGSIZE;
2911
2912         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_STATE, state);
2913         NLA_PUT_STRING(skb, IPVS_DAEMON_ATTR_MCAST_IFN, mcast_ifn);
2914         NLA_PUT_U32(skb, IPVS_DAEMON_ATTR_SYNC_ID, syncid);
2915
2916         nla_nest_end(skb, nl_daemon);
2917
2918         return 0;
2919
2920 nla_put_failure:
2921         nla_nest_cancel(skb, nl_daemon);
2922         return -EMSGSIZE;
2923 }
2924
2925 static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state,
2926                                   const char *mcast_ifn, __be32 syncid,
2927                                   struct netlink_callback *cb)
2928 {
2929         void *hdr;
2930         hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq,
2931                           &ip_vs_genl_family, NLM_F_MULTI,
2932                           IPVS_CMD_NEW_DAEMON);
2933         if (!hdr)
2934                 return -EMSGSIZE;
2935
2936         if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid))
2937                 goto nla_put_failure;
2938
2939         return genlmsg_end(skb, hdr);
2940
2941 nla_put_failure:
2942         genlmsg_cancel(skb, hdr);
2943         return -EMSGSIZE;
2944 }
2945
2946 static int ip_vs_genl_dump_daemons(struct sk_buff *skb,
2947                                    struct netlink_callback *cb)
2948 {
2949         mutex_lock(&__ip_vs_mutex);
2950         if ((ip_vs_sync_state & IP_VS_STATE_MASTER) && !cb->args[0]) {
2951                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_MASTER,
2952                                            ip_vs_master_mcast_ifn,
2953                                            ip_vs_master_syncid, cb) < 0)
2954                         goto nla_put_failure;
2955
2956                 cb->args[0] = 1;
2957         }
2958
2959         if ((ip_vs_sync_state & IP_VS_STATE_BACKUP) && !cb->args[1]) {
2960                 if (ip_vs_genl_dump_daemon(skb, IP_VS_STATE_BACKUP,
2961                                            ip_vs_backup_mcast_ifn,
2962                                            ip_vs_backup_syncid, cb) < 0)
2963                         goto nla_put_failure;
2964
2965                 cb->args[1] = 1;
2966         }
2967
2968 nla_put_failure:
2969         mutex_unlock(&__ip_vs_mutex);
2970
2971         return skb->len;
2972 }
2973
2974 static int ip_vs_genl_new_daemon(struct nlattr **attrs)
2975 {
2976         if (!(attrs[IPVS_DAEMON_ATTR_STATE] &&
2977               attrs[IPVS_DAEMON_ATTR_MCAST_IFN] &&
2978               attrs[IPVS_DAEMON_ATTR_SYNC_ID]))
2979                 return -EINVAL;
2980
2981         return start_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]),
2982                                  nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]),
2983                                  nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]));
2984 }
2985
2986 static int ip_vs_genl_del_daemon(struct nlattr **attrs)
2987 {
2988         if (!attrs[IPVS_DAEMON_ATTR_STATE])
2989                 return -EINVAL;
2990
2991         return stop_sync_thread(nla_get_u32(attrs[IPVS_DAEMON_ATTR_STATE]));
2992 }
2993
2994 static int ip_vs_genl_set_config(struct nlattr **attrs)
2995 {
2996         struct ip_vs_timeout_user t;
2997
2998         __ip_vs_get_timeouts(&t);
2999
3000         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP])
3001                 t.tcp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP]);
3002
3003         if (attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN])
3004                 t.tcp_fin_timeout =
3005                         nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_TCP_FIN]);
3006
3007         if (attrs[IPVS_CMD_ATTR_TIMEOUT_UDP])
3008                 t.udp_timeout = nla_get_u32(attrs[IPVS_CMD_ATTR_TIMEOUT_UDP]);
3009
3010         return ip_vs_set_timeout(&t);
3011 }
3012
3013 static int ip_vs_genl_set_cmd(struct sk_buff *skb, struct genl_info *info)
3014 {
3015         struct ip_vs_service *svc = NULL;
3016         struct ip_vs_service_user_kern usvc;
3017         struct ip_vs_dest_user_kern udest;
3018         int ret = 0, cmd;
3019         int need_full_svc = 0, need_full_dest = 0;
3020
3021         cmd = info->genlhdr->cmd;
3022
3023         mutex_lock(&__ip_vs_mutex);
3024
3025         if (cmd == IPVS_CMD_FLUSH) {
3026                 ret = ip_vs_flush();
3027                 goto out;
3028         } else if (cmd == IPVS_CMD_SET_CONFIG) {
3029                 ret = ip_vs_genl_set_config(info->attrs);
3030                 goto out;
3031         } else if (cmd == IPVS_CMD_NEW_DAEMON ||
3032                    cmd == IPVS_CMD_DEL_DAEMON) {
3033
3034                 struct nlattr *daemon_attrs[IPVS_DAEMON_ATTR_MAX + 1];
3035
3036                 if (!info->attrs[IPVS_CMD_ATTR_DAEMON] ||
3037                     nla_parse_nested(daemon_attrs, IPVS_DAEMON_ATTR_MAX,
3038                                      info->attrs[IPVS_CMD_ATTR_DAEMON],
3039                                      ip_vs_daemon_policy)) {
3040                         ret = -EINVAL;
3041                         goto out;
3042                 }
3043
3044                 if (cmd == IPVS_CMD_NEW_DAEMON)
3045                         ret = ip_vs_genl_new_daemon(daemon_attrs);
3046                 else
3047                         ret = ip_vs_genl_del_daemon(daemon_attrs);
3048                 goto out;
3049         } else if (cmd == IPVS_CMD_ZERO &&
3050                    !info->attrs[IPVS_CMD_ATTR_SERVICE]) {
3051                 ret = ip_vs_zero_all();
3052                 goto out;
3053         }
3054
3055         /* All following commands require a service argument, so check if we
3056          * received a valid one. We need a full service specification when
3057          * adding / editing a service. Only identifying members otherwise. */
3058         if (cmd == IPVS_CMD_NEW_SERVICE || cmd == IPVS_CMD_SET_SERVICE)
3059                 need_full_svc = 1;
3060
3061         ret = ip_vs_genl_parse_service(&usvc,
3062                                        info->attrs[IPVS_CMD_ATTR_SERVICE],
3063                                        need_full_svc);
3064         if (ret)
3065                 goto out;
3066
3067         /* Lookup the exact service by <protocol, addr, port> or fwmark */
3068         if (usvc.fwmark == 0)
3069                 svc = __ip_vs_service_get(usvc.af, usvc.protocol,
3070                                           &usvc.addr, usvc.port);
3071         else
3072                 svc = __ip_vs_svc_fwm_get(usvc.af, usvc.fwmark);
3073
3074         /* Unless we're adding a new service, the service must already exist */
3075         if ((cmd != IPVS_CMD_NEW_SERVICE) && (svc == NULL)) {
3076                 ret = -ESRCH;
3077                 goto out;
3078         }
3079
3080         /* Destination commands require a valid destination argument. For
3081          * adding / editing a destination, we need a full destination
3082          * specification. */
3083         if (cmd == IPVS_CMD_NEW_DEST || cmd == IPVS_CMD_SET_DEST ||
3084             cmd == IPVS_CMD_DEL_DEST) {
3085                 if (cmd != IPVS_CMD_DEL_DEST)
3086                         need_full_dest = 1;
3087
3088                 ret = ip_vs_genl_parse_dest(&udest,
3089                                             info->attrs[IPVS_CMD_ATTR_DEST],
3090                                             need_full_dest);
3091                 if (ret)
3092                         goto out;
3093         }
3094
3095         switch (cmd) {
3096         case IPVS_CMD_NEW_SERVICE:
3097                 if (svc == NULL)
3098                         ret = ip_vs_add_service(&usvc, &svc);
3099                 else
3100                         ret = -EEXIST;
3101                 break;
3102         case IPVS_CMD_SET_SERVICE:
3103                 ret = ip_vs_edit_service(svc, &usvc);
3104                 break;
3105         case IPVS_CMD_DEL_SERVICE:
3106                 ret = ip_vs_del_service(svc);
3107                 break;
3108         case IPVS_CMD_NEW_DEST:
3109                 ret = ip_vs_add_dest(svc, &udest);
3110                 break;
3111         case IPVS_CMD_SET_DEST:
3112                 ret = ip_vs_edit_dest(svc, &udest);
3113                 break;
3114         case IPVS_CMD_DEL_DEST:
3115                 ret = ip_vs_del_dest(svc, &udest);
3116                 break;
3117         case IPVS_CMD_ZERO:
3118                 ret = ip_vs_zero_service(svc);
3119                 break;
3120         default:
3121                 ret = -EINVAL;
3122         }
3123
3124 out:
3125         if (svc)
3126                 ip_vs_service_put(svc);
3127         mutex_unlock(&__ip_vs_mutex);
3128
3129         return ret;
3130 }
3131
3132 static int ip_vs_genl_get_cmd(struct sk_buff *skb, struct genl_info *info)
3133 {
3134         struct sk_buff *msg;
3135         void *reply;
3136         int ret, cmd, reply_cmd;
3137
3138         cmd = info->genlhdr->cmd;
3139
3140         if (cmd == IPVS_CMD_GET_SERVICE)
3141                 reply_cmd = IPVS_CMD_NEW_SERVICE;
3142         else if (cmd == IPVS_CMD_GET_INFO)
3143                 reply_cmd = IPVS_CMD_SET_INFO;
3144         else if (cmd == IPVS_CMD_GET_CONFIG)
3145                 reply_cmd = IPVS_CMD_SET_CONFIG;
3146         else {
3147                 IP_VS_ERR("unknown Generic Netlink command\n");
3148                 return -EINVAL;
3149         }
3150
3151         msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
3152         if (!msg)
3153                 return -ENOMEM;
3154
3155         mutex_lock(&__ip_vs_mutex);
3156
3157         reply = genlmsg_put_reply(msg, info, &ip_vs_genl_family, 0, reply_cmd);
3158         if (reply == NULL)
3159                 goto nla_put_failure;
3160
3161         switch (cmd) {
3162         case IPVS_CMD_GET_SERVICE:
3163         {
3164                 struct ip_vs_service *svc;
3165
3166                 svc = ip_vs_genl_find_service(info->attrs[IPVS_CMD_ATTR_SERVICE]);
3167                 if (IS_ERR(svc)) {
3168                         ret = PTR_ERR(svc);
3169                         goto out_err;
3170                 } else if (svc) {
3171                         ret = ip_vs_genl_fill_service(msg, svc);
3172                         ip_vs_service_put(svc);
3173                         if (ret)
3174                                 goto nla_put_failure;
3175                 } else {
3176                         ret = -ESRCH;
3177                         goto out_err;
3178                 }
3179
3180                 break;
3181         }
3182
3183         case IPVS_CMD_GET_CONFIG:
3184         {
3185                 struct ip_vs_timeout_user t;
3186
3187                 __ip_vs_get_timeouts(&t);
3188 #ifdef CONFIG_IP_VS_PROTO_TCP
3189                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP, t.tcp_timeout);
3190                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_TCP_FIN,
3191                             t.tcp_fin_timeout);
3192 #endif
3193 #ifdef CONFIG_IP_VS_PROTO_UDP
3194                 NLA_PUT_U32(msg, IPVS_CMD_ATTR_TIMEOUT_UDP, t.udp_timeout);
3195 #endif
3196
3197                 break;
3198         }
3199
3200         case IPVS_CMD_GET_INFO:
3201                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_VERSION, IP_VS_VERSION_CODE);
3202                 NLA_PUT_U32(msg, IPVS_INFO_ATTR_CONN_TAB_SIZE,
3203                             IP_VS_CONN_TAB_SIZE);
3204                 break;
3205         }
3206
3207         genlmsg_end(msg, reply);
3208         ret = genlmsg_unicast(msg, info->snd_pid);
3209         goto out;
3210
3211 nla_put_failure:
3212         IP_VS_ERR("not enough space in Netlink message\n");
3213         ret = -EMSGSIZE;
3214
3215 out_err:
3216         nlmsg_free(msg);
3217 out:
3218         mutex_unlock(&__ip_vs_mutex);
3219
3220         return ret;
3221 }
3222
3223
3224 static struct genl_ops ip_vs_genl_ops[] __read_mostly = {
3225         {
3226                 .cmd    = IPVS_CMD_NEW_SERVICE,
3227                 .flags  = GENL_ADMIN_PERM,
3228                 .policy = ip_vs_cmd_policy,
3229                 .doit   = ip_vs_genl_set_cmd,
3230         },
3231         {
3232                 .cmd    = IPVS_CMD_SET_SERVICE,
3233                 .flags  = GENL_ADMIN_PERM,
3234                 .policy = ip_vs_cmd_policy,
3235                 .doit   = ip_vs_genl_set_cmd,
3236         },
3237         {
3238                 .cmd    = IPVS_CMD_DEL_SERVICE,
3239                 .flags  = GENL_ADMIN_PERM,
3240                 .policy = ip_vs_cmd_policy,
3241                 .doit   = ip_vs_genl_set_cmd,
3242         },
3243         {
3244                 .cmd    = IPVS_CMD_GET_SERVICE,
3245                 .flags  = GENL_ADMIN_PERM,
3246                 .doit   = ip_vs_genl_get_cmd,
3247                 .dumpit = ip_vs_genl_dump_services,
3248                 .policy = ip_vs_cmd_policy,
3249         },
3250         {
3251                 .cmd    = IPVS_CMD_NEW_DEST,
3252                 .flags  = GENL_ADMIN_PERM,
3253                 .policy = ip_vs_cmd_policy,
3254                 .doit   = ip_vs_genl_set_cmd,
3255         },
3256         {
3257                 .cmd    = IPVS_CMD_SET_DEST,
3258                 .flags  = GENL_ADMIN_PERM,
3259                 .policy = ip_vs_cmd_policy,
3260                 .doit   = ip_vs_genl_set_cmd,
3261         },
3262         {
3263                 .cmd    = IPVS_CMD_DEL_DEST,
3264                 .flags  = GENL_ADMIN_PERM,
3265                 .policy = ip_vs_cmd_policy,
3266                 .doit   = ip_vs_genl_set_cmd,
3267         },
3268         {
3269                 .cmd    = IPVS_CMD_GET_DEST,
3270                 .flags  = GENL_ADMIN_PERM,
3271                 .policy = ip_vs_cmd_policy,
3272                 .dumpit = ip_vs_genl_dump_dests,
3273         },
3274         {
3275                 .cmd    = IPVS_CMD_NEW_DAEMON,
3276                 .flags  = GENL_ADMIN_PERM,
3277                 .policy = ip_vs_cmd_policy,
3278                 .doit   = ip_vs_genl_set_cmd,
3279         },
3280         {
3281                 .cmd    = IPVS_CMD_DEL_DAEMON,
3282                 .flags  = GENL_ADMIN_PERM,
3283                 .policy = ip_vs_cmd_policy,
3284                 .doit   = ip_vs_genl_set_cmd,
3285         },
3286         {
3287                 .cmd    = IPVS_CMD_GET_DAEMON,
3288                 .flags  = GENL_ADMIN_PERM,
3289                 .dumpit = ip_vs_genl_dump_daemons,
3290         },
3291         {
3292                 .cmd    = IPVS_CMD_SET_CONFIG,
3293                 .flags  = GENL_ADMIN_PERM,
3294                 .policy = ip_vs_cmd_policy,
3295                 .doit   = ip_vs_genl_set_cmd,
3296         },
3297         {
3298                 .cmd    = IPVS_CMD_GET_CONFIG,
3299                 .flags  = GENL_ADMIN_PERM,
3300                 .doit   = ip_vs_genl_get_cmd,
3301         },
3302         {
3303                 .cmd    = IPVS_CMD_GET_INFO,
3304                 .flags  = GENL_ADMIN_PERM,
3305                 .doit   = ip_vs_genl_get_cmd,
3306         },
3307         {
3308                 .cmd    = IPVS_CMD_ZERO,
3309                 .flags  = GENL_ADMIN_PERM,
3310                 .policy = ip_vs_cmd_policy,
3311                 .doit   = ip_vs_genl_set_cmd,
3312         },
3313         {
3314                 .cmd    = IPVS_CMD_FLUSH,
3315                 .flags  = GENL_ADMIN_PERM,
3316                 .doit   = ip_vs_genl_set_cmd,
3317         },
3318 };
3319
3320 static int __init ip_vs_genl_register(void)
3321 {
3322         int ret, i;
3323
3324         ret = genl_register_family(&ip_vs_genl_family);
3325         if (ret)
3326                 return ret;
3327
3328         for (i = 0; i < ARRAY_SIZE(ip_vs_genl_ops); i++) {
3329                 ret = genl_register_ops(&ip_vs_genl_family, &ip_vs_genl_ops[i]);
3330                 if (ret)
3331                         goto err_out;
3332         }
3333         return 0;
3334
3335 err_out:
3336         genl_unregister_family(&ip_vs_genl_family);
3337         return ret;
3338 }
3339
3340 static void ip_vs_genl_unregister(void)
3341 {
3342         genl_unregister_family(&ip_vs_genl_family);
3343 }
3344
3345 /* End of Generic Netlink interface definitions */
3346
3347
3348 int __init ip_vs_control_init(void)
3349 {
3350         int ret;
3351         int idx;
3352
3353         EnterFunction(2);
3354
3355         ret = nf_register_sockopt(&ip_vs_sockopts);
3356         if (ret) {
3357                 IP_VS_ERR("cannot register sockopt.\n");
3358                 return ret;
3359         }
3360
3361         ret = ip_vs_genl_register();
3362         if (ret) {
3363                 IP_VS_ERR("cannot register Generic Netlink interface.\n");
3364                 nf_unregister_sockopt(&ip_vs_sockopts);
3365                 return ret;
3366         }
3367
3368         proc_net_fops_create(&init_net, "ip_vs", 0, &ip_vs_info_fops);
3369         proc_net_fops_create(&init_net, "ip_vs_stats",0, &ip_vs_stats_fops);
3370
3371         sysctl_header = register_sysctl_paths(net_vs_ctl_path, vs_vars);
3372
3373         /* Initialize ip_vs_svc_table, ip_vs_svc_fwm_table, ip_vs_rtable */
3374         for(idx = 0; idx < IP_VS_SVC_TAB_SIZE; idx++)  {
3375                 INIT_LIST_HEAD(&ip_vs_svc_table[idx]);
3376                 INIT_LIST_HEAD(&ip_vs_svc_fwm_table[idx]);
3377         }
3378         for(idx = 0; idx < IP_VS_RTAB_SIZE; idx++)  {
3379                 INIT_LIST_HEAD(&ip_vs_rtable[idx]);
3380         }
3381
3382         ip_vs_new_estimator(&ip_vs_stats);
3383
3384         /* Hook the defense timer */
3385         schedule_delayed_work(&defense_work, DEFENSE_TIMER_PERIOD);
3386
3387         LeaveFunction(2);
3388         return 0;
3389 }
3390
3391
3392 void ip_vs_control_cleanup(void)
3393 {
3394         EnterFunction(2);
3395         ip_vs_trash_cleanup();
3396         cancel_rearming_delayed_work(&defense_work);
3397         cancel_work_sync(&defense_work.work);
3398         ip_vs_kill_estimator(&ip_vs_stats);
3399         unregister_sysctl_table(sysctl_header);
3400         proc_net_remove(&init_net, "ip_vs_stats");
3401         proc_net_remove(&init_net, "ip_vs");
3402         ip_vs_genl_unregister();
3403         nf_unregister_sockopt(&ip_vs_sockopts);
3404         LeaveFunction(2);
3405 }