]> bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - net/netfilter/ipvs/ip_vs_core.c
ipvs: SCTP Transport Loadbalancing Support
[net-next-2.6.git] / net / netfilter / ipvs / ip_vs_core.c
index 847ffca401841e57e5ffec634c78a2dc472b82fa..72e96d823ebfcbb00c85425e63776302c9c28b20 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/kernel.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <linux/sctp.h>
 #include <linux/icmp.h>
 
 #include <net/ip.h>
@@ -81,6 +82,8 @@ const char *ip_vs_proto_name(unsigned proto)
                return "UDP";
        case IPPROTO_TCP:
                return "TCP";
+       case IPPROTO_SCTP:
+               return "SCTP";
        case IPPROTO_ICMP:
                return "ICMP";
 #ifdef CONFIG_IP_VS_IPV6
@@ -589,8 +592,9 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp,
                ip_send_check(ciph);
        }
 
-       /* the TCP/UDP port */
-       if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) {
+       /* the TCP/UDP/SCTP port */
+       if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol ||
+           IPPROTO_SCTP == ciph->protocol) {
                __be16 *ports = (void *)ciph + ciph->ihl*4;
 
                if (inout)
@@ -630,8 +634,9 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp,
                ciph->saddr = cp->daddr.in6;
        }
 
-       /* the TCP/UDP port */
-       if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr) {
+       /* the TCP/UDP/SCTP port */
+       if (IPPROTO_TCP == ciph->nexthdr || IPPROTO_UDP == ciph->nexthdr ||
+           IPPROTO_SCTP == ciph->nexthdr) {
                __be16 *ports = (void *)ciph + sizeof(struct ipv6hdr);
 
                if (inout)
@@ -679,7 +684,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
                goto out;
        }
 
-       if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol)
+       if (IPPROTO_TCP == protocol || IPPROTO_UDP == protocol ||
+           IPPROTO_SCTP == protocol)
                offset += 2 * sizeof(__u16);
        if (!skb_make_writable(skb, offset))
                goto out;
@@ -857,6 +863,21 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related)
 }
 #endif
 
+/* Return 1 if the chunk header located right after the SCTP header
+ * (offset nh_len + sizeof(sctp_sctphdr_t) into skb) has type SCTP_CID_ABORT.
+ * Return 0 otherwise, including when skb_header_pointer() yields NULL. */
+static inline int is_sctp_abort(const struct sk_buff *skb, int nh_len)
+{
+       sctp_chunkhdr_t *sch, schunk;
+       sch = skb_header_pointer(skb, nh_len + sizeof(sctp_sctphdr_t),
+                       sizeof(schunk), &schunk);
+       if (sch == NULL)
+               return 0;
+       if (sch->type == SCTP_CID_ABORT)  /* only this chunk is checked */
+               return 1;
+       return 0;
+}
+
 static inline int is_tcp_reset(const struct sk_buff *skb, int nh_len)
 {
        struct tcphdr _tcph, *th;
@@ -999,7 +1020,8 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
        if (unlikely(!cp)) {
                if (sysctl_ip_vs_nat_icmp_send &&
                    (pp->protocol == IPPROTO_TCP ||
-                    pp->protocol == IPPROTO_UDP)) {
+                    pp->protocol == IPPROTO_UDP ||
+                    pp->protocol == IPPROTO_SCTP)) {
                        __be16 _ports[2], *pptr;
 
                        pptr = skb_header_pointer(skb, iph.len,
@@ -1014,8 +1036,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb,
                                 * existing entry if it is not RST
                                 * packet or not TCP packet.
                                 */
-                               if (iph.protocol != IPPROTO_TCP
-                                   || !is_tcp_reset(skb, iph.len)) {
+                               if ((iph.protocol != IPPROTO_TCP &&
+                                    iph.protocol != IPPROTO_SCTP)
+                                    || ((iph.protocol == IPPROTO_TCP
+                                         && !is_tcp_reset(skb, iph.len))
+                                        || (iph.protocol == IPPROTO_SCTP
+                                               && !is_sctp_abort(skb,
+                                                       iph.len)))) {
 #ifdef CONFIG_IP_VS_IPV6
                                        if (af == AF_INET6)
                                                icmpv6_send(skb,
@@ -1235,7 +1262,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum)
 
        /* do the statistics and put it back */
        ip_vs_in_stats(cp, skb);
-       if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr)
+       if (IPPROTO_TCP == cih->nexthdr || IPPROTO_UDP == cih->nexthdr ||
+           IPPROTO_SCTP == cih->nexthdr)
                offset += 2 * sizeof(__u16);
        verdict = ip_vs_icmp_xmit_v6(skb, cp, pp, offset);
        /* do not touch skb anymore */
@@ -1358,6 +1386,21 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
         * encorage the standby servers to update the connections timeout
         */
        pkts = atomic_add_return(1, &cp->in_pkts);
+       if (af == AF_INET && (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
+           cp->protocol == IPPROTO_SCTP) {
+               if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
+                       (atomic_read(&cp->in_pkts) %
+                        sysctl_ip_vs_sync_threshold[1]
+                        == sysctl_ip_vs_sync_threshold[0])) ||
+                               (cp->old_state != cp->state &&
+                                ((cp->state == IP_VS_SCTP_S_CLOSED) ||
+                                 (cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
+                                 (cp->state == IP_VS_SCTP_S_SHUT_ACK_SER)))) {
+                       ip_vs_sync_conn(cp);
+                       goto out;
+               }
+       }
+
        if (af == AF_INET &&
            (ip_vs_sync_state & IP_VS_STATE_MASTER) &&
            (((cp->protocol != IPPROTO_TCP ||
@@ -1370,6 +1413,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb,
               (cp->state == IP_VS_TCP_S_CLOSE_WAIT) ||
               (cp->state == IP_VS_TCP_S_TIME_WAIT)))))
                ip_vs_sync_conn(cp);
+out:
        cp->old_state = cp->state;
 
        ip_vs_conn_put(cp);