bbs.cooldavid.org Git - net-next-2.6.git/blobdiff - net/rds/send.c
RDS: Do not set op_active in r_m_copy_from_user().
[net-next-2.6.git] / net / rds / send.c
index 72dbe7fc4f540067272f9fdf8c22f35db7f71c0d..ad89a63c430480277359acaef2c7d7b2fb6cf68d 100644 (file)
@@ -72,7 +72,9 @@ void rds_send_reset(struct rds_connection *conn)
        conn->c_xmit_sg = 0;
        conn->c_xmit_hdr_off = 0;
        conn->c_xmit_data_off = 0;
+       conn->c_xmit_atomic_sent = 0;
        conn->c_xmit_rdma_sent = 0;
+       conn->c_xmit_data_sent = 0;
 
        conn->c_map_queued = 0;
 
@@ -136,68 +138,54 @@ int rds_send_xmit(struct rds_connection *conn)
 
        /*
         * spin trying to push headers and data down the connection until
-        * the connection doens't make forward progress.
+        * the connection doesn't make forward progress.
         */
        while (--send_quota) {
-               /*
-                * See if need to send a congestion map update if we're
-                * between sending messages.  The send_sem protects our sole
-                * use of c_map_offset and _bytes.
-                * Note this is used only by transports that define a special
-                * xmit_cong_map function. For all others, we create allocate
-                * a cong_map message and treat it just like any other send.
-                */
-               if (conn->c_map_bytes) {
-                       ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
-                                               conn->c_map_offset);
-                       if (ret <= 0)
-                               break;
-
-                       conn->c_map_offset += ret;
-                       conn->c_map_bytes -= ret;
-                       if (conn->c_map_bytes)
-                               continue;
-               }
 
-               /* If we're done sending the current message, clear the
-                * offset and S/G temporaries.
-                */
                rm = conn->c_xmit_rm;
-               if (rm &&
-                   conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
-                   conn->c_xmit_sg == rm->data.m_nents) {
-                       conn->c_xmit_rm = NULL;
-                       conn->c_xmit_sg = 0;
-                       conn->c_xmit_hdr_off = 0;
-                       conn->c_xmit_data_off = 0;
-                       conn->c_xmit_rdma_sent = 0;
-
-                       /* Release the reference to the previous message. */
-                       rds_message_put(rm);
-                       rm = NULL;
-               }
 
-               /* If we're asked to send a cong map update, do so.
+               /*
+                * If between sending messages, we can send a pending congestion
+                * map update.
+                *
+                * Transports either define a special xmit_cong_map function,
+                * or we allocate a cong_map message and treat it just like any
+                * other send.
                 */
                if (!rm && test_and_clear_bit(0, &conn->c_map_queued)) {
                        if (conn->c_trans->xmit_cong_map) {
-                               conn->c_map_offset = 0;
-                               conn->c_map_bytes = sizeof(struct rds_header) +
+                               unsigned long map_offset = 0;
+                               unsigned long map_bytes = sizeof(struct rds_header) +
                                        RDS_CONG_MAP_BYTES;
-                               continue;
-                       }
 
-                       rm = rds_cong_update_alloc(conn);
-                       if (IS_ERR(rm)) {
-                               ret = PTR_ERR(rm);
-                               break;
-                       }
+                               while (map_bytes) {
+                                       ret = conn->c_trans->xmit_cong_map(conn, conn->c_lcong,
+                                                                          map_offset);
+                                       if (ret <= 0) {
+                                               /* too far down the rabbithole! */
+                                               mutex_unlock(&conn->c_send_lock);
+                                               rds_conn_error(conn, "Cong map xmit failed\n");
+                                               goto out;
+                                       }
+
+                                       map_offset += ret;
+                                       map_bytes -= ret;
+                               }
+                       } else {
+                               /* send cong update like a normal rm */
+                               rm = rds_cong_update_alloc(conn);
+                               if (IS_ERR(rm)) {
+                                       ret = PTR_ERR(rm);
+                                       break;
+                               }
+                               rm->data.op_active = 1;
 
-                       conn->c_xmit_rm = rm;
+                               conn->c_xmit_rm = rm;
+                       }
                }
 
                /*
-                * Grab the next message from the send queue, if there is one.
+                * If not already working on one, grab the next message.
                 *
                 * c_xmit_rm holds a ref while we're sending this message down
                 * the connction.  We can use this ref while holding the
@@ -235,7 +223,7 @@ int rds_send_xmit(struct rds_connection *conn)
                         * connection.
                         * Therefore, we never retransmit messages with RDMA ops.
                         */
-                       if (rm->rdma.m_rdma_op.r_active &&
+                       if (rm->rdma.op_active &&
                            test_bit(RDS_MSG_RETRANSMITTED, &rm->m_flags)) {
                                spin_lock_irqsave(&conn->c_lock, flags);
                                if (test_and_clear_bit(RDS_MSG_ON_CONN, &rm->m_flags))
@@ -262,23 +250,43 @@ int rds_send_xmit(struct rds_connection *conn)
                        conn->c_xmit_rm = rm;
                }
 
-               /*
-                * Try and send an rdma message.  Let's see if we can
-                * keep this simple and require that the transport either
-                * send the whole rdma or none of it.
-                */
-               if (rm->rdma.m_rdma_op.r_active && !conn->c_xmit_rdma_sent) {
-                       ret = conn->c_trans->xmit_rdma(conn, &rm->rdma.m_rdma_op);
+               if (rm->atomic.op_active && !conn->c_xmit_atomic_sent) {
+                       ret = conn->c_trans->xmit_atomic(conn, rm);
+                       if (ret)
+                               break;
+                       conn->c_xmit_atomic_sent = 1;
+                       /* The transport owns the mapped memory for now.
+                        * You can't unmap it while it's on the send queue */
+                       set_bit(RDS_MSG_MAPPED, &rm->m_flags);
+
+                       /*
+                        * This is evil, muahaha.
+                        * We permit 0-byte sends. (rds-ping depends on this.)
+                        * BUT if there is an atomic op and no sent data,
+                        * we turn off sending the header, to achieve
+                        * "silent" atomics.
+                        * But see below; RDMA op might toggle this back on!
+                        */
+                       if (rm->data.op_nents == 0)
+                               rm->data.op_active = 0;
+               }
+
+               /* The transport either sends the whole rdma or none of it */
+               if (rm->rdma.op_active && !conn->c_xmit_rdma_sent) {
+                       ret = conn->c_trans->xmit_rdma(conn, &rm->rdma);
                        if (ret)
                                break;
                        conn->c_xmit_rdma_sent = 1;
+
+                       /* rdmas need data sent, even if just the header */
+                       rm->data.op_active = 1;
+
                        /* The transport owns the mapped memory for now.
                         * You can't unmap it while it's on the send queue */
                        set_bit(RDS_MSG_MAPPED, &rm->m_flags);
                }
 
-               if (conn->c_xmit_hdr_off < sizeof(struct rds_header) ||
-                   conn->c_xmit_sg < rm->data.m_nents) {
+               if (rm->data.op_active && !conn->c_xmit_data_sent) {
                        ret = conn->c_trans->xmit(conn, rm,
                                                  conn->c_xmit_hdr_off,
                                                  conn->c_xmit_sg,
@@ -294,7 +302,7 @@ int rds_send_xmit(struct rds_connection *conn)
                                ret -= tmp;
                        }
 
-                       sg = &rm->data.m_sg[conn->c_xmit_sg];
+                       sg = &rm->data.op_sg[conn->c_xmit_sg];
                        while (ret) {
                                tmp = min_t(int, ret, sg->length -
                                                      conn->c_xmit_data_off);
@@ -305,9 +313,30 @@ int rds_send_xmit(struct rds_connection *conn)
                                        sg++;
                                        conn->c_xmit_sg++;
                                        BUG_ON(ret != 0 &&
-                                              conn->c_xmit_sg == rm->data.m_nents);
+                                              conn->c_xmit_sg == rm->data.op_nents);
                                }
                        }
+
+                       if (conn->c_xmit_hdr_off == sizeof(struct rds_header) &&
+                           (conn->c_xmit_sg == rm->data.op_nents))
+                               conn->c_xmit_data_sent = 1;
+               }
+
+               /*
+                * An rm only takes multiple passes through this loop if it
+                * has a data op. Thus, if the data has been sent (or there
+                * was none), then we're done with the rm.
+                */
+               if (!rm->data.op_active || conn->c_xmit_data_sent) {
+                       conn->c_xmit_rm = NULL;
+                       conn->c_xmit_sg = 0;
+                       conn->c_xmit_hdr_off = 0;
+                       conn->c_xmit_data_off = 0;
+                       conn->c_xmit_rdma_sent = 0;
+                       conn->c_xmit_atomic_sent = 0;
+                       conn->c_xmit_data_sent = 0;
+
+                       rds_message_put(rm);
                }
        }
 
@@ -332,7 +361,7 @@ int rds_send_xmit(struct rds_connection *conn)
         */
        mutex_unlock(&conn->c_send_lock);
 
-       if (conn->c_map_bytes || (send_quota == 0 && !was_empty)) {
+       if (send_quota == 0 && !was_empty) {
                /* We exhausted the send quota, but there's work left to
                 * do. Return and (re-)schedule the send worker.
                 */
@@ -412,16 +441,16 @@ int rds_send_acked_before(struct rds_connection *conn, u64 seq)
 void rds_rdma_send_complete(struct rds_message *rm, int status)
 {
        struct rds_sock *rs = NULL;
-       struct rds_rdma_op *ro;
+       struct rm_rdma_op *ro;
        struct rds_notifier *notifier;
        unsigned long flags;
 
        spin_lock_irqsave(&rm->m_rs_lock, flags);
 
-       ro = &rm->rdma.m_rdma_op;
+       ro = &rm->rdma;
        if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags) &&
-           ro->r_active && ro->r_notify && ro->r_notifier) {
-               notifier = ro->r_notifier;
+           ro->op_active && ro->op_notify && ro->op_notifier) {
+               notifier = ro->op_notifier;
                rs = rm->m_rs;
                sock_hold(rds_rs_to_sk(rs));
 
@@ -430,7 +459,7 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
                list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
                spin_unlock(&rs->rs_lock);
 
-               ro->r_notifier = NULL;
+               ro->op_notifier = NULL;
        }
 
        spin_unlock_irqrestore(&rm->m_rs_lock, flags);
@@ -442,6 +471,41 @@ void rds_rdma_send_complete(struct rds_message *rm, int status)
 }
 EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
 
+/*
+ * Like rds_rdma_send_complete(), but for the atomic op: queue its notifier
+ * (carrying @status) on the message's socket and wake it.  m_rs_lock is
+ * taken irq-safe, exactly as rds_rdma_send_complete() takes it.
+ */
+void rds_atomic_send_complete(struct rds_message *rm, int status)
+{
+       struct rds_sock *rs = NULL;
+       struct rm_atomic_op *ao;
+       struct rds_notifier *notifier;
+       unsigned long flags;
+
+       spin_lock_irqsave(&rm->m_rs_lock, flags);
+       ao = &rm->atomic;
+       if (test_bit(RDS_MSG_ON_SOCK, &rm->m_flags)
+           && ao->op_active && ao->op_notify && ao->op_notifier) {
+               notifier = ao->op_notifier;
+               rs = rm->m_rs;
+               sock_hold(rds_rs_to_sk(rs));    /* dropped after the wakeup below */
+
+               notifier->n_status = status;
+               spin_lock(&rs->rs_lock);
+               list_add_tail(&notifier->n_list, &rs->rs_notify_queue);
+               spin_unlock(&rs->rs_lock);
+               ao->op_notifier = NULL; /* notifier now belongs to the queue */
+       }
+       spin_unlock_irqrestore(&rm->m_rs_lock, flags);
+
+       if (rs) {
+               rds_wake_sk_sleep(rs);
+               sock_put(rds_rs_to_sk(rs));
+       }
+}
+EXPORT_SYMBOL_GPL(rds_atomic_send_complete);
+
 /*
  * This is the same as rds_rdma_send_complete except we
  * don't do any locking - we have all the ingredients (message,
@@ -450,13 +514,13 @@ EXPORT_SYMBOL_GPL(rds_rdma_send_complete);
 static inline void
 __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status)
 {
-       struct rds_rdma_op *ro;
+       struct rm_rdma_op *ro;
 
-       ro = &rm->rdma.m_rdma_op;
-       if (ro->r_active && ro->r_notify && ro->r_notifier) {
-               ro->r_notifier->n_status = status;
-               list_add_tail(&ro->r_notifier->n_list, &rs->rs_notify_queue);
-               ro->r_notifier = NULL;
+       ro = &rm->rdma;
+       if (ro->op_active && ro->op_notify && ro->op_notifier) {
+               ro->op_notifier->n_status = status;
+               list_add_tail(&ro->op_notifier->n_list, &rs->rs_notify_queue);
+               ro->op_notifier = NULL;
        }
 
        /* No need to wake the app - caller does this */
@@ -468,7 +532,7 @@ __rds_rdma_send_complete(struct rds_sock *rs, struct rds_message *rm, int status
  * So speed is not an issue here.
  */
 struct rds_message *rds_send_get_message(struct rds_connection *conn,
-                                        struct rds_rdma_op *op)
+                                        struct rm_rdma_op *op)
 {
        struct rds_message *rm, *tmp, *found = NULL;
        unsigned long flags;
@@ -476,7 +540,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
        spin_lock_irqsave(&conn->c_lock, flags);
 
        list_for_each_entry_safe(rm, tmp, &conn->c_retrans, m_conn_item) {
-               if (&rm->rdma.m_rdma_op == op) {
+               if (&rm->rdma == op) {
                        atomic_inc(&rm->m_refcount);
                        found = rm;
                        goto out;
@@ -484,7 +548,7 @@ struct rds_message *rds_send_get_message(struct rds_connection *conn,
        }
 
        list_for_each_entry_safe(rm, tmp, &conn->c_send_queue, m_conn_item) {
-               if (&rm->rdma.m_rdma_op == op) {
+               if (&rm->rdma == op) {
                        atomic_inc(&rm->m_refcount);
                        found = rm;
                        break;
@@ -544,20 +608,20 @@ void rds_send_remove_from_sock(struct list_head *messages, int status)
                spin_lock(&rs->rs_lock);
 
                if (test_and_clear_bit(RDS_MSG_ON_SOCK, &rm->m_flags)) {
-                       struct rds_rdma_op *ro = &rm->rdma.m_rdma_op;
+                       struct rm_rdma_op *ro = &rm->rdma;
                        struct rds_notifier *notifier;
 
                        list_del_init(&rm->m_sock_item);
                        rds_send_sndbuf_remove(rs, rm);
 
-                       if (ro->r_active && ro->r_notifier &&
-                           (status || ro->r_notify)) {
-                               notifier = ro->r_notifier;
+                       if (ro->op_active && ro->op_notifier &&
+                              (ro->op_notify || (ro->op_recverr && status))) {
+                               notifier = ro->op_notifier;
                                list_add_tail(&notifier->n_list,
                                                &rs->rs_notify_queue);
                                if (!notifier->n_status)
                                        notifier->n_status = status;
-                               rm->rdma.m_rdma_op.r_notifier = NULL;
+                               rm->rdma.op_notifier = NULL;
                        }
                        was_on_sock = 1;
                        rm->m_rs = NULL;
@@ -788,6 +852,11 @@ static int rds_rm_size(struct msghdr *msg, int data_len)
                        /* these are valid but do no add any size */
                        break;
 
+               case RDS_CMSG_ATOMIC_CSWP:
+               case RDS_CMSG_ATOMIC_FADD:
+                       size += sizeof(struct scatterlist);
+                       break;
+
                default:
                        return -EINVAL;
                }
@@ -813,7 +882,7 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
                        continue;
 
                /* As a side effect, RDMA_DEST and RDMA_MAP will set
-                * rm->m_rdma_cookie and rm->m_rdma_mr.
+                * rm->m_rdma_cookie and rm->rdma.m_rdma_mr.
                 */
                switch (cmsg->cmsg_type) {
                case RDS_CMSG_RDMA_ARGS:
@@ -829,6 +898,10 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm,
                        if (!ret)
                                *allocated_mr = 1;
                        break;
+               case RDS_CMSG_ATOMIC_CSWP:
+               case RDS_CMSG_ATOMIC_FADD:
+                       ret = rds_cmsg_atomic(rs, rm, cmsg);
+                       break;
 
                default:
                        return -EINVAL;
@@ -897,11 +970,14 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
                goto out;
        }
 
-       rm->data.m_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
-       /* XXX fix this to not allocate memory */
-       ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
-       if (ret)
-               goto out;
+       /* Attach data to the rm */
+       if (payload_len) {
+               rm->data.op_sg = rds_message_alloc_sgs(rm, ceil(payload_len, PAGE_SIZE));
+               ret = rds_message_copy_from_user(rm, msg->msg_iov, payload_len);
+               if (ret)
+                       goto out;
+       }
+       rm->data.op_active = 1;
 
        rm->m_daddr = daddr;
 
@@ -925,11 +1001,19 @@ int rds_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
        if (ret)
                goto out;
 
-       if ((rm->m_rdma_cookie || rm->rdma.m_rdma_op.r_active) &&
+       if ((rm->m_rdma_cookie || rm->rdma.op_active) &&
            !conn->c_trans->xmit_rdma) {
                if (printk_ratelimit())
                        printk(KERN_NOTICE "rdma_op %p conn xmit_rdma %p\n",
-                               &rm->rdma.m_rdma_op, conn->c_trans->xmit_rdma);
+                              &rm->rdma, conn->c_trans->xmit_rdma);
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
+       if (rm->atomic.op_active && !conn->c_trans->xmit_atomic) {
+               if (printk_ratelimit())
+                       printk(KERN_NOTICE "atomic_op %p conn xmit_atomic %p\n",
+                              &rm->atomic, conn->c_trans->xmit_atomic);
                ret = -EOPNOTSUPP;
                goto out;
        }