bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Merge branch 'master' of /home/trondmy/kernel/linux-2.6/ into merge_linus
author Trond Myklebust <Trond.Myklebust@netapp.com>
Thu, 7 Dec 2006 21:35:17 +0000 (16:35 -0500)
committer Trond Myklebust <Trond.Myklebust@netapp.com>
Thu, 7 Dec 2006 21:35:17 +0000 (16:35 -0500)
44 files changed:
fs/lockd/clntproc.c
fs/lockd/svc4proc.c
fs/lockd/svcproc.c
fs/nfs/direct.c
fs/nfs/file.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/nfs3proc.c
fs/nfs/nfs4proc.c
fs/nfs/pagelist.c
fs/nfs/proc.c
fs/nfs/read.c
fs/nfs/symlink.c
fs/nfs/write.c
include/linux/nfs_fs.h
include/linux/nfs_page.h
include/linux/nfs_xdr.h
include/linux/sunrpc/auth_gss.h
include/linux/sunrpc/clnt.h
include/linux/sunrpc/debug.h
include/linux/sunrpc/gss_krb5.h
include/linux/sunrpc/gss_spkm3.h
include/linux/sunrpc/sched.h
include/linux/sunrpc/xdr.h
include/linux/sunrpc/xprt.h
net/sunrpc/auth_gss/auth_gss.c
net/sunrpc/auth_gss/gss_krb5_crypto.c
net/sunrpc/auth_gss/gss_krb5_mech.c
net/sunrpc/auth_gss/gss_krb5_seal.c
net/sunrpc/auth_gss/gss_krb5_unseal.c
net/sunrpc/auth_gss/gss_krb5_wrap.c
net/sunrpc/auth_gss/gss_spkm3_mech.c
net/sunrpc/auth_gss/gss_spkm3_seal.c
net/sunrpc/auth_gss/gss_spkm3_token.c
net/sunrpc/auth_gss/gss_spkm3_unseal.c
net/sunrpc/clnt.c
net/sunrpc/pmap_clnt.c
net/sunrpc/sched.c
net/sunrpc/socklib.c
net/sunrpc/sunrpc_syms.c
net/sunrpc/sysctl.c
net/sunrpc/xdr.c
net/sunrpc/xprt.c
net/sunrpc/xprtsock.c

index 50643b6a5556178be5f6b90a7d0c40afd01365db..497c3cd59d527c64055c21579930c7ec43fcfcd5 100644 (file)
@@ -730,7 +730,7 @@ static void nlmclnt_cancel_callback(struct rpc_task *task, void *data)
                goto retry_cancel;
        }
 
-       dprintk("lockd: cancel status %d (task %d)\n",
+       dprintk("lockd: cancel status %u (task %u)\n",
                        req->a_res.status, task->tk_pid);
 
        switch (req->a_res.status) {
index 0ce5c81ff5078076e3fd43f659fa0de561370917..f67146a8199a0a98149407080dd83582357dc52f 100644 (file)
@@ -234,7 +234,7 @@ nlm4svc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
  */
 static void nlm4svc_callback_exit(struct rpc_task *task, void *data)
 {
-       dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+       dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
                        -task->tk_status);
 }
 
index 32e99a6e8dcad6c20964d63d3dcb5d460423b495..3707c3a23e9330173b1d50fdbb1548691064f688 100644 (file)
@@ -263,7 +263,7 @@ nlmsvc_proc_granted(struct svc_rqst *rqstp, struct nlm_args *argp,
  */
 static void nlmsvc_callback_exit(struct rpc_task *task, void *data)
 {
-       dprintk("lockd: %4d callback returned %d\n", task->tk_pid,
+       dprintk("lockd: %5u callback returned %d\n", task->tk_pid,
                        -task->tk_status);
 }
 
index 2f488e1d9b6c5d359ff00120b548fd8b9a22eff6..f9d678f4ae06cff66aa86e8ca58425a2ba779a5c 100644 (file)
@@ -307,9 +307,7 @@ static ssize_t nfs_direct_read_schedule(struct nfs_direct_req *dreq, unsigned lo
 
                data->task.tk_cookie = (unsigned long) inode;
 
-               lock_kernel();
                rpc_execute(&data->task);
-               unlock_kernel();
 
                dfprintk(VFS, "NFS: %5u initiated direct read call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
                                data->task.tk_pid,
@@ -475,9 +473,7 @@ static void nfs_direct_commit_schedule(struct nfs_direct_req *dreq)
 
        dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
 
-       lock_kernel();
        rpc_execute(&data->task);
-       unlock_kernel();
 }
 
 static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode)
@@ -641,9 +637,7 @@ static ssize_t nfs_direct_write_schedule(struct nfs_direct_req *dreq, unsigned l
                data->task.tk_priority = RPC_PRIORITY_NORMAL;
                data->task.tk_cookie = (unsigned long) inode;
 
-               lock_kernel();
                rpc_execute(&data->task);
-               unlock_kernel();
 
                dfprintk(VFS, "NFS: %5u initiated direct write call (req %s/%Ld, %zu bytes @ offset %Lu)\n",
                                data->task.tk_pid,
index cc93865cea932a4e9c78ea95e1c0165e575630d1..8e28bffc35a0618f2b10c7807d6babc46c63a466 100644 (file)
@@ -307,28 +307,28 @@ static int nfs_commit_write(struct file *file, struct page *page, unsigned offse
 
 static void nfs_invalidate_page(struct page *page, unsigned long offset)
 {
-       struct inode *inode = page->mapping->host;
-
+       if (offset != 0)
+               return;
        /* Cancel any unstarted writes on this page */
-       if (offset == 0)
-               nfs_sync_inode_wait(inode, page->index, 1, FLUSH_INVALIDATE);
+       nfs_wb_page_priority(page->mapping->host, page, FLUSH_INVALIDATE);
 }
 
 static int nfs_release_page(struct page *page, gfp_t gfp)
 {
-       if (gfp & __GFP_FS)
-               return !nfs_wb_page(page->mapping->host, page);
-       else
-               /*
-                * Avoid deadlock on nfs_wait_on_request().
-                */
+       /*
+        * Avoid deadlock on nfs_wait_on_request().
+        */
+       if (!(gfp & __GFP_FS))
                return 0;
+       /* Hack... Force nfs_wb_page() to write out the page */
+       SetPageDirty(page);
+       return !nfs_wb_page(page->mapping->host, page);
 }
 
 const struct address_space_operations nfs_file_aops = {
        .readpage = nfs_readpage,
        .readpages = nfs_readpages,
-       .set_page_dirty = __set_page_dirty_nobuffers,
+       .set_page_dirty = nfs_set_page_dirty,
        .writepage = nfs_writepage,
        .writepages = nfs_writepages,
        .prepare_write = nfs_prepare_write,
@@ -375,6 +375,12 @@ static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov,
 
        nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count);
        result = generic_file_aio_write(iocb, iov, nr_segs, pos);
+       /* Return error values for O_SYNC and IS_SYNC() */
+       if (result >= 0 && (IS_SYNC(inode) || (iocb->ki_filp->f_flags & O_SYNC))) {
+               int err = nfs_fsync(iocb->ki_filp, dentry, 1);
+               if (err < 0)
+                       result = err;
+       }
 out:
        return result;
 
index 15afa460e629b14fa1c508192828d9e969a5aeba..36680d1061b0d1a314eb57abc802a4e5404adc36 100644 (file)
@@ -422,7 +422,7 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
        int err;
 
        /* Flush out writes to the server in order to update c/mtime */
-       nfs_sync_inode_wait(inode, 0, 0, FLUSH_NOCOMMIT);
+       nfs_sync_mapping_range(inode->i_mapping, 0, 0, FLUSH_NOCOMMIT);
 
        /*
         * We may force a getattr if the user cares about atime.
index d205466233f67f932460d51f124cade1bec0a516..a28f6ce2e131e4df24e40223ebdc36d7d986da6f 100644 (file)
@@ -217,3 +217,21 @@ void nfs_super_set_maxbytes(struct super_block *sb, __u64 maxfilesize)
        if (sb->s_maxbytes > MAX_LFS_FILESIZE || sb->s_maxbytes <= 0)
                sb->s_maxbytes = MAX_LFS_FILESIZE;
 }
+
+/*
+ * Determine the number of bytes of data the page contains
+ */
+static inline
+unsigned int nfs_page_length(struct page *page)
+{
+       loff_t i_size = i_size_read(page->mapping->host);
+
+       if (i_size > 0) {
+               pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+               if (page->index < end_index)
+                       return PAGE_CACHE_SIZE;
+               if (page->index == end_index)
+                       return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
+       }
+       return 0;
+}
index e5f128ffc32dd7ee368946768f9ce762a44c4fe7..510ae524f3fd7dfd3e8e5fbd91f1b166964126c1 100644 (file)
@@ -276,51 +276,6 @@ static int nfs3_proc_read(struct nfs_read_data *rdata)
        return status;
 }
 
-static int nfs3_proc_write(struct nfs_write_data *wdata)
-{
-       int                     rpcflags = wdata->flags;
-       struct inode *          inode = wdata->inode;
-       struct nfs_fattr *      fattr = wdata->res.fattr;
-       struct rpc_message      msg = {
-               .rpc_proc       = &nfs3_procedures[NFS3PROC_WRITE],
-               .rpc_argp       = &wdata->args,
-               .rpc_resp       = &wdata->res,
-               .rpc_cred       = wdata->cred,
-       };
-       int                     status;
-
-       dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
-                       (long long) wdata->args.offset);
-       nfs_fattr_init(fattr);
-       status = rpc_call_sync(NFS_CLIENT(inode), &msg, rpcflags);
-       if (status >= 0)
-               nfs_post_op_update_inode(inode, fattr);
-       dprintk("NFS reply write: %d\n", status);
-       return status < 0? status : wdata->res.count;
-}
-
-static int nfs3_proc_commit(struct nfs_write_data *cdata)
-{
-       struct inode *          inode = cdata->inode;
-       struct nfs_fattr *      fattr = cdata->res.fattr;
-       struct rpc_message      msg = {
-               .rpc_proc       = &nfs3_procedures[NFS3PROC_COMMIT],
-               .rpc_argp       = &cdata->args,
-               .rpc_resp       = &cdata->res,
-               .rpc_cred       = cdata->cred,
-       };
-       int                     status;
-
-       dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
-                       (long long) cdata->args.offset);
-       nfs_fattr_init(fattr);
-       status = rpc_call_sync(NFS_CLIENT(inode), &msg, 0);
-       if (status >= 0)
-               nfs_post_op_update_inode(inode, fattr);
-       dprintk("NFS reply commit: %d\n", status);
-       return status;
-}
-
 /*
  * Create a regular file.
  * For now, we don't implement O_EXCL.
@@ -369,7 +324,7 @@ again:
 
        /* If the server doesn't support the exclusive creation semantics,
         * try again with simple 'guarded' mode. */
-       if (status == NFSERR_NOTSUPP) {
+       if (status == -ENOTSUPP) {
                switch (arg.createmode) {
                        case NFS3_CREATE_EXCLUSIVE:
                                arg.createmode = NFS3_CREATE_GUARDED;
@@ -690,8 +645,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
        };
        int                     status;
 
-       lock_kernel();
-
        if (plus)
                msg.rpc_proc = &nfs3_procedures[NFS3PROC_READDIRPLUS];
 
@@ -702,7 +655,6 @@ nfs3_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
        status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        nfs_refresh_inode(dir, &dir_attr);
        dprintk("NFS reply readdir: %d\n", status);
-       unlock_kernel();
        return status;
 }
 
@@ -904,8 +856,6 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
        .access         = nfs3_proc_access,
        .readlink       = nfs3_proc_readlink,
        .read           = nfs3_proc_read,
-       .write          = nfs3_proc_write,
-       .commit         = nfs3_proc_commit,
        .create         = nfs3_proc_create,
        .remove         = nfs3_proc_remove,
        .unlink_setup   = nfs3_proc_unlink_setup,
index 8118036cc4494c3a420869820cdebdb66c2c6262..ee458aeab24a5ef1ebc1ad8aad1410cd2d64abd9 100644 (file)
@@ -636,7 +636,7 @@ static int _nfs4_proc_open_confirm(struct nfs4_opendata *data)
                smp_wmb();
        } else
                status = data->rpc_status;
-       rpc_release_task(task);
+       rpc_put_task(task);
        return status;
 }
 
@@ -742,7 +742,7 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
                smp_wmb();
        } else
                status = data->rpc_status;
-       rpc_release_task(task);
+       rpc_put_task(task);
        if (status != 0)
                return status;
 
@@ -1775,89 +1775,6 @@ static int nfs4_proc_read(struct nfs_read_data *rdata)
        return err;
 }
 
-static int _nfs4_proc_write(struct nfs_write_data *wdata)
-{
-       int rpcflags = wdata->flags;
-       struct inode *inode = wdata->inode;
-       struct nfs_fattr *fattr = wdata->res.fattr;
-       struct nfs_server *server = NFS_SERVER(inode);
-       struct rpc_message msg = {
-               .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_WRITE],
-               .rpc_argp       = &wdata->args,
-               .rpc_resp       = &wdata->res,
-               .rpc_cred       = wdata->cred,
-       };
-       int status;
-
-       dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
-                       (long long) wdata->args.offset);
-
-       wdata->args.bitmask = server->attr_bitmask;
-       wdata->res.server = server;
-       wdata->timestamp = jiffies;
-       nfs_fattr_init(fattr);
-       status = rpc_call_sync(server->client, &msg, rpcflags);
-       dprintk("NFS reply write: %d\n", status);
-       if (status < 0)
-               return status;
-       renew_lease(server, wdata->timestamp);
-       nfs_post_op_update_inode(inode, fattr);
-       return wdata->res.count;
-}
-
-static int nfs4_proc_write(struct nfs_write_data *wdata)
-{
-       struct nfs4_exception exception = { };
-       int err;
-       do {
-               err = nfs4_handle_exception(NFS_SERVER(wdata->inode),
-                               _nfs4_proc_write(wdata),
-                               &exception);
-       } while (exception.retry);
-       return err;
-}
-
-static int _nfs4_proc_commit(struct nfs_write_data *cdata)
-{
-       struct inode *inode = cdata->inode;
-       struct nfs_fattr *fattr = cdata->res.fattr;
-       struct nfs_server *server = NFS_SERVER(inode);
-       struct rpc_message msg = {
-               .rpc_proc       = &nfs4_procedures[NFSPROC4_CLNT_COMMIT],
-               .rpc_argp       = &cdata->args,
-               .rpc_resp       = &cdata->res,
-               .rpc_cred       = cdata->cred,
-       };
-       int status;
-
-       dprintk("NFS call  commit %d @ %Ld\n", cdata->args.count,
-                       (long long) cdata->args.offset);
-
-       cdata->args.bitmask = server->attr_bitmask;
-       cdata->res.server = server;
-       cdata->timestamp = jiffies;
-       nfs_fattr_init(fattr);
-       status = rpc_call_sync(server->client, &msg, 0);
-       if (status >= 0)
-               renew_lease(server, cdata->timestamp);
-       dprintk("NFS reply commit: %d\n", status);
-       if (status >= 0)
-               nfs_post_op_update_inode(inode, fattr);
-       return status;
-}
-
-static int nfs4_proc_commit(struct nfs_write_data *cdata)
-{
-       struct nfs4_exception exception = { };
-       int err;
-       do {
-               err = nfs4_handle_exception(NFS_SERVER(cdata->inode),
-                               _nfs4_proc_commit(cdata),
-                               &exception);
-       } while (exception.retry);
-       return err;
-}
-
 /*
  * Got race?
  * We will need to arrange for the VFS layer to provide an atomic open.
@@ -2223,13 +2140,11 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
                        dentry->d_parent->d_name.name,
                        dentry->d_name.name,
                        (unsigned long long)cookie);
-       lock_kernel();
        nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
        res.pgbase = args.pgbase;
        status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
        if (status == 0)
                memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
-       unlock_kernel();
        dprintk("%s: returns %d\n", __FUNCTION__, status);
        return status;
 }
@@ -3067,7 +2982,7 @@ static int _nfs4_proc_delegreturn(struct inode *inode, struct rpc_cred *cred, co
                if (status == 0)
                        nfs_post_op_update_inode(inode, &data->fattr);
        }
-       rpc_release_task(task);
+       rpc_put_task(task);
        return status;
 }
 
@@ -3314,7 +3229,7 @@ static int nfs4_proc_unlck(struct nfs4_state *state, int cmd, struct file_lock *
        if (IS_ERR(task))
                goto out;
        status = nfs4_wait_for_completion_rpc_task(task);
-       rpc_release_task(task);
+       rpc_put_task(task);
 out:
        return status;
 }
@@ -3430,7 +3345,7 @@ static void nfs4_lock_release(void *calldata)
                task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
                                data->arg.lock_seqid);
                if (!IS_ERR(task))
-                       rpc_release_task(task);
+                       rpc_put_task(task);
                dprintk("%s: cancelling lock!\n", __FUNCTION__);
        } else
                nfs_free_seqid(data->arg.lock_seqid);
@@ -3472,7 +3387,7 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f
                        ret = -EAGAIN;
        } else
                data->cancelled = 1;
-       rpc_release_task(task);
+       rpc_put_task(task);
        dprintk("%s: done, ret = %d!\n", __FUNCTION__, ret);
        return ret;
 }
@@ -3732,8 +3647,6 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
        .access         = nfs4_proc_access,
        .readlink       = nfs4_proc_readlink,
        .read           = nfs4_proc_read,
-       .write          = nfs4_proc_write,
-       .commit         = nfs4_proc_commit,
        .create         = nfs4_proc_create,
        .remove         = nfs4_proc_remove,
        .unlink_setup   = nfs4_proc_unlink_setup,
index 3fbfc2f033079b54520ae0e586fa0150e73584b6..ca4b1d4ff42b0f7d337ad01fa7c1675f959c4134 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/nfs_page.h>
 #include <linux/nfs_fs.h>
 #include <linux/nfs_mount.h>
+#include <linux/writeback.h>
 
 #define NFS_PARANOIA 1
 
@@ -268,11 +269,10 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
 
 #define NFS_SCAN_MAXENTRIES 16
 /**
- * nfs_scan_lock_dirty - Scan the radix tree for dirty requests
- * @nfsi: NFS inode
+ * nfs_scan_dirty - Scan the radix tree for dirty requests
+ * @mapping: pointer to address space
+ * @wbc: writeback_control structure
  * @dst: Destination list
- * @idx_start: lower bound of page->index to scan
- * @npages: idx_start + npages sets the upper bound to scan.
  *
  * Moves elements from one of the inode request lists.
  * If the number of requests is set to 0, the entire address_space
@@ -280,46 +280,63 @@ nfs_coalesce_requests(struct list_head *head, struct list_head *dst,
  * The requests are *not* checked to ensure that they form a contiguous set.
  * You must be holding the inode's req_lock when calling this function
  */
-int
-nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
-             unsigned long idx_start, unsigned int npages)
+long nfs_scan_dirty(struct address_space *mapping,
+                       struct writeback_control *wbc,
+                       struct list_head *dst)
 {
+       struct nfs_inode *nfsi = NFS_I(mapping->host);
        struct nfs_page *pgvec[NFS_SCAN_MAXENTRIES];
        struct nfs_page *req;
-       unsigned long idx_end;
+       pgoff_t idx_start, idx_end;
+       long res = 0;
        int found, i;
-       int res;
 
-       res = 0;
-       if (npages == 0)
-               idx_end = ~0;
-       else
-               idx_end = idx_start + npages - 1;
+       if (nfsi->ndirty == 0)
+               return 0;
+       if (wbc->range_cyclic) {
+               idx_start = 0;
+               idx_end = ULONG_MAX;
+       } else if (wbc->range_end == 0) {
+               idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+               idx_end = ULONG_MAX;
+       } else {
+               idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+               idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
+       }
 
        for (;;) {
+               unsigned int toscan = NFS_SCAN_MAXENTRIES;
+
                found = radix_tree_gang_lookup_tag(&nfsi->nfs_page_tree,
-                               (void **)&pgvec[0], idx_start, NFS_SCAN_MAXENTRIES,
+                               (void **)&pgvec[0], idx_start, toscan,
                                NFS_PAGE_TAG_DIRTY);
+
+               /* Did we make progress? */
                if (found <= 0)
                        break;
+
                for (i = 0; i < found; i++) {
                        req = pgvec[i];
-                       if (req->wb_index > idx_end)
+                       if (!wbc->range_cyclic && req->wb_index > idx_end)
                                goto out;
 
+                       /* Try to lock request and mark it for writeback */
+                       if (!nfs_set_page_writeback_locked(req))
+                               goto next;
+                       radix_tree_tag_clear(&nfsi->nfs_page_tree,
+                                       req->wb_index, NFS_PAGE_TAG_DIRTY);
+                       nfsi->ndirty--;
+                       nfs_list_remove_request(req);
+                       nfs_list_add_request(req, dst);
+                       res++;
+                       if (res == LONG_MAX)
+                               goto out;
+next:
                        idx_start = req->wb_index + 1;
-
-                       if (nfs_set_page_writeback_locked(req)) {
-                               radix_tree_tag_clear(&nfsi->nfs_page_tree,
-                                               req->wb_index, NFS_PAGE_TAG_DIRTY);
-                               nfs_list_remove_request(req);
-                               nfs_list_add_request(req, dst);
-                               dec_zone_page_state(req->wb_page, NR_FILE_DIRTY);
-                               res++;
-                       }
                }
        }
 out:
+       WARN_ON ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty));
        return res;
 }
 
index 4529cc4f3f8fe427bb6b262718213f933b7ac6a1..10f5e80ca15789fc91718c2a5859423c30d21bcc 100644 (file)
@@ -215,32 +215,6 @@ static int nfs_proc_read(struct nfs_read_data *rdata)
        return status;
 }
 
-static int nfs_proc_write(struct nfs_write_data *wdata)
-{
-       int                     flags = wdata->flags;
-       struct inode *          inode = wdata->inode;
-       struct nfs_fattr *      fattr = wdata->res.fattr;
-       struct rpc_message      msg = {
-               .rpc_proc       = &nfs_procedures[NFSPROC_WRITE],
-               .rpc_argp       = &wdata->args,
-               .rpc_resp       = &wdata->res,
-               .rpc_cred       = wdata->cred,
-       };
-       int                     status;
-
-       dprintk("NFS call  write %d @ %Ld\n", wdata->args.count,
-                       (long long) wdata->args.offset);
-       nfs_fattr_init(fattr);
-       status = rpc_call_sync(NFS_CLIENT(inode), &msg, flags);
-       if (status >= 0) {
-               nfs_post_op_update_inode(inode, fattr);
-               wdata->res.count = wdata->args.count;
-               wdata->verf.committed = NFS_FILE_SYNC;
-       }
-       dprintk("NFS reply write: %d\n", status);
-       return status < 0? status : wdata->res.count;
-}
-
 static int
 nfs_proc_create(struct inode *dir, struct dentry *dentry, struct iattr *sattr,
                int flags, struct nameidata *nd)
@@ -545,13 +519,10 @@ nfs_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
        };
        int                     status;
 
-       lock_kernel();
-
        dprintk("NFS call  readdir %d\n", (unsigned int)cookie);
        status = rpc_call_sync(NFS_CLIENT(dir), &msg, 0);
 
        dprintk("NFS reply readdir: %d\n", status);
-       unlock_kernel();
        return status;
 }
 
@@ -696,8 +667,6 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
        .access         = NULL,                /* access */
        .readlink       = nfs_proc_readlink,
        .read           = nfs_proc_read,
-       .write          = nfs_proc_write,
-       .commit         = NULL,                /* commit */
        .create         = nfs_proc_create,
        .remove         = nfs_proc_remove,
        .unlink_setup   = nfs_proc_unlink_setup,
index 244a8c45b68e45b68f257a30c1330d06146b94b6..a9c26521a9e2d8a13965e364dae9ac8392cecdb1 100644 (file)
@@ -30,6 +30,7 @@
 
 #include <asm/system.h>
 
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
@@ -65,32 +66,22 @@ struct nfs_read_data *nfs_readdata_alloc(size_t len)
        return p;
 }
 
-static void nfs_readdata_free(struct nfs_read_data *p)
+static void nfs_readdata_rcu_free(struct rcu_head *head)
 {
+       struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_rdata_mempool);
 }
 
-void nfs_readdata_release(void *data)
+static void nfs_readdata_free(struct nfs_read_data *rdata)
 {
-        nfs_readdata_free(data);
+       call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
 }
 
-static
-unsigned int nfs_page_length(struct inode *inode, struct page *page)
+void nfs_readdata_release(void *data)
 {
-       loff_t i_size = i_size_read(inode);
-       unsigned long idx;
-
-       if (i_size <= 0)
-               return 0;
-       idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
-       if (page->index > idx)
-               return 0;
-       if (page->index != idx)
-               return PAGE_CACHE_SIZE;
-       return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
+        nfs_readdata_free(data);
 }
 
 static
@@ -139,12 +130,12 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
 {
        unsigned int    rsize = NFS_SERVER(inode)->rsize;
        unsigned int    count = PAGE_CACHE_SIZE;
-       int             result;
+       int result = -ENOMEM;
        struct nfs_read_data *rdata;
 
        rdata = nfs_readdata_alloc(count);
        if (!rdata)
-               return -ENOMEM;
+               goto out_unlock;
 
        memset(rdata, 0, sizeof(*rdata));
        rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
@@ -212,8 +203,9 @@ static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
        result = 0;
 
 io_error:
-       unlock_page(page);
        nfs_readdata_free(rdata);
+out_unlock:
+       unlock_page(page);
        return result;
 }
 
@@ -224,7 +216,7 @@ static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
        struct nfs_page *new;
        unsigned int len;
 
-       len = nfs_page_length(inode, page);
+       len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(ctx, inode, page, 0, len);
@@ -316,9 +308,7 @@ static void nfs_execute_read(struct nfs_read_data *data)
        sigset_t oldset;
 
        rpc_clnt_sigmask(clnt, &oldset);
-       lock_kernel();
        rpc_execute(&data->task);
-       unlock_kernel();
        rpc_clnt_sigunmask(clnt, &oldset);
 }
 
@@ -454,6 +444,55 @@ nfs_pagein_list(struct list_head *head, int rpages)
        return error;
 }
 
+/*
+ * This is the callback from RPC telling us whether a reply was
+ * received or some error occurred (timeout or socket shutdown).
+ */
+int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
+{
+       int status;
+
+       dprintk("%s: %4d, (status %d)\n", __FUNCTION__, task->tk_pid,
+                       task->tk_status);
+
+       status = NFS_PROTO(data->inode)->read_done(task, data);
+       if (status != 0)
+               return status;
+
+       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);
+
+       if (task->tk_status == -ESTALE) {
+               set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
+               nfs_mark_for_revalidate(data->inode);
+       }
+       spin_lock(&data->inode->i_lock);
+       NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
+       spin_unlock(&data->inode->i_lock);
+       return 0;
+}
+
+static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
+{
+       struct nfs_readargs *argp = &data->args;
+       struct nfs_readres *resp = &data->res;
+
+       if (resp->eof || resp->count == argp->count)
+               return 0;
+
+       /* This is a short read! */
+       nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
+       /* Has the server at least made some progress? */
+       if (resp->count == 0)
+               return 0;
+
+       /* Yes, so retry the read at the end of the data */
+       argp->offset += resp->count;
+       argp->pgbase += resp->count;
+       argp->count -= resp->count;
+       rpc_restart_call(task);
+       return -EAGAIN;
+}
+
 /*
  * Handle a read reply that fills part of a page.
  */
@@ -463,12 +502,16 @@ static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;
  
-       if (likely(task->tk_status >= 0))
-               nfs_readpage_truncate_uninitialised_page(data);
-       else
-               SetPageError(page);
        if (nfs_readpage_result(task, data) != 0)
                return;
+
+       if (likely(task->tk_status >= 0)) {
+               nfs_readpage_truncate_uninitialised_page(data);
+               if (nfs_readpage_retry(task, data) != 0)
+                       return;
+       }
+       if (unlikely(task->tk_status < 0))
+               SetPageError(page);
        if (atomic_dec_and_test(&req->wb_complete)) {
                if (!PageError(page))
                        SetPageUptodate(page);
@@ -496,25 +539,13 @@ static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
        count += base;
        for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
                SetPageUptodate(*pages);
-       if (count != 0)
+       if (count == 0)
+               return;
+       /* Was this a short read? */
+       if (data->res.eof || data->res.count == data->args.count)
                SetPageUptodate(*pages);
 }
 
-static void nfs_readpage_set_pages_error(struct nfs_read_data *data)
-{
-       unsigned int count = data->args.count;
-       unsigned int base = data->args.pgbase;
-       struct page **pages;
-
-       pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
-       base &= ~PAGE_CACHE_MASK;
-       count += base;
-       for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
-               SetPageError(*pages);
-       if (count != 0)
-               SetPageError(*pages);
-}
-
 /*
  * This is the callback from RPC telling us whether a reply was
  * received or some error occurred (timeout or socket shutdown).
@@ -523,19 +554,20 @@ static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
 {
        struct nfs_read_data *data = calldata;
 
+       if (nfs_readpage_result(task, data) != 0)
+               return;
        /*
-        * Note: nfs_readpage_result may change the values of
+        * Note: nfs_readpage_retry may change the values of
         * data->args. In the multi-page case, we therefore need
-        * to ensure that we call the next nfs_readpage_set_page_uptodate()
-        * first in the multi-page case.
+        * to ensure that we call nfs_readpage_set_pages_uptodate()
+        * first.
         */
        if (likely(task->tk_status >= 0)) {
                nfs_readpage_truncate_uninitialised_page(data);
                nfs_readpage_set_pages_uptodate(data);
-       } else
-               nfs_readpage_set_pages_error(data);
-       if (nfs_readpage_result(task, data) != 0)
-               return;
+               if (nfs_readpage_retry(task, data) != 0)
+                       return;
+       }
        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);
 
@@ -549,50 +581,6 @@ static const struct rpc_call_ops nfs_read_full_ops = {
        .rpc_release = nfs_readdata_release,
 };
 
-/*
- * This is the callback from RPC telling us whether a reply was
- * received or some error occurred (timeout or socket shutdown).
- */
-int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
-{
-       struct nfs_readargs *argp = &data->args;
-       struct nfs_readres *resp = &data->res;
-       int status;
-
-       dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
-               task->tk_pid, task->tk_status);
-
-       status = NFS_PROTO(data->inode)->read_done(task, data);
-       if (status != 0)
-               return status;
-
-       nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, resp->count);
-
-       if (task->tk_status < 0) {
-               if (task->tk_status == -ESTALE) {
-                       set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
-                       nfs_mark_for_revalidate(data->inode);
-               }
-       } else if (resp->count < argp->count && !resp->eof) {
-               /* This is a short read! */
-               nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
-               /* Has the server at least made some progress? */
-               if (resp->count != 0) {
-                       /* Yes, so retry the read at the end of the data */
-                       argp->offset += resp->count;
-                       argp->pgbase += resp->count;
-                       argp->count -= resp->count;
-                       rpc_restart_call(task);
-                       return -EAGAIN;
-               }
-               task->tk_status = -EIO;
-       }
-       spin_lock(&data->inode->i_lock);
-       NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
-       spin_unlock(&data->inode->i_lock);
-       return 0;
-}
-
 /*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
@@ -626,9 +614,10 @@ int nfs_readpage(struct file *file, struct page *page)
                goto out_error;
 
        if (file == NULL) {
+               error = -EBADF;
                ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (ctx == NULL)
-                       return -EBADF;
+                       goto out_error;
        } else
                ctx = get_nfs_open_context((struct nfs_open_context *)
                                file->private_data);
@@ -663,7 +652,7 @@ readpage_async_filler(void *data, struct page *page)
        unsigned int len;
 
        nfs_wb_page(inode, page);
-       len = nfs_page_length(inode, page);
+       len = nfs_page_length(page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(desc->ctx, inode, page, 0, len);
index 600bbe630abd99cdc6e8f9074984495630c9adc1..6c686112cc03fa3c3dcea70180653640d95aaeb9 100644 (file)
@@ -33,9 +33,7 @@ static int nfs_symlink_filler(struct inode *inode, struct page *page)
 {
        int error;
 
-       lock_kernel();
        error = NFS_PROTO(inode)->readlink(inode, page, 0, PAGE_SIZE);
-       unlock_kernel();
        if (error < 0)
                goto error;
        SetPageUptodate(page);
index 41b07288f99e89f4d6e189dc010538ba2e5dd1a5..594eb16879ef744fb50226c5272f26c45f7a596f 100644 (file)
@@ -63,6 +63,7 @@
 #include <linux/smp_lock.h>
 
 #include "delegation.h"
+#include "internal.h"
 #include "iostat.h"
 
 #define NFSDBG_FACILITY                NFSDBG_PAGECACHE
  * Local function declarations
  */
 static struct nfs_page * nfs_update_request(struct nfs_open_context*,
-                                           struct inode *,
                                            struct page *,
                                            unsigned int, unsigned int);
+static void nfs_mark_request_dirty(struct nfs_page *req);
 static int nfs_wait_on_write_congestion(struct address_space *, int);
 static int nfs_wait_on_requests(struct inode *, unsigned long, unsigned int);
-static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-                          unsigned int npages, int how);
+static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how);
 static const struct rpc_call_ops nfs_write_partial_ops;
 static const struct rpc_call_ops nfs_write_full_ops;
 static const struct rpc_call_ops nfs_commit_ops;
@@ -102,13 +102,19 @@ struct nfs_write_data *nfs_commit_alloc(void)
        return p;
 }
 
-void nfs_commit_free(struct nfs_write_data *p)
+void nfs_commit_rcu_free(struct rcu_head *head)
 {
+       struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_commit_mempool);
 }
 
+void nfs_commit_free(struct nfs_write_data *wdata)
+{
+       call_rcu_bh(&wdata->task.u.tk_rcu, nfs_commit_rcu_free);
+}
+
 struct nfs_write_data *nfs_writedata_alloc(size_t len)
 {
        unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -131,18 +137,47 @@ struct nfs_write_data *nfs_writedata_alloc(size_t len)
        return p;
 }
 
-static void nfs_writedata_free(struct nfs_write_data *p)
+static void nfs_writedata_rcu_free(struct rcu_head *head)
 {
+       struct nfs_write_data *p = container_of(head, struct nfs_write_data, task.u.tk_rcu);
        if (p && (p->pagevec != &p->page_array[0]))
                kfree(p->pagevec);
        mempool_free(p, nfs_wdata_mempool);
 }
 
+static void nfs_writedata_free(struct nfs_write_data *wdata)
+{
+       call_rcu_bh(&wdata->task.u.tk_rcu, nfs_writedata_rcu_free);
+}
+
 void nfs_writedata_release(void *wdata)
 {
        nfs_writedata_free(wdata);
 }
 
+static struct nfs_page *nfs_page_find_request_locked(struct page *page)
+{
+       struct nfs_page *req = NULL;
+
+       if (PagePrivate(page)) {
+               req = (struct nfs_page *)page_private(page);
+               if (req != NULL)
+                       atomic_inc(&req->wb_count);
+       }
+       return req;
+}
+
+static struct nfs_page *nfs_page_find_request(struct page *page)
+{
+       struct nfs_page *req = NULL;
+       spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+
+       spin_lock(req_lock);
+       req = nfs_page_find_request_locked(page);
+       spin_unlock(req_lock);
+       return req;
+}
+
 /* Adjust the file length if we're writing beyond the end */
 static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int count)
 {
@@ -164,113 +199,34 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
  */
 static void nfs_mark_uptodate(struct page *page, unsigned int base, unsigned int count)
 {
-       loff_t end_offs;
-
        if (PageUptodate(page))
                return;
        if (base != 0)
                return;
-       if (count == PAGE_CACHE_SIZE) {
-               SetPageUptodate(page);
-               return;
-       }
-
-       end_offs = i_size_read(page->mapping->host) - 1;
-       if (end_offs < 0)
+       if (count != nfs_page_length(page))
                return;
-       /* Is this the last page? */
-       if (page->index != (unsigned long)(end_offs >> PAGE_CACHE_SHIFT))
-               return;
-       /* This is the last page: set PG_uptodate if we cover the entire
-        * extent of the data, then zero the rest of the page.
-        */
-       if (count == (unsigned int)(end_offs & (PAGE_CACHE_SIZE - 1)) + 1) {
+       if (count != PAGE_CACHE_SIZE)
                memclear_highpage_flush(page, count, PAGE_CACHE_SIZE - count);
-               SetPageUptodate(page);
-       }
+       SetPageUptodate(page);
 }
 
-/*
- * Write a page synchronously.
- * Offset is the data offset within the page.
- */
-static int nfs_writepage_sync(struct nfs_open_context *ctx, struct inode *inode,
-               struct page *page, unsigned int offset, unsigned int count,
-               int how)
-{
-       unsigned int    wsize = NFS_SERVER(inode)->wsize;
-       int             result, written = 0;
-       struct nfs_write_data *wdata;
-
-       wdata = nfs_writedata_alloc(wsize);
-       if (!wdata)
-               return -ENOMEM;
-
-       wdata->flags = how;
-       wdata->cred = ctx->cred;
-       wdata->inode = inode;
-       wdata->args.fh = NFS_FH(inode);
-       wdata->args.context = ctx;
-       wdata->args.pages = &page;
-       wdata->args.stable = NFS_FILE_SYNC;
-       wdata->args.pgbase = offset;
-       wdata->args.count = wsize;
-       wdata->res.fattr = &wdata->fattr;
-       wdata->res.verf = &wdata->verf;
-
-       dprintk("NFS:      nfs_writepage_sync(%s/%Ld %d@%Ld)\n",
-               inode->i_sb->s_id,
-               (long long)NFS_FILEID(inode),
-               count, (long long)(page_offset(page) + offset));
-
-       set_page_writeback(page);
-       nfs_begin_data_update(inode);
-       do {
-               if (count < wsize)
-                       wdata->args.count = count;
-               wdata->args.offset = page_offset(page) + wdata->args.pgbase;
-
-               result = NFS_PROTO(inode)->write(wdata);
-
-               if (result < 0) {
-                       /* Must mark the page invalid after I/O error */
-                       ClearPageUptodate(page);
-                       goto io_error;
-               }
-               if (result < wdata->args.count)
-                       printk(KERN_WARNING "NFS: short write, count=%u, result=%d\n",
-                                       wdata->args.count, result);
-
-               wdata->args.offset += result;
-               wdata->args.pgbase += result;
-               written += result;
-               count -= result;
-               nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, result);
-       } while (count);
-       /* Update file length */
-       nfs_grow_file(page, offset, written);
-       /* Set the PG_uptodate flag? */
-       nfs_mark_uptodate(page, offset, written);
-
-       if (PageError(page))
-               ClearPageError(page);
-
-io_error:
-       nfs_end_data_update(inode);
-       end_page_writeback(page);
-       nfs_writedata_free(wdata);
-       return written ? written : result;
-}
-
-static int nfs_writepage_async(struct nfs_open_context *ctx,
-               struct inode *inode, struct page *page,
+static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
                unsigned int offset, unsigned int count)
 {
        struct nfs_page *req;
+       int ret;
 
-       req = nfs_update_request(ctx, inode, page, offset, count);
-       if (IS_ERR(req))
-               return PTR_ERR(req);
+       for (;;) {
+               req = nfs_update_request(ctx, page, offset, count);
+               if (!IS_ERR(req))
+                       break;
+               ret = PTR_ERR(req);
+               if (ret != -EBUSY)
+                       return ret;
+               ret = nfs_wb_page(page->mapping->host, page);
+               if (ret != 0)
+                       return ret;
+       }
        /* Update file length */
        nfs_grow_file(page, offset, count);
        /* Set the PG_uptodate flag? */
@@ -288,74 +244,95 @@ static int wb_priority(struct writeback_control *wbc)
        return 0;
 }
 
+/*
+ * Find an associated nfs write request, and prepare to flush it out.
+ * Returns 1 if there was no write request, or if the request was
+ * already tagged by nfs_set_page_dirty. Returns 0 if the request
+ * was not tagged.
+ * May also return an error if a signal interrupts nfs_wait_on_request().
+ */
+static int nfs_page_mark_flush(struct page *page)
+{
+       struct nfs_page *req;
+       spinlock_t *req_lock = &NFS_I(page->mapping->host)->req_lock;
+       int ret;
+
+       spin_lock(req_lock);
+       for(;;) {
+               req = nfs_page_find_request_locked(page);
+               if (req == NULL) {
+                       spin_unlock(req_lock);
+                       return 1;
+               }
+               if (nfs_lock_request_dontget(req))
+                       break;
+               /* Note: If we hold the page lock, as is the case in nfs_writepage,
+                *       then the call to nfs_lock_request_dontget() will always
+                *       succeed provided that someone hasn't already marked the
+                *       request as dirty (in which case we don't care).
+                */
+               spin_unlock(req_lock);
+               ret = nfs_wait_on_request(req);
+               nfs_release_request(req);
+               if (ret != 0)
+                       return ret;
+               spin_lock(req_lock);
+       }
+       spin_unlock(req_lock);
+       if (test_and_set_bit(PG_FLUSHING, &req->wb_flags) == 0) {
+               nfs_mark_request_dirty(req);
+               set_page_writeback(page);
+       }
+       ret = test_bit(PG_NEED_FLUSH, &req->wb_flags);
+       nfs_unlock_request(req);
+       return ret;
+}
+
 /*
  * Write an mmapped page to the server.
  */
-int nfs_writepage(struct page *page, struct writeback_control *wbc)
+static int nfs_writepage_locked(struct page *page, struct writeback_control *wbc)
 {
        struct nfs_open_context *ctx;
        struct inode *inode = page->mapping->host;
-       unsigned long end_index;
-       unsigned offset = PAGE_CACHE_SIZE;
-       loff_t i_size = i_size_read(inode);
-       int inode_referenced = 0;
-       int priority = wb_priority(wbc);
+       unsigned offset;
        int err;
 
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
        nfs_add_stats(inode, NFSIOS_WRITEPAGES, 1);
 
-       /*
-        * Note: We need to ensure that we have a reference to the inode
-        *       if we are to do asynchronous writes. If not, waiting
-        *       in nfs_wait_on_request() may deadlock with clear_inode().
-        *
-        *       If igrab() fails here, then it is in any case safe to
-        *       call nfs_wb_page(), since there will be no pending writes.
-        */
-       if (igrab(inode) != 0)
-               inode_referenced = 1;
-       end_index = i_size >> PAGE_CACHE_SHIFT;
-
-       /* Ensure we've flushed out any previous writes */
-       nfs_wb_page_priority(inode, page, priority);
-
-       /* easy case */
-       if (page->index < end_index)
-               goto do_it;
-       /* things got complicated... */
-       offset = i_size & (PAGE_CACHE_SIZE-1);
-
-       /* OK, are we completely out? */
-       err = 0; /* potential race with truncate - ignore */
-       if (page->index >= end_index+1 || !offset)
+       err = nfs_page_mark_flush(page);
+       if (err <= 0)
+               goto out;
+       err = 0;
+       offset = nfs_page_length(page);
+       if (!offset)
                goto out;
-do_it:
+
        ctx = nfs_find_open_context(inode, NULL, FMODE_WRITE);
        if (ctx == NULL) {
                err = -EBADF;
                goto out;
        }
-       lock_kernel();
-       if (!IS_SYNC(inode) && inode_referenced) {
-               err = nfs_writepage_async(ctx, inode, page, 0, offset);
-               if (!wbc->for_writepages)
-                       nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
-       } else {
-               err = nfs_writepage_sync(ctx, inode, page, 0,
-                                               offset, priority);
-               if (err >= 0) {
-                       if (err != offset)
-                               redirty_page_for_writepage(wbc, page);
-                       err = 0;
-               }
-       }
-       unlock_kernel();
+       err = nfs_writepage_setup(ctx, page, 0, offset);
        put_nfs_open_context(ctx);
+       if (err != 0)
+               goto out;
+       err = nfs_page_mark_flush(page);
+       if (err > 0)
+               err = 0;
 out:
+       if (!wbc->for_writepages)
+               nfs_flush_mapping(page->mapping, wbc, wb_priority(wbc));
+       return err;
+}
+
+int nfs_writepage(struct page *page, struct writeback_control *wbc)
+{
+       int err;
+
+       err = nfs_writepage_locked(page, wbc);
        unlock_page(page);
-       if (inode_referenced)
-               iput(inode);
        return err; 
 }
 
@@ -379,21 +356,18 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
                        return 0;
                nfs_wait_on_write_congestion(mapping, 0);
        }
-       err = nfs_flush_inode(inode, 0, 0, wb_priority(wbc));
+       err = nfs_flush_mapping(mapping, wbc, wb_priority(wbc));
        if (err < 0)
                goto out;
        nfs_add_stats(inode, NFSIOS_WRITEPAGES, err);
-       wbc->nr_to_write -= err;
        if (!wbc->nonblocking && wbc->sync_mode == WB_SYNC_ALL) {
                err = nfs_wait_on_requests(inode, 0, 0);
                if (err < 0)
                        goto out;
        }
        err = nfs_commit_inode(inode, wb_priority(wbc));
-       if (err > 0) {
-               wbc->nr_to_write -= err;
+       if (err > 0)
                err = 0;
-       }
 out:
        clear_bit(BDI_write_congested, &bdi->state);
        wake_up_all(&nfs_write_congestion);
@@ -420,6 +394,7 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
                        nfsi->change_attr++;
        }
        SetPagePrivate(req->wb_page);
+       set_page_private(req->wb_page, (unsigned long)req);
        nfsi->npages++;
        atomic_inc(&req->wb_count);
        return 0;
@@ -436,6 +411,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
        BUG_ON (!NFS_WBACK_BUSY(req));
 
        spin_lock(&nfsi->req_lock);
+       set_page_private(req->wb_page, 0);
        ClearPagePrivate(req->wb_page);
        radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
        nfsi->npages--;
@@ -449,33 +425,6 @@ static void nfs_inode_remove_request(struct nfs_page *req)
        nfs_release_request(req);
 }
 
-/*
- * Find a request
- */
-static inline struct nfs_page *
-_nfs_find_request(struct inode *inode, unsigned long index)
-{
-       struct nfs_inode *nfsi = NFS_I(inode);
-       struct nfs_page *req;
-
-       req = (struct nfs_page*)radix_tree_lookup(&nfsi->nfs_page_tree, index);
-       if (req)
-               atomic_inc(&req->wb_count);
-       return req;
-}
-
-static struct nfs_page *
-nfs_find_request(struct inode *inode, unsigned long index)
-{
-       struct nfs_page         *req;
-       struct nfs_inode        *nfsi = NFS_I(inode);
-
-       spin_lock(&nfsi->req_lock);
-       req = _nfs_find_request(inode, index);
-       spin_unlock(&nfsi->req_lock);
-       return req;
-}
-
 /*
  * Add a request to the inode's dirty list.
  */
@@ -491,8 +440,14 @@ nfs_mark_request_dirty(struct nfs_page *req)
        nfs_list_add_request(req, &nfsi->dirty);
        nfsi->ndirty++;
        spin_unlock(&nfsi->req_lock);
-       inc_zone_page_state(req->wb_page, NR_FILE_DIRTY);
-       mark_inode_dirty(inode);
+       __mark_inode_dirty(inode, I_DIRTY_PAGES);
+}
+
+static void
+nfs_redirty_request(struct nfs_page *req)
+{
+       clear_bit(PG_FLUSHING, &req->wb_flags);
+       __set_page_dirty_nobuffers(req->wb_page);
 }
 
 /*
@@ -501,8 +456,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
 static inline int
 nfs_dirty_request(struct nfs_page *req)
 {
-       struct nfs_inode *nfsi = NFS_I(req->wb_context->dentry->d_inode);
-       return !list_empty(&req->wb_list) && req->wb_list_head == &nfsi->dirty;
+       return test_bit(PG_FLUSHING, &req->wb_flags) == 0;
 }
 
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -520,7 +474,7 @@ nfs_mark_request_commit(struct nfs_page *req)
        nfsi->ncommit++;
        spin_unlock(&nfsi->req_lock);
        inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
-       mark_inode_dirty(inode);
+       __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
 }
 #endif
 
@@ -597,31 +551,6 @@ static void nfs_cancel_commit_list(struct list_head *head)
        }
 }
 
-/*
- * nfs_scan_dirty - Scan an inode for dirty requests
- * @inode: NFS inode to scan
- * @dst: destination list
- * @idx_start: lower bound of page->index to scan.
- * @npages: idx_start + npages sets the upper bound to scan.
- *
- * Moves requests from the inode's dirty page list.
- * The requests are *not* checked to ensure that they form a contiguous set.
- */
-static int
-nfs_scan_dirty(struct inode *inode, struct list_head *dst, unsigned long idx_start, unsigned int npages)
-{
-       struct nfs_inode *nfsi = NFS_I(inode);
-       int res = 0;
-
-       if (nfsi->ndirty != 0) {
-               res = nfs_scan_lock_dirty(nfsi, dst, idx_start, npages);
-               nfsi->ndirty -= res;
-               if ((nfsi->ndirty == 0) != list_empty(&nfsi->dirty))
-                       printk(KERN_ERR "NFS: desynchronized value of nfs_i.ndirty.\n");
-       }
-       return res;
-}
-
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 /*
  * nfs_scan_commit - Scan an inode for commit requests
@@ -698,27 +627,27 @@ static int nfs_wait_on_write_congestion(struct address_space *mapping, int intr)
  * Note: Should always be called with the Page Lock held!
  */
 static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
-               struct inode *inode, struct page *page,
-               unsigned int offset, unsigned int bytes)
+               struct page *page, unsigned int offset, unsigned int bytes)
 {
-       struct nfs_server *server = NFS_SERVER(inode);
+       struct inode *inode = page->mapping->host;
        struct nfs_inode *nfsi = NFS_I(inode);
        struct nfs_page         *req, *new = NULL;
        unsigned long           rqend, end;
 
        end = offset + bytes;
 
-       if (nfs_wait_on_write_congestion(page->mapping, server->flags & NFS_MOUNT_INTR))
+       if (nfs_wait_on_write_congestion(page->mapping, NFS_SERVER(inode)->flags & NFS_MOUNT_INTR))
                return ERR_PTR(-ERESTARTSYS);
        for (;;) {
                /* Loop over all inode entries and see if we find
                 * A request for the page we wish to update
                 */
                spin_lock(&nfsi->req_lock);
-               req = _nfs_find_request(inode, page->index);
+               req = nfs_page_find_request_locked(page);
                if (req) {
                        if (!nfs_lock_request_dontget(req)) {
                                int error;
+
                                spin_unlock(&nfsi->req_lock);
                                error = nfs_wait_on_request(req);
                                nfs_release_request(req);
@@ -745,7 +674,6 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
                                return ERR_PTR(error);
                        }
                        spin_unlock(&nfsi->req_lock);
-                       nfs_mark_request_dirty(new);
                        return new;
                }
                spin_unlock(&nfsi->req_lock);
@@ -786,9 +714,8 @@ static struct nfs_page * nfs_update_request(struct nfs_open_context* ctx,
 int nfs_flush_incompatible(struct file *file, struct page *page)
 {
        struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
-       struct inode    *inode = page->mapping->host;
        struct nfs_page *req;
-       int             status = 0;
+       int do_flush, status;
        /*
         * Look for a request corresponding to this page. If there
         * is one, and it belongs to another file, we flush it out
@@ -797,13 +724,18 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
         * Also do the same if we find a request from an existing
         * dropped page.
         */
-       req = nfs_find_request(inode, page->index);
-       if (req) {
-               if (req->wb_page != page || ctx != req->wb_context)
-                       status = nfs_wb_page(inode, page);
+       do {
+               req = nfs_page_find_request(page);
+               if (req == NULL)
+                       return 0;
+               do_flush = req->wb_page != page || req->wb_context != ctx
+                       || !nfs_dirty_request(req);
                nfs_release_request(req);
-       }
-       return (status < 0) ? status : 0;
+               if (!do_flush)
+                       return 0;
+               status = nfs_wb_page(page->mapping->host, page);
+       } while (status == 0);
+       return status;
 }
 
 /*
@@ -817,7 +749,6 @@ int nfs_updatepage(struct file *file, struct page *page,
 {
        struct nfs_open_context *ctx = (struct nfs_open_context *)file->private_data;
        struct inode    *inode = page->mapping->host;
-       struct nfs_page *req;
        int             status = 0;
 
        nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -827,62 +758,18 @@ int nfs_updatepage(struct file *file, struct page *page,
                file->f_dentry->d_name.name, count,
                (long long)(page_offset(page) +offset));
 
-       if (IS_SYNC(inode)) {
-               status = nfs_writepage_sync(ctx, inode, page, offset, count, 0);
-               if (status > 0) {
-                       if (offset == 0 && status == PAGE_CACHE_SIZE)
-                               SetPageUptodate(page);
-                       return 0;
-               }
-               return status;
-       }
-
        /* If we're not using byte range locks, and we know the page
         * is entirely in cache, it may be more efficient to avoid
         * fragmenting write requests.
         */
        if (PageUptodate(page) && inode->i_flock == NULL && !(file->f_mode & O_SYNC)) {
-               loff_t end_offs = i_size_read(inode) - 1;
-               unsigned long end_index = end_offs >> PAGE_CACHE_SHIFT;
-
-               count += offset;
+               count = max(count + offset, nfs_page_length(page));
                offset = 0;
-               if (unlikely(end_offs < 0)) {
-                       /* Do nothing */
-               } else if (page->index == end_index) {
-                       unsigned int pglen;
-                       pglen = (unsigned int)(end_offs & (PAGE_CACHE_SIZE-1)) + 1;
-                       if (count < pglen)
-                               count = pglen;
-               } else if (page->index < end_index)
-                       count = PAGE_CACHE_SIZE;
        }
 
-       /*
-        * Try to find an NFS request corresponding to this page
-        * and update it.
-        * If the existing request cannot be updated, we must flush
-        * it out now.
-        */
-       do {
-               req = nfs_update_request(ctx, inode, page, offset, count);
-               status = (IS_ERR(req)) ? PTR_ERR(req) : 0;
-               if (status != -EBUSY)
-                       break;
-               /* Request could not be updated. Flush it out and try again */
-               status = nfs_wb_page(inode, page);
-       } while (status >= 0);
-       if (status < 0)
-               goto done;
-
-       status = 0;
+       status = nfs_writepage_setup(ctx, page, offset, count);
+       __set_page_dirty_nobuffers(page);
 
-       /* Update file length */
-       nfs_grow_file(page, offset, count);
-       /* Set the PG_uptodate flag? */
-       nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
-       nfs_unlock_request(req);
-done:
         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
                        status, (long long)i_size_read(inode));
        if (status < 0)
@@ -897,7 +784,7 @@ static void nfs_writepage_release(struct nfs_page *req)
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
        if (!PageError(req->wb_page)) {
                if (NFS_NEED_RESCHED(req)) {
-                       nfs_mark_request_dirty(req);
+                       nfs_redirty_request(req);
                        goto out;
                } else if (NFS_NEED_COMMIT(req)) {
                        nfs_mark_request_commit(req);
@@ -979,9 +866,7 @@ static void nfs_execute_write(struct nfs_write_data *data)
        sigset_t oldset;
 
        rpc_clnt_sigmask(clnt, &oldset);
-       lock_kernel();
        rpc_execute(&data->task);
-       unlock_kernel();
        rpc_clnt_sigunmask(clnt, &oldset);
 }
 
@@ -1015,7 +900,6 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, int how)
        atomic_set(&req->wb_complete, requests);
 
        ClearPageError(page);
-       set_page_writeback(page);
        offset = 0;
        nbytes = req->wb_bytes;
        do {
@@ -1043,9 +927,9 @@ out_bad:
        while (!list_empty(&list)) {
                data = list_entry(list.next, struct nfs_write_data, pages);
                list_del(&data->pages);
-               nfs_writedata_free(data);
+               nfs_writedata_release(data);
        }
-       nfs_mark_request_dirty(req);
+       nfs_redirty_request(req);
        nfs_clear_page_writeback(req);
        return -ENOMEM;
 }
@@ -1076,7 +960,6 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
                nfs_list_remove_request(req);
                nfs_list_add_request(req, &data->pages);
                ClearPageError(req->wb_page);
-               set_page_writeback(req->wb_page);
                *pages++ = req->wb_page;
                count += req->wb_bytes;
        }
@@ -1091,7 +974,7 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, int how)
        while (!list_empty(head)) {
                struct nfs_page *req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
-               nfs_mark_request_dirty(req);
+               nfs_redirty_request(req);
                nfs_clear_page_writeback(req);
        }
        return -ENOMEM;
@@ -1126,7 +1009,7 @@ out_err:
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
-               nfs_mark_request_dirty(req);
+               nfs_redirty_request(req);
                nfs_clear_page_writeback(req);
        }
        return error;
@@ -1442,7 +1325,7 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
                }
                /* We have a mismatch. Write the page again */
                dprintk(" mismatch\n");
-               nfs_mark_request_dirty(req);
+               nfs_redirty_request(req);
        next:
                nfs_clear_page_writeback(req);
        }
@@ -1459,18 +1342,17 @@ static inline int nfs_commit_list(struct inode *inode, struct list_head *head, i
 }
 #endif
 
-static int nfs_flush_inode(struct inode *inode, unsigned long idx_start,
-                          unsigned int npages, int how)
+static long nfs_flush_mapping(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
-       struct nfs_inode *nfsi = NFS_I(inode);
+       struct nfs_inode *nfsi = NFS_I(mapping->host);
        LIST_HEAD(head);
-       int res;
+       long res;
 
        spin_lock(&nfsi->req_lock);
-       res = nfs_scan_dirty(inode, &head, idx_start, npages);
+       res = nfs_scan_dirty(mapping, wbc, &head);
        spin_unlock(&nfsi->req_lock);
        if (res) {
-               int error = nfs_flush_list(inode, &head, res, how);
+               int error = nfs_flush_list(mapping->host, &head, res, how);
                if (error < 0)
                        return error;
        }
@@ -1496,38 +1378,62 @@ int nfs_commit_inode(struct inode *inode, int how)
 }
 #endif
 
-int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
-               unsigned int npages, int how)
+long nfs_sync_mapping_wait(struct address_space *mapping, struct writeback_control *wbc, int how)
 {
+       struct inode *inode = mapping->host;
        struct nfs_inode *nfsi = NFS_I(inode);
+       unsigned long idx_start, idx_end;
+       unsigned int npages = 0;
        LIST_HEAD(head);
        int nocommit = how & FLUSH_NOCOMMIT;
-       int pages, ret;
-
+       long pages, ret;
+
+       /* FIXME */
+       if (wbc->range_cyclic)
+               idx_start = 0;
+       else {
+               idx_start = wbc->range_start >> PAGE_CACHE_SHIFT;
+               idx_end = wbc->range_end >> PAGE_CACHE_SHIFT;
+               if (idx_end > idx_start) {
+                       unsigned long l_npages = 1 + idx_end - idx_start;
+                       npages = l_npages;
+                       if (sizeof(npages) != sizeof(l_npages) &&
+                                       (unsigned long)npages != l_npages)
+                               npages = 0;
+               }
+       }
        how &= ~FLUSH_NOCOMMIT;
        spin_lock(&nfsi->req_lock);
        do {
+               wbc->pages_skipped = 0;
                ret = nfs_wait_on_requests_locked(inode, idx_start, npages);
                if (ret != 0)
                        continue;
-               pages = nfs_scan_dirty(inode, &head, idx_start, npages);
+               pages = nfs_scan_dirty(mapping, wbc, &head);
                if (pages != 0) {
                        spin_unlock(&nfsi->req_lock);
-                       if (how & FLUSH_INVALIDATE)
+                       if (how & FLUSH_INVALIDATE) {
                                nfs_cancel_dirty_list(&head);
-                       else
+                               ret = pages;
+                       } else
                                ret = nfs_flush_list(inode, &head, pages, how);
                        spin_lock(&nfsi->req_lock);
                        continue;
                }
+               if (wbc->pages_skipped != 0)
+                       continue;
                if (nocommit)
                        break;
                pages = nfs_scan_commit(inode, &head, idx_start, npages);
-               if (pages == 0)
+               if (pages == 0) {
+                       if (wbc->pages_skipped != 0)
+                               continue;
                        break;
+               }
                if (how & FLUSH_INVALIDATE) {
                        spin_unlock(&nfsi->req_lock);
                        nfs_cancel_commit_list(&head);
+                       ret = pages;
                        spin_lock(&nfsi->req_lock);
                        continue;
                }
@@ -1540,6 +1446,106 @@ int nfs_sync_inode_wait(struct inode *inode, unsigned long idx_start,
        return ret;
 }
 
+/*
+ * Flush all of the inode's pages; returns 0 on success or a negative error.
+ */
+int nfs_wb_all(struct inode *inode)
+{
+       struct address_space *mapping = inode->i_mapping;
+       struct writeback_control wbc = {
+               .bdi = mapping->backing_dev_info,
+               .sync_mode = WB_SYNC_ALL,
+               .nr_to_write = LONG_MAX,
+               .for_writepages = 1,
+               .range_cyclic = 1,      /* nfs_sync_mapping_wait() starts at index 0 when this is set */
+       };
+       int ret;        /* NOTE(review): nfs_sync_mapping_wait() returns long; it is narrowed to int here */
+
+       ret = generic_writepages(mapping, &wbc);
+       if (ret < 0)
+               goto out;
+       ret = nfs_sync_mapping_wait(mapping, &wbc, 0);
+       if (ret >= 0)   /* any non-negative result is reported as plain success */
+               return 0;
+out:
+       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);       /* error path only: mark the inode as having dirty pages */
+       return ret;
+}
+
+int nfs_sync_mapping_range(struct address_space *mapping, loff_t range_start, loff_t range_end, int how)
+{      /* Write back and sync the byte range [range_start, range_end] of the mapping; returns 0 or a negative error. */
+       struct writeback_control wbc = {
+               .bdi = mapping->backing_dev_info,
+               .sync_mode = WB_SYNC_ALL,
+               .nr_to_write = LONG_MAX,
+               .range_start = range_start,
+               .range_end = range_end,
+               .for_writepages = 1,
+       };
+       int ret;
+
+       if (!(how & FLUSH_NOWRITEPAGE)) {       /* callers pass FLUSH_NOWRITEPAGE to skip the generic_writepages() pass */
+               ret = generic_writepages(mapping, &wbc);
+               if (ret < 0)
+                       goto out;
+       }
+       ret = nfs_sync_mapping_wait(mapping, &wbc, how);        /* 'how' is handed on unchanged */
+       if (ret >= 0)   /* any non-negative result is reported as plain success */
+               return 0;
+out:
+       __mark_inode_dirty(mapping->host, I_DIRTY_PAGES);       /* error path only: mark the inode as having dirty pages */
+       return ret;
+}
+
+int nfs_wb_page_priority(struct inode *inode, struct page *page, int how)
+{      /* Write back and sync one locked page with the given FLUSH_* flags; returns 0 or a negative error. */
+       loff_t range_start = page_offset(page);
+       loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1); /* last byte of this page (inclusive) */
+       struct writeback_control wbc = {
+               .bdi = page->mapping->backing_dev_info,
+               .sync_mode = WB_SYNC_ALL,
+               .nr_to_write = LONG_MAX,
+               .range_start = range_start,
+               .range_end = range_end,
+       };      /* NOTE(review): unlike nfs_wb_all(), .for_writepages is not set here - confirm intended */
+       int ret;
+
+       BUG_ON(!PageLocked(page));      /* the caller must already hold the page lock */
+       if (!(how & FLUSH_NOWRITEPAGE) && clear_page_dirty_for_io(page)) {      /* presumably true only if the page was dirty - confirm */
+               ret = nfs_writepage_locked(page, &wbc);
+               if (ret < 0)
+                       goto out;
+       }
+       ret = nfs_sync_mapping_wait(page->mapping, &wbc, how);  /* NOTE(review): start/end share one page index (if page_offset() is page-aligned), so npages stays 0 there - confirm */
+       if (ret >= 0)   /* any non-negative result is reported as plain success */
+               return 0;
+out:
+       __mark_inode_dirty(inode, I_DIRTY_PAGES);       /* error path only; uses the caller-supplied inode, not page->mapping->host */
+       return ret;
+}
+
+/*
+ * Write back all requests on one (locked) page - we do this before reading it.
+ */
+int nfs_wb_page(struct inode *inode, struct page* page)
+{
+       return nfs_wb_page_priority(inode, page, FLUSH_STABLE); /* 0 or a negative error, as returned by the callee */
+}
+
+int nfs_set_page_dirty(struct page *page)
+{      /* Flag any existing NFS request on the page as needing a flush, then dirty the page. */
+       struct nfs_page *req;
+
+       req = nfs_page_find_request(page);
+       if (req != NULL) {
+               /* Mark any existing write requests for flushing */
+               set_bit(PG_NEED_FLUSH, &req->wb_flags);
+               nfs_release_request(req);       /* presumably drops the reference taken by nfs_page_find_request() - confirm */
+       }
+       return __set_page_dirty_nobuffers(page);        /* the helper's result is returned as-is */
+}
+
+
 int __init nfs_init_writepagecache(void)
 {
        nfs_wdata_cachep = kmem_cache_create("nfs_write_data",
index 625ffea98561e4ca340636c1e5610ed66931d5a1..04963063e6200023dd0524f023b30f4973317df1 100644 (file)
@@ -33,6 +33,7 @@
 #define FLUSH_HIGHPRI          16      /* high priority memory reclaim flush */
 #define FLUSH_NOCOMMIT         32      /* Don't send the NFSv3/v4 COMMIT */
 #define FLUSH_INVALIDATE       64      /* Invalidate the page cache */
+#define FLUSH_NOWRITEPAGE      128     /* Don't call writepage() */
 
 #ifdef __KERNEL__
 
@@ -427,19 +428,21 @@ extern int  nfs_flush_incompatible(struct file *file, struct page *page);
 extern int  nfs_updatepage(struct file *, struct page *, unsigned int, unsigned int);
 extern int nfs_writeback_done(struct rpc_task *, struct nfs_write_data *);
 extern void nfs_writedata_release(void *);
-
-#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
-struct nfs_write_data *nfs_commit_alloc(void);
-void nfs_commit_free(struct nfs_write_data *p);
-#endif
+extern int nfs_set_page_dirty(struct page *);
 
 /*
  * Try to write back everything synchronously (but check the
  * return value!)
  */
-extern int  nfs_sync_inode_wait(struct inode *, unsigned long, unsigned int, int);
+extern long nfs_sync_mapping_wait(struct address_space *, struct writeback_control *, int);
+extern int nfs_sync_mapping_range(struct address_space *, loff_t, loff_t, int);
+extern int nfs_wb_all(struct inode *inode);
+extern int nfs_wb_page(struct inode *inode, struct page* page);
+extern int nfs_wb_page_priority(struct inode *inode, struct page* page, int how);
 #if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
 extern int  nfs_commit_inode(struct inode *, int);
+extern struct nfs_write_data *nfs_commit_alloc(void);
+extern void nfs_commit_free(struct nfs_write_data *wdata);
 extern void nfs_commit_release(void *wdata);
 #else
 static inline int
@@ -455,28 +458,6 @@ nfs_have_writebacks(struct inode *inode)
        return NFS_I(inode)->npages != 0;
 }
 
-static inline int
-nfs_wb_all(struct inode *inode)
-{
-       int error = nfs_sync_inode_wait(inode, 0, 0, 0);
-       return (error < 0) ? error : 0;
-}
-
-/*
- * Write back all requests on one page - we do this before reading it.
- */
-static inline int nfs_wb_page_priority(struct inode *inode, struct page* page, int how)
-{
-       int error = nfs_sync_inode_wait(inode, page->index, 1,
-                       how | FLUSH_STABLE);
-       return (error < 0) ? error : 0;
-}
-
-static inline int nfs_wb_page(struct inode *inode, struct page* page)
-{
-       return nfs_wb_page_priority(inode, page, 0);
-}
-
 /*
  * Allocate nfs_write_data structures
  */
index 1f7bd287c230e5cc78cc2cd079b39d77f3768c99..2e555d49c9b732fa4379b8b1c17a98cb430ce857 100644 (file)
@@ -30,6 +30,8 @@
 #define PG_BUSY                        0
 #define PG_NEED_COMMIT         1
 #define PG_NEED_RESCHED                2
+#define PG_NEED_FLUSH          3
+#define PG_FLUSHING            4
 
 struct nfs_inode;
 struct nfs_page {
@@ -60,8 +62,9 @@ extern        void nfs_clear_request(struct nfs_page *req);
 extern void nfs_release_request(struct nfs_page *req);
 
 
-extern  int nfs_scan_lock_dirty(struct nfs_inode *nfsi, struct list_head *dst,
-                               unsigned long idx_start, unsigned int npages);
+extern long nfs_scan_dirty(struct address_space *mapping,
+                               struct writeback_control *wbc,
+                               struct list_head *dst);
 extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *head, struct list_head *dst,
                          unsigned long idx_start, unsigned int npages);
 extern int nfs_coalesce_requests(struct list_head *, struct list_head *,
index 768c1ad5ff6f93e1859ca7771fb20fbf36c4faf2..9ee9da5e1cc9de834a4e81c9155b3e9e6b0f57ca 100644 (file)
@@ -785,8 +785,6 @@ struct nfs_rpc_ops {
        int     (*readlink)(struct inode *, struct page *, unsigned int,
                            unsigned int);
        int     (*read)    (struct nfs_read_data *);
-       int     (*write)   (struct nfs_write_data *);
-       int     (*commit)  (struct nfs_write_data *);
        int     (*create)  (struct inode *, struct dentry *,
                            struct iattr *, int, struct nameidata *);
        int     (*remove)  (struct inode *, struct qstr *);
index 97b62e97dd8d3fcaef89e0332005d97abd397c5c..2db2fbf349472e3760a003f6d046e35687658ed6 100644 (file)
@@ -90,8 +90,6 @@ struct gss_cred {
 #define gc_flags               gc_base.cr_flags
 #define gc_expire              gc_base.cr_expire
 
-void print_hexl(u32 *p, u_int length, u_int offset);
-
 #endif /* __KERNEL__ */
 #endif /* _LINUX_SUNRPC_AUTH_GSS_H */
 
index f6d1d646ce05c1039437ff7af52155df9afbe353..a1be89deb3af7cb0f5d592b5bbfcdb82e216ccd0 100644 (file)
@@ -53,6 +53,7 @@ struct rpc_clnt {
        struct dentry *         cl_dentry;      /* inode */
        struct rpc_clnt *       cl_parent;      /* Points to parent of clones */
        struct rpc_rtt          cl_rtt_default;
+       struct rpc_program *    cl_program;
        char                    cl_inline_name[32];
 };
 
index e4729aa676547d49926d9fe798204eb7f7910972..60fce3c928570166fdf354eee838adc22dc24d37 100644 (file)
@@ -62,12 +62,6 @@ extern unsigned int          nlm_debug;
 # define RPC_IFDEBUG(x)
 #endif
 
-#ifdef RPC_PROFILE
-# define pprintk(args...)      printk(## args)
-#else
-# define pprintk(args...)      do ; while (0)
-#endif
-
 /*
  * Sysctl interface for RPC debugging
  */
index e30ba201910ae8c24dde51bd01451a6fca6e0614..5a4b1e0206e3a8afff8bf8f7a7fc00051f121ba0 100644 (file)
 
 struct krb5_ctx {
        int                     initiate; /* 1 = initiating, 0 = accepting */
-       int                     seed_init;
-       unsigned char           seed[16];
-       int                     signalg;
-       int                     sealalg;
        struct crypto_blkcipher *enc;
        struct crypto_blkcipher *seq;
        s32                     endtime;
@@ -117,7 +113,7 @@ enum seal_alg {
 #define ENCTYPE_UNKNOWN         0x01ff
 
 s32
-make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
+make_checksum(char *, char *header, int hdrlen, struct xdr_buf *body,
                   int body_offset, struct xdr_netobj *cksum);
 
 u32 gss_get_mic_kerberos(struct gss_ctx *, struct xdr_buf *,
index 2cf3fbb40b4fe1e9617153e86be151ac44c62c0b..e3e6a3437f8b20b63c31bfdc792a16a906ebe2f9 100644 (file)
 #include <linux/sunrpc/gss_asn1.h>
 
 struct spkm3_ctx {
-       struct xdr_netobj       ctx_id; /* per message context id */
-       int                     qop;         /* negotiated qop */
+       struct xdr_netobj       ctx_id;  /* per message context id */
+       int                     endtime; /* endtime of the context */
        struct xdr_netobj       mech_used;
        unsigned int            ret_flags ;
-       unsigned int            req_flags ;
-       struct xdr_netobj       share_key;
-       int                     conf_alg;
-       struct crypto_blkcipher *derived_conf_key;
-       int                     intg_alg;
-       struct crypto_blkcipher *derived_integ_key;
-       int                     keyestb_alg;   /* alg used to get share_key */
-       int                     owf_alg;   /* one way function */
+       struct xdr_netobj       conf_alg;
+       struct xdr_netobj       derived_conf_key;
+       struct xdr_netobj       intg_alg;
+       struct xdr_netobj       derived_integ_key;
 };
 
-/* from openssl/objects.h */
-/* XXX need SEAL_ALG_NONE */
-#define NID_md5                4
-#define NID_dhKeyAgreement     28 
-#define NID_des_cbc            31 
-#define NID_sha1               64
-#define NID_cast5_cbc          108
+/* OIDs declarations for K-ALG, I-ALG, C-ALG, and OWF-ALG */
+extern const struct xdr_netobj hmac_md5_oid;
+extern const struct xdr_netobj cast5_cbc_oid;
 
 /* SPKM InnerContext Token types */
 
@@ -46,11 +38,13 @@ u32 spkm3_make_token(struct spkm3_ctx *ctx, struct xdr_buf * text, struct xdr_ne
 u32 spkm3_read_token(struct spkm3_ctx *ctx, struct xdr_netobj *read_token, struct xdr_buf *message_buffer, int toktype);
 
 #define CKSUMTYPE_RSA_MD5            0x0007
+#define CKSUMTYPE_HMAC_MD5           0x0008
 
-s32 make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
-                   int body_offset, struct xdr_netobj *cksum);
+s32 make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
+               unsigned int hdrlen, struct xdr_buf *body,
+               unsigned int body_offset, struct xdr_netobj *cksum);
 void asn1_bitstring_len(struct xdr_netobj *in, int *enclen, int *zerobits);
-int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen, 
+int decode_asn1_bitstring(struct xdr_netobj *out, char *in, int enclen,
                    int explen);
 void spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, 
                    unsigned char *ctxhdr, int elen, int zbit);
index 0746c3b16f3ab0cfa80c1a32ec009fafae4e2108..97c761652581a722b7872bab49445e37521a8ea7 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/timer.h>
 #include <linux/sunrpc/types.h>
+#include <linux/rcupdate.h>
 #include <linux/spinlock.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
@@ -85,6 +86,7 @@ struct rpc_task {
        union {
                struct work_struct      tk_work;        /* Async task work queue */
                struct rpc_wait         tk_wait;        /* RPC wait */
+               struct rcu_head         tk_rcu;         /* for task deletion */
        } u;
 
        unsigned short          tk_timeouts;    /* maj timeouts */
@@ -178,13 +180,6 @@ struct rpc_call_ops {
        } while (0)
 
 #define RPC_IS_ACTIVATED(t)    (test_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate))
-#define rpc_set_active(t)      (set_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate))
-#define rpc_clear_active(t)    \
-       do { \
-               smp_mb__before_clear_bit(); \
-               clear_bit(RPC_TASK_ACTIVE, &(t)->tk_runstate); \
-               smp_mb__after_clear_bit(); \
-       } while(0)
 
 /*
  * Task priorities.
@@ -254,8 +249,10 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
 void           rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt,
                                int flags, const struct rpc_call_ops *ops,
                                void *data);
+void           rpc_put_task(struct rpc_task *);
 void           rpc_release_task(struct rpc_task *);
 void           rpc_exit_task(struct rpc_task *);
+void           rpc_release_calldata(const struct rpc_call_ops *, void *);
 void           rpc_killall_tasks(struct rpc_clnt *);
 int            rpc_execute(struct rpc_task *);
 void           rpc_init_priority_wait_queue(struct rpc_wait_queue *, const char *);
index 9a527c3643948a266bdcc862c2537e5f277a2f56..9e340fa23c0633f55c6e152d049874c41282f78a 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/uio.h>
 #include <asm/byteorder.h>
+#include <linux/scatterlist.h>
 
 /*
  * Buffer adjustment
@@ -139,29 +140,30 @@ xdr_adjust_iovec(struct kvec *iov, __be32 *p)
  */
 extern void xdr_shift_buf(struct xdr_buf *, size_t);
 extern void xdr_buf_from_iov(struct kvec *, struct xdr_buf *);
-extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, int, int);
-extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, int);
-extern int read_bytes_from_xdr_buf(struct xdr_buf *, int, void *, int);
-extern int write_bytes_to_xdr_buf(struct xdr_buf *, int, void *, int);
+extern int xdr_buf_subsegment(struct xdr_buf *, struct xdr_buf *, unsigned int, unsigned int);
+extern int xdr_buf_read_netobj(struct xdr_buf *, struct xdr_netobj *, unsigned int);
+extern int read_bytes_from_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
+extern int write_bytes_to_xdr_buf(struct xdr_buf *, unsigned int, void *, unsigned int);
 
 /*
  * Helper structure for copying from an sk_buff.
  */
-typedef struct {
+struct xdr_skb_reader {
        struct sk_buff  *skb;
        unsigned int    offset;
        size_t          count;
        __wsum          csum;
-} skb_reader_t;
+};
 
-typedef size_t (*skb_read_actor_t)(skb_reader_t *desc, void *to, size_t len);
+typedef size_t (*xdr_skb_read_actor)(struct xdr_skb_reader *desc, void *to, size_t len);
 
+size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len);
 extern int csum_partial_copy_to_xdr(struct xdr_buf *, struct sk_buff *);
 extern ssize_t xdr_partial_copy_from_skb(struct xdr_buf *, unsigned int,
-               skb_reader_t *, skb_read_actor_t);
+               struct xdr_skb_reader *, xdr_skb_read_actor);
 
-extern int xdr_encode_word(struct xdr_buf *, int, u32);
-extern int xdr_decode_word(struct xdr_buf *, int, u32 *);
+extern int xdr_encode_word(struct xdr_buf *, unsigned int, u32);
+extern int xdr_decode_word(struct xdr_buf *, unsigned int, u32 *);
 
 struct xdr_array2_desc;
 typedef int (*xdr_xcode_elem_t)(struct xdr_array2_desc *desc, void *elem);
@@ -196,6 +198,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32
 extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes);
 extern void xdr_read_pages(struct xdr_stream *xdr, unsigned int len);
 extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len);
+extern int xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len, int (*actor)(struct scatterlist *, void *), void *data);
 
 #endif /* __KERNEL__ */
 
index 3e04c1512fc444f6a8330612165f3d4626aad0a9..f780e72fc417e44b1701edd63d9af1ef271f703e 100644 (file)
@@ -106,7 +106,6 @@ struct rpc_rqst {
 
 struct rpc_xprt_ops {
        void            (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
-       char *          (*print_addr)(struct rpc_xprt *xprt, enum rpc_display_format_t format);
        int             (*reserve_xprt)(struct rpc_task *task);
        void            (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
        void            (*rpcbind)(struct rpc_task *task);
@@ -126,8 +125,6 @@ struct rpc_xprt_ops {
 struct rpc_xprt {
        struct kref             kref;           /* Reference count */
        struct rpc_xprt_ops *   ops;            /* transport methods */
-       struct socket *         sock;           /* BSD socket layer */
-       struct sock *           inet;           /* INET layer */
 
        struct rpc_timeout      timeout;        /* timeout parms */
        struct sockaddr_storage addr;           /* server address */
@@ -137,9 +134,6 @@ struct rpc_xprt {
        unsigned long           cong;           /* current congestion */
        unsigned long           cwnd;           /* congestion window */
 
-       size_t                  rcvsize,        /* transport rcv buffer size */
-                               sndsize;        /* transport send buffer size */
-
        size_t                  max_payload;    /* largest RPC payload size,
                                                   in bytes */
        unsigned int            tsh_size;       /* size of transport specific
@@ -157,28 +151,12 @@ struct rpc_xprt {
        unsigned char           shutdown   : 1, /* being shut down */
                                resvport   : 1; /* use a reserved port */
 
-       /*
-        * XID
-        */
-       __u32                   xid;            /* Next XID value to use */
-
-       /*
-        * State of TCP reply receive stuff
-        */
-       __be32                  tcp_recm,       /* Fragment header */
-                               tcp_xid;        /* Current XID */
-       u32                     tcp_reclen,     /* fragment length */
-                               tcp_offset;     /* fragment offset */
-       unsigned long           tcp_copied,     /* copied to request */
-                               tcp_flags;
        /*
         * Connection of transports
         */
        unsigned long           connect_timeout,
                                bind_timeout,
                                reestablish_timeout;
-       struct delayed_work     connect_worker;
-       unsigned short          port;
 
        /*
         * Disconnection of idle transports
@@ -193,8 +171,8 @@ struct rpc_xprt {
         */
        spinlock_t              transport_lock; /* lock transport info */
        spinlock_t              reserve_lock;   /* lock slot table */
+       u32                     xid;            /* Next XID value to use */
        struct rpc_task *       snd_task;       /* Task blocked in send */
-
        struct list_head        recv;
 
        struct {
@@ -210,18 +188,9 @@ struct rpc_xprt {
                                        bklog_u;        /* backlog queue utilization */
        } stat;
 
-       void                    (*old_data_ready)(struct sock *, int);
-       void                    (*old_state_change)(struct sock *);
-       void                    (*old_write_space)(struct sock *);
-
        char *                  address_strings[RPC_DISPLAY_MAX];
 };
 
-#define XPRT_LAST_FRAG         (1 << 0)
-#define XPRT_COPY_RECM         (1 << 1)
-#define XPRT_COPY_XID          (1 << 2)
-#define XPRT_COPY_DATA         (1 << 3)
-
 #ifdef __KERNEL__
 
 /*
@@ -270,8 +239,8 @@ void                        xprt_disconnect(struct rpc_xprt *xprt);
 /*
  * Socket transport setup operations
  */
-int                    xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to);
-int                    xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to);
+struct rpc_xprt *      xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
+struct rpc_xprt *      xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to);
 
 /*
  * Reserved bit positions in xprt->state
index e5a84a482e57f7a3c644880084cb9afd817d1640..a02ecc1f230dddb839ff85350f094c39b0ad2f28 100644 (file)
@@ -68,7 +68,7 @@ static struct rpc_credops gss_credops;
 #define GSS_CRED_SLACK         1024            /* XXX: unused */
 /* length of a krb5 verifier (48), plus data added before arguments when
  * using integrity (two 4-byte integers): */
-#define GSS_VERF_SLACK         56
+#define GSS_VERF_SLACK         100
 
 /* XXX this define must match the gssd define
 * as it is passed to gssd to signal the use of
@@ -94,46 +94,6 @@ struct gss_auth {
 static void gss_destroy_ctx(struct gss_cl_ctx *);
 static struct rpc_pipe_ops gss_upcall_ops;
 
-void
-print_hexl(u32 *p, u_int length, u_int offset)
-{
-       u_int i, j, jm;
-       u8 c, *cp;
-       
-       dprintk("RPC: print_hexl: length %d\n",length);
-       dprintk("\n");
-       cp = (u8 *) p;
-       
-       for (i = 0; i < length; i += 0x10) {
-               dprintk("  %04x: ", (u_int)(i + offset));
-               jm = length - i;
-               jm = jm > 16 ? 16 : jm;
-               
-               for (j = 0; j < jm; j++) {
-                       if ((j % 2) == 1)
-                               dprintk("%02x ", (u_int)cp[i+j]);
-                       else
-                               dprintk("%02x", (u_int)cp[i+j]);
-               }
-               for (; j < 16; j++) {
-                       if ((j % 2) == 1)
-                               dprintk("   ");
-                       else
-                               dprintk("  ");
-               }
-               dprintk(" ");
-               
-               for (j = 0; j < jm; j++) {
-                       c = cp[i+j];
-                       c = isprint(c) ? c : '.';
-                       dprintk("%c", c);
-               }
-               dprintk("\n");
-       }
-}
-
-EXPORT_SYMBOL(print_hexl);
-
 static inline struct gss_cl_ctx *
 gss_get_ctx(struct gss_cl_ctx *ctx)
 {
index e11a40b25cce68d40a2649cb999793d235fd455a..d926cda8862399de9d73d94c30063004a7c6b3f5 100644 (file)
@@ -43,6 +43,7 @@
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
 #include <linux/sunrpc/gss_krb5.h>
+#include <linux/sunrpc/xdr.h>
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY        RPCDBG_AUTH
@@ -61,9 +62,6 @@ krb5_encrypt(
        u8 local_iv[16] = {0};
        struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
-       dprintk("RPC:      krb5_encrypt: input data:\n");
-       print_hexl((u32 *)in, length, 0);
-
        if (length % crypto_blkcipher_blocksize(tfm) != 0)
                goto out;
 
@@ -80,12 +78,9 @@ krb5_encrypt(
        sg_set_buf(sg, out, length);
 
        ret = crypto_blkcipher_encrypt_iv(&desc, sg, sg, length);
-
-       dprintk("RPC:      krb5_encrypt: output data:\n");
-       print_hexl((u32 *)out, length, 0);
 out:
        dprintk("RPC:      krb5_encrypt returns %d\n",ret);
-       return(ret);
+       return ret;
 }
 
 EXPORT_SYMBOL(krb5_encrypt);
@@ -103,9 +98,6 @@ krb5_decrypt(
        u8 local_iv[16] = {0};
        struct blkcipher_desc desc = { .tfm = tfm, .info = local_iv };
 
-       dprintk("RPC:      krb5_decrypt: input data:\n");
-       print_hexl((u32 *)in, length, 0);
-
        if (length % crypto_blkcipher_blocksize(tfm) != 0)
                goto out;
 
@@ -121,82 +113,13 @@ krb5_decrypt(
        sg_set_buf(sg, out, length);
 
        ret = crypto_blkcipher_decrypt_iv(&desc, sg, sg, length);
-
-       dprintk("RPC:      krb5_decrypt: output_data:\n");
-       print_hexl((u32 *)out, length, 0);
 out:
        dprintk("RPC:      gss_k5decrypt returns %d\n",ret);
-       return(ret);
+       return ret;
 }
 
 EXPORT_SYMBOL(krb5_decrypt);
 
-static int
-process_xdr_buf(struct xdr_buf *buf, int offset, int len,
-               int (*actor)(struct scatterlist *, void *), void *data)
-{
-       int i, page_len, thislen, page_offset, ret = 0;
-       struct scatterlist      sg[1];
-
-       if (offset >= buf->head[0].iov_len) {
-               offset -= buf->head[0].iov_len;
-       } else {
-               thislen = buf->head[0].iov_len - offset;
-               if (thislen > len)
-                       thislen = len;
-               sg_set_buf(sg, buf->head[0].iov_base + offset, thislen);
-               ret = actor(sg, data);
-               if (ret)
-                       goto out;
-               offset = 0;
-               len -= thislen;
-       }
-       if (len == 0)
-               goto out;
-
-       if (offset >= buf->page_len) {
-               offset -= buf->page_len;
-       } else {
-               page_len = buf->page_len - offset;
-               if (page_len > len)
-                       page_len = len;
-               len -= page_len;
-               page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
-               i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
-               thislen = PAGE_CACHE_SIZE - page_offset;
-               do {
-                       if (thislen > page_len)
-                               thislen = page_len;
-                       sg->page = buf->pages[i];
-                       sg->offset = page_offset;
-                       sg->length = thislen;
-                       ret = actor(sg, data);
-                       if (ret)
-                               goto out;
-                       page_len -= thislen;
-                       i++;
-                       page_offset = 0;
-                       thislen = PAGE_CACHE_SIZE;
-               } while (page_len != 0);
-               offset = 0;
-       }
-       if (len == 0)
-               goto out;
-
-       if (offset < buf->tail[0].iov_len) {
-               thislen = buf->tail[0].iov_len - offset;
-               if (thislen > len)
-                       thislen = len;
-               sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen);
-               ret = actor(sg, data);
-               len -= thislen;
-       }
-       if (len != 0)
-               ret = -EINVAL;
-out:
-       return ret;
-}
-
 static int
 checksummer(struct scatterlist *sg, void *data)
 {
@@ -207,23 +130,13 @@ checksummer(struct scatterlist *sg, void *data)
 
 /* checksum the plaintext data and hdrlen bytes of the token header */
 s32
-make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
+make_checksum(char *cksumname, char *header, int hdrlen, struct xdr_buf *body,
                   int body_offset, struct xdr_netobj *cksum)
 {
-       char                            *cksumname;
        struct hash_desc                desc; /* XXX add to ctx? */
        struct scatterlist              sg[1];
        int err;
 
-       switch (cksumtype) {
-               case CKSUMTYPE_RSA_MD5:
-                       cksumname = "md5";
-                       break;
-               default:
-                       dprintk("RPC:      krb5_make_checksum:"
-                               " unsupported checksum %d", cksumtype);
-                       return GSS_S_FAILURE;
-       }
        desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
        if (IS_ERR(desc.tfm))
                return GSS_S_FAILURE;
@@ -237,7 +150,7 @@ make_checksum(s32 cksumtype, char *header, int hdrlen, struct xdr_buf *body,
        err = crypto_hash_update(&desc, sg, hdrlen);
        if (err)
                goto out;
-       err = process_xdr_buf(body, body_offset, body->len - body_offset,
+       err = xdr_process_buf(body, body_offset, body->len - body_offset,
                              checksummer, &desc);
        if (err)
                goto out;
@@ -335,7 +248,7 @@ gss_encrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
        desc.fragno = 0;
        desc.fraglen = 0;
 
-       ret = process_xdr_buf(buf, offset, buf->len - offset, encryptor, &desc);
+       ret = xdr_process_buf(buf, offset, buf->len - offset, encryptor, &desc);
        return ret;
 }
 
@@ -401,7 +314,7 @@ gss_decrypt_xdr_buf(struct crypto_blkcipher *tfm, struct xdr_buf *buf,
        desc.desc.flags = 0;
        desc.fragno = 0;
        desc.fraglen = 0;
-       return process_xdr_buf(buf, offset, buf->len - offset, decryptor, &desc);
+       return xdr_process_buf(buf, offset, buf->len - offset, decryptor, &desc);
 }
 
 EXPORT_SYMBOL(gss_decrypt_xdr_buf);
index 754b8cd6439f20e4157069ff9bbc5197b3e3010f..05d4bee86fc06d32c0ab559a936bfff92b8623ae 100644 (file)
@@ -129,6 +129,7 @@ gss_import_sec_context_kerberos(const void *p,
 {
        const void *end = (const void *)((const char *)p + len);
        struct  krb5_ctx *ctx;
+       int tmp;
 
        if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
                goto out_err;
@@ -136,18 +137,23 @@ gss_import_sec_context_kerberos(const void *p,
        p = simple_get_bytes(p, end, &ctx->initiate, sizeof(ctx->initiate));
        if (IS_ERR(p))
                goto out_err_free_ctx;
-       p = simple_get_bytes(p, end, &ctx->seed_init, sizeof(ctx->seed_init));
-       if (IS_ERR(p))
+       /* The downcall format was designed before we completely understood
+        * the uses of the context fields; so it includes some stuff we
+        * just give some minimal sanity-checking, and some we ignore
+        * completely (like the next twenty bytes): */
+       if (unlikely(p + 20 > end || p + 20 < p))
                goto out_err_free_ctx;
-       p = simple_get_bytes(p, end, ctx->seed, sizeof(ctx->seed));
+       p += 20;
+       p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
        if (IS_ERR(p))
                goto out_err_free_ctx;
-       p = simple_get_bytes(p, end, &ctx->signalg, sizeof(ctx->signalg));
-       if (IS_ERR(p))
+       if (tmp != SGN_ALG_DES_MAC_MD5)
                goto out_err_free_ctx;
-       p = simple_get_bytes(p, end, &ctx->sealalg, sizeof(ctx->sealalg));
+       p = simple_get_bytes(p, end, &tmp, sizeof(tmp));
        if (IS_ERR(p))
                goto out_err_free_ctx;
+       if (tmp != SEAL_ALG_DES)
+               goto out_err_free_ctx;
        p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
        if (IS_ERR(p))
                goto out_err_free_ctx;
index 08601ee4cd7311d45c4a1146386ce22a0d96be76..d0bb5064f8c5ae1b0578b8f635cea5d5df0695a4 100644 (file)
@@ -77,7 +77,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
                struct xdr_netobj *token)
 {
        struct krb5_ctx         *ctx = gss_ctx->internal_ctx_id;
-       s32                     checksum_type;
        char                    cksumdata[16];
        struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        unsigned char           *ptr, *krb5_hdr, *msg_start;
@@ -88,21 +87,6 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
 
        now = get_seconds();
 
-       switch (ctx->signalg) {
-               case SGN_ALG_DES_MAC_MD5:
-                       checksum_type = CKSUMTYPE_RSA_MD5;
-                       break;
-               default:
-                       dprintk("RPC:      gss_krb5_seal: ctx->signalg %d not"
-                               " supported\n", ctx->signalg);
-                       goto out_err;
-       }
-       if (ctx->sealalg != SEAL_ALG_NONE && ctx->sealalg != SEAL_ALG_DES) {
-               dprintk("RPC:      gss_krb5_seal: ctx->sealalg %d not supported\n",
-                       ctx->sealalg);
-               goto out_err;
-       }
-
        token->len = g_token_size(&ctx->mech_used, 22);
 
        ptr = token->data;
@@ -115,37 +99,26 @@ gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
        krb5_hdr = ptr - 2;
        msg_start = krb5_hdr + 24;
 
-       *(__be16 *)(krb5_hdr + 2) = htons(ctx->signalg);
+       *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
        memset(krb5_hdr + 4, 0xff, 4);
 
-       if (make_checksum(checksum_type, krb5_hdr, 8, text, 0, &md5cksum))
-                       goto out_err;
-
-       switch (ctx->signalg) {
-       case SGN_ALG_DES_MAC_MD5:
-               if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
-                                 md5cksum.data, md5cksum.len))
-                       goto out_err;
-               memcpy(krb5_hdr + 16,
-                      md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
-                      KRB5_CKSUM_LENGTH);
-
-               dprintk("RPC:      make_seal_token: cksum data: \n");
-               print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
-               break;
-       default:
-               BUG();
-       }
+       if (make_checksum("md5", krb5_hdr, 8, text, 0, &md5cksum))
+               return GSS_S_FAILURE;
+
+       if (krb5_encrypt(ctx->seq, NULL, md5cksum.data,
+                         md5cksum.data, md5cksum.len))
+               return GSS_S_FAILURE;
+
+       memcpy(krb5_hdr + 16, md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+              KRB5_CKSUM_LENGTH);
 
        spin_lock(&krb5_seq_lock);
        seq_send = ctx->seq_send++;
        spin_unlock(&krb5_seq_lock);
 
-       if ((krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
-                              seq_send, krb5_hdr + 16, krb5_hdr + 8)))
-               goto out_err;
+       if (krb5_make_seq_num(ctx->seq, ctx->initiate ? 0 : 0xff,
+                              ctx->seq_send, krb5_hdr + 16, krb5_hdr + 8))
+               return GSS_S_FAILURE;
 
-       return ((ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
-out_err:
-       return GSS_S_FAILURE;
+       return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
index 0828cf64100f977d241dac71d393a0148aa8f09a..87f8977ccece78861cd8d65c1e83007930b82cc0 100644 (file)
@@ -78,7 +78,6 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
        struct krb5_ctx         *ctx = gss_ctx->internal_ctx_id;
        int                     signalg;
        int                     sealalg;
-       s32                     checksum_type;
        char                    cksumdata[16];
        struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        s32                     now;
@@ -86,96 +85,54 @@ gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
        s32                     seqnum;
        unsigned char           *ptr = (unsigned char *)read_token->data;
        int                     bodysize;
-       u32                     ret = GSS_S_DEFECTIVE_TOKEN;
 
        dprintk("RPC:      krb5_read_token\n");
 
        if (g_verify_token_header(&ctx->mech_used, &bodysize, &ptr,
                                        read_token->len))
-               goto out;
+               return GSS_S_DEFECTIVE_TOKEN;
 
        if ((*ptr++ != ((KG_TOK_MIC_MSG>>8)&0xff)) ||
            (*ptr++ != ( KG_TOK_MIC_MSG    &0xff))   )
-               goto out;
+               return GSS_S_DEFECTIVE_TOKEN;
 
        /* XXX sanity-check bodysize?? */
 
-       /* get the sign and seal algorithms */
-
        signalg = ptr[0] + (ptr[1] << 8);
-       sealalg = ptr[2] + (ptr[3] << 8);
+       if (signalg != SGN_ALG_DES_MAC_MD5)
+               return GSS_S_DEFECTIVE_TOKEN;
 
-       /* Sanity checks */
+       sealalg = ptr[2] + (ptr[3] << 8);
+       if (sealalg != SEAL_ALG_NONE)
+               return GSS_S_DEFECTIVE_TOKEN;
 
        if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
-               goto out;
-
-       if (sealalg != 0xffff)
-               goto out;
-
-       /* there are several mappings of seal algorithms to sign algorithms,
-          but few enough that we can try them all. */
-
-       if ((ctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
-           (ctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
-           (ctx->sealalg == SEAL_ALG_DES3KD &&
-            signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
-               goto out;
-
-       /* compute the checksum of the message */
-
-       /* initialize the the cksum */
-       switch (signalg) {
-       case SGN_ALG_DES_MAC_MD5:
-               checksum_type = CKSUMTYPE_RSA_MD5;
-               break;
-       default:
-               ret = GSS_S_DEFECTIVE_TOKEN;
-               goto out;
-       }
-
-       switch (signalg) {
-       case SGN_ALG_DES_MAC_MD5:
-               ret = make_checksum(checksum_type, ptr - 2, 8,
-                                        message_buffer, 0, &md5cksum);
-               if (ret)
-                       goto out;
-
-               ret = krb5_encrypt(ctx->seq, NULL, md5cksum.data,
-                                  md5cksum.data, 16);
-               if (ret)
-                       goto out;
-
-               if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
-                       ret = GSS_S_BAD_SIG;
-                       goto out;
-               }
-               break;
-       default:
-               ret = GSS_S_DEFECTIVE_TOKEN;
-               goto out;
-       }
+               return GSS_S_DEFECTIVE_TOKEN;
+
+       if (make_checksum("md5", ptr - 2, 8, message_buffer, 0, &md5cksum))
+               return GSS_S_FAILURE;
+
+       if (krb5_encrypt(ctx->seq, NULL, md5cksum.data, md5cksum.data, 16))
+               return GSS_S_FAILURE;
+
+       if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+               return GSS_S_BAD_SIG;
 
        /* it got through unscathed.  Make sure the context is unexpired */
 
        now = get_seconds();
 
-       ret = GSS_S_CONTEXT_EXPIRED;
        if (now > ctx->endtime)
-               goto out;
+               return GSS_S_CONTEXT_EXPIRED;
 
        /* do sequencing checks */
 
-       ret = GSS_S_BAD_SIG;
-       if ((ret = krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction,
-                                   &seqnum)))
-               goto out;
+       if (krb5_get_seq_num(ctx->seq, ptr + 14, ptr + 6, &direction, &seqnum))
+               return GSS_S_FAILURE;
 
        if ((ctx->initiate && direction != 0xff) ||
            (!ctx->initiate && direction != 0))
-               goto out;
+               return GSS_S_BAD_SIG;
 
-       ret = GSS_S_COMPLETE;
-out:
-       return ret;
+       return GSS_S_COMPLETE;
 }
index cc45c1605f80eb7bbc6f2382bb2c3952f0446ec9..fe25b3d898dc53951be0c84166df3eae02487817 100644 (file)
@@ -57,9 +57,9 @@ gss_krb5_remove_padding(struct xdr_buf *buf, int blocksize)
                                        >>PAGE_CACHE_SHIFT;
                int offset = (buf->page_base + len - 1)
                                        & (PAGE_CACHE_SIZE - 1);
-               ptr = kmap_atomic(buf->pages[last], KM_SKB_SUNRPC_DATA);
+               ptr = kmap_atomic(buf->pages[last], KM_USER0);
                pad = *(ptr + offset);
-               kunmap_atomic(ptr, KM_SKB_SUNRPC_DATA);
+               kunmap_atomic(ptr, KM_USER0);
                goto out;
        } else
                len -= buf->page_len;
@@ -120,7 +120,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
                struct xdr_buf *buf, struct page **pages)
 {
        struct krb5_ctx         *kctx = ctx->internal_ctx_id;
-       s32                     checksum_type;
        char                    cksumdata[16];
        struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        int                     blocksize = 0, plainlen;
@@ -134,21 +133,6 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
 
        now = get_seconds();
 
-       switch (kctx->signalg) {
-               case SGN_ALG_DES_MAC_MD5:
-                       checksum_type = CKSUMTYPE_RSA_MD5;
-                       break;
-               default:
-                       dprintk("RPC:      gss_krb5_seal: kctx->signalg %d not"
-                               " supported\n", kctx->signalg);
-                       goto out_err;
-       }
-       if (kctx->sealalg != SEAL_ALG_NONE && kctx->sealalg != SEAL_ALG_DES) {
-               dprintk("RPC:      gss_krb5_seal: kctx->sealalg %d not supported\n",
-                       kctx->sealalg);
-               goto out_err;
-       }
-
        blocksize = crypto_blkcipher_blocksize(kctx->enc);
        gss_krb5_add_padding(buf, offset, blocksize);
        BUG_ON((buf->len - offset) % blocksize);
@@ -175,37 +159,27 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
        /* ptr now at byte 2 of header described in rfc 1964, section 1.2.1: */
        krb5_hdr = ptr - 2;
        msg_start = krb5_hdr + 24;
-       /* XXXJBF: */ BUG_ON(buf->head[0].iov_base + offset + headlen != msg_start + blocksize);
 
-       *(__be16 *)(krb5_hdr + 2) = htons(kctx->signalg);
+       *(__be16 *)(krb5_hdr + 2) = htons(SGN_ALG_DES_MAC_MD5);
        memset(krb5_hdr + 4, 0xff, 4);
-       *(__be16 *)(krb5_hdr + 4) = htons(kctx->sealalg);
+       *(__be16 *)(krb5_hdr + 4) = htons(SEAL_ALG_DES);
 
        make_confounder(msg_start, blocksize);
 
        /* XXXJBF: UGH!: */
        tmp_pages = buf->pages;
        buf->pages = pages;
-       if (make_checksum(checksum_type, krb5_hdr, 8, buf,
+       if (make_checksum("md5", krb5_hdr, 8, buf,
                                offset + headlen - blocksize, &md5cksum))
-               goto out_err;
+               return GSS_S_FAILURE;
        buf->pages = tmp_pages;
 
-       switch (kctx->signalg) {
-       case SGN_ALG_DES_MAC_MD5:
-               if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
-                                 md5cksum.data, md5cksum.len))
-                       goto out_err;
-               memcpy(krb5_hdr + 16,
-                      md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
-                      KRB5_CKSUM_LENGTH);
-
-               dprintk("RPC:      make_seal_token: cksum data: \n");
-               print_hexl((u32 *) (krb5_hdr + 16), KRB5_CKSUM_LENGTH, 0);
-               break;
-       default:
-               BUG();
-       }
+       if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+                         md5cksum.data, md5cksum.len))
+               return GSS_S_FAILURE;
+       memcpy(krb5_hdr + 16,
+              md5cksum.data + md5cksum.len - KRB5_CKSUM_LENGTH,
+              KRB5_CKSUM_LENGTH);
 
        spin_lock(&krb5_seq_lock);
        seq_send = kctx->seq_send++;
@@ -215,15 +189,13 @@ gss_wrap_kerberos(struct gss_ctx *ctx, int offset,
         * and encrypt at the same time: */
        if ((krb5_make_seq_num(kctx->seq, kctx->initiate ? 0 : 0xff,
                               seq_send, krb5_hdr + 16, krb5_hdr + 8)))
-               goto out_err;
+               return GSS_S_FAILURE;
 
        if (gss_encrypt_xdr_buf(kctx->enc, buf, offset + headlen - blocksize,
                                                                        pages))
-               goto out_err;
+               return GSS_S_FAILURE;
 
-       return ((kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE);
-out_err:
-       return GSS_S_FAILURE;
+       return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
 }
 
 u32
@@ -232,7 +204,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
        struct krb5_ctx         *kctx = ctx->internal_ctx_id;
        int                     signalg;
        int                     sealalg;
-       s32                     checksum_type;
        char                    cksumdata[16];
        struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        s32                     now;
@@ -240,7 +211,6 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
        s32                     seqnum;
        unsigned char           *ptr;
        int                     bodysize;
-       u32                     ret = GSS_S_DEFECTIVE_TOKEN;
        void                    *data_start, *orig_start;
        int                     data_len;
        int                     blocksize;
@@ -250,98 +220,58 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
        ptr = (u8 *)buf->head[0].iov_base + offset;
        if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr,
                                        buf->len - offset))
-               goto out;
+               return GSS_S_DEFECTIVE_TOKEN;
 
        if ((*ptr++ != ((KG_TOK_WRAP_MSG>>8)&0xff)) ||
            (*ptr++ !=  (KG_TOK_WRAP_MSG    &0xff))   )
-               goto out;
+               return GSS_S_DEFECTIVE_TOKEN;
 
        /* XXX sanity-check bodysize?? */
 
        /* get the sign and seal algorithms */
 
        signalg = ptr[0] + (ptr[1] << 8);
-       sealalg = ptr[2] + (ptr[3] << 8);
+       if (signalg != SGN_ALG_DES_MAC_MD5)
+               return GSS_S_DEFECTIVE_TOKEN;
 
-       /* Sanity checks */
+       sealalg = ptr[2] + (ptr[3] << 8);
+       if (sealalg != SEAL_ALG_DES)
+               return GSS_S_DEFECTIVE_TOKEN;
 
        if ((ptr[4] != 0xff) || (ptr[5] != 0xff))
-               goto out;
-
-       if (sealalg == 0xffff)
-               goto out;
-
-       /* in the current spec, there is only one valid seal algorithm per
-          key type, so a simple comparison is ok */
-
-       if (sealalg != kctx->sealalg)
-               goto out;
-
-       /* there are several mappings of seal algorithms to sign algorithms,
-          but few enough that we can try them all. */
-
-       if ((kctx->sealalg == SEAL_ALG_NONE && signalg > 1) ||
-           (kctx->sealalg == SEAL_ALG_1 && signalg != SGN_ALG_3) ||
-           (kctx->sealalg == SEAL_ALG_DES3KD &&
-            signalg != SGN_ALG_HMAC_SHA1_DES3_KD))
-               goto out;
+               return GSS_S_DEFECTIVE_TOKEN;
 
        if (gss_decrypt_xdr_buf(kctx->enc, buf,
                        ptr + 22 - (unsigned char *)buf->head[0].iov_base))
-               goto out;
+               return GSS_S_DEFECTIVE_TOKEN;
 
-       /* compute the checksum of the message */
+       if (make_checksum("md5", ptr - 2, 8, buf,
+                ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum))
+               return GSS_S_FAILURE;
 
-       /* initialize the the cksum */
-       switch (signalg) {
-       case SGN_ALG_DES_MAC_MD5:
-               checksum_type = CKSUMTYPE_RSA_MD5;
-               break;
-       default:
-               ret = GSS_S_DEFECTIVE_TOKEN;
-               goto out;
-       }
-
-       switch (signalg) {
-       case SGN_ALG_DES_MAC_MD5:
-               ret = make_checksum(checksum_type, ptr - 2, 8, buf,
-                        ptr + 22 - (unsigned char *)buf->head[0].iov_base, &md5cksum);
-               if (ret)
-                       goto out;
-
-               ret = krb5_encrypt(kctx->seq, NULL, md5cksum.data,
-                                  md5cksum.data, md5cksum.len);
-               if (ret)
-                       goto out;
-
-               if (memcmp(md5cksum.data + 8, ptr + 14, 8)) {
-                       ret = GSS_S_BAD_SIG;
-                       goto out;
-               }
-               break;
-       default:
-               ret = GSS_S_DEFECTIVE_TOKEN;
-               goto out;
-       }
+       if (krb5_encrypt(kctx->seq, NULL, md5cksum.data,
+                          md5cksum.data, md5cksum.len))
+               return GSS_S_FAILURE;
+
+       if (memcmp(md5cksum.data + 8, ptr + 14, 8))
+               return GSS_S_BAD_SIG;
 
        /* it got through unscathed.  Make sure the context is unexpired */
 
        now = get_seconds();
 
-       ret = GSS_S_CONTEXT_EXPIRED;
        if (now > kctx->endtime)
-               goto out;
+               return GSS_S_CONTEXT_EXPIRED;
 
        /* do sequencing checks */
 
-       ret = GSS_S_BAD_SIG;
-       if ((ret = krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
-                                   &seqnum)))
-               goto out;
+       if (krb5_get_seq_num(kctx->seq, ptr + 14, ptr + 6, &direction,
+                                   &seqnum))
+               return GSS_S_BAD_SIG;
 
        if ((kctx->initiate && direction != 0xff) ||
            (!kctx->initiate && direction != 0))
-               goto out;
+               return GSS_S_BAD_SIG;
 
        /* Copy the data back to the right position.  XXX: Would probably be
         * better to copy and encrypt at the same time. */
@@ -354,11 +284,8 @@ gss_unwrap_kerberos(struct gss_ctx *ctx, int offset, struct xdr_buf *buf)
        buf->head[0].iov_len -= (data_start - orig_start);
        buf->len -= (data_start - orig_start);
 
-       ret = GSS_S_DEFECTIVE_TOKEN;
        if (gss_krb5_remove_padding(buf, blocksize))
-               goto out;
+               return GSS_S_DEFECTIVE_TOKEN;
 
-       ret = GSS_S_COMPLETE;
-out:
-       return ret;
+       return GSS_S_COMPLETE;
 }
index d57f60838895d878efca25ba46628ca795237f16..41465072d0b59168cbdcb9a3aed3294a58ab7458 100644 (file)
@@ -82,133 +82,73 @@ simple_get_netobj(const void *p, const void *end, struct xdr_netobj *res)
        return q;
 }
 
-static inline const void *
-get_key(const void *p, const void *end, struct crypto_blkcipher **res,
-       int *resalg)
-{
-       struct xdr_netobj       key = { 0 };
-       int                     setkey = 0;
-       char                    *alg_name;
-
-       p = simple_get_bytes(p, end, resalg, sizeof(*resalg));
-       if (IS_ERR(p))
-               goto out_err;
-       p = simple_get_netobj(p, end, &key);
-       if (IS_ERR(p))
-               goto out_err;
-
-       switch (*resalg) {
-               case NID_des_cbc:
-                       alg_name = "cbc(des)";
-                       setkey = 1;
-                       break;
-               case NID_cast5_cbc:
-                       /* XXXX here in name only, not used */
-                       alg_name = "cbc(cast5)";
-                       setkey = 0; /* XXX will need to set to 1 */
-                       break;
-               case NID_md5:
-                       if (key.len == 0) {
-                               dprintk("RPC: SPKM3 get_key: NID_md5 zero Key length\n");
-                       }
-                       alg_name = "md5";
-                       setkey = 0;
-                       break;
-               default:
-                       dprintk("gss_spkm3_mech: unsupported algorithm %d\n", *resalg);
-                       goto out_err_free_key;
-       }
-       *res = crypto_alloc_blkcipher(alg_name, 0, CRYPTO_ALG_ASYNC);
-       if (IS_ERR(*res)) {
-               printk("gss_spkm3_mech: unable to initialize crypto algorthm %s\n", alg_name);
-               *res = NULL;
-               goto out_err_free_key;
-       }
-       if (setkey) {
-               if (crypto_blkcipher_setkey(*res, key.data, key.len)) {
-                       printk("gss_spkm3_mech: error setting key for crypto algorthm %s\n", alg_name);
-                       goto out_err_free_tfm;
-               }
-       }
-
-       if(key.len > 0)
-               kfree(key.data);
-       return p;
-
-out_err_free_tfm:
-       crypto_free_blkcipher(*res);
-out_err_free_key:
-       if(key.len > 0)
-               kfree(key.data);
-       p = ERR_PTR(-EINVAL);
-out_err:
-       return p;
-}
-
 static int
 gss_import_sec_context_spkm3(const void *p, size_t len,
                                struct gss_ctx *ctx_id)
 {
        const void *end = (const void *)((const char *)p + len);
        struct  spkm3_ctx *ctx;
+       int     version;
 
        if (!(ctx = kzalloc(sizeof(*ctx), GFP_KERNEL)))
                goto out_err;
 
+       p = simple_get_bytes(p, end, &version, sizeof(version));
+       if (IS_ERR(p))
+               goto out_err_free_ctx;
+       if (version != 1) {
+               dprintk("RPC: unknown spkm3 token format: obsolete nfs-utils?\n");
+               goto out_err_free_ctx;
+       }
+
        p = simple_get_netobj(p, end, &ctx->ctx_id);
        if (IS_ERR(p))
                goto out_err_free_ctx;
 
-       p = simple_get_bytes(p, end, &ctx->qop, sizeof(ctx->qop));
+       p = simple_get_bytes(p, end, &ctx->endtime, sizeof(ctx->endtime));
        if (IS_ERR(p))
                goto out_err_free_ctx_id;
 
        p = simple_get_netobj(p, end, &ctx->mech_used);
        if (IS_ERR(p))
-               goto out_err_free_mech;
+               goto out_err_free_ctx_id;
 
        p = simple_get_bytes(p, end, &ctx->ret_flags, sizeof(ctx->ret_flags));
        if (IS_ERR(p))
                goto out_err_free_mech;
 
-       p = simple_get_bytes(p, end, &ctx->req_flags, sizeof(ctx->req_flags));
+       p = simple_get_netobj(p, end, &ctx->conf_alg);
        if (IS_ERR(p))
                goto out_err_free_mech;
 
-       p = simple_get_netobj(p, end, &ctx->share_key);
-       if (IS_ERR(p))
-               goto out_err_free_s_key;
-
-       p = get_key(p, end, &ctx->derived_conf_key, &ctx->conf_alg);
+       p = simple_get_netobj(p, end, &ctx->derived_conf_key);
        if (IS_ERR(p))
-               goto out_err_free_s_key;
+               goto out_err_free_conf_alg;
 
-       p = get_key(p, end, &ctx->derived_integ_key, &ctx->intg_alg);
+       p = simple_get_netobj(p, end, &ctx->intg_alg);
        if (IS_ERR(p))
-               goto out_err_free_key1;
+               goto out_err_free_conf_key;
 
-       p = simple_get_bytes(p, end, &ctx->keyestb_alg, sizeof(ctx->keyestb_alg));
+       p = simple_get_netobj(p, end, &ctx->derived_integ_key);
        if (IS_ERR(p))
-               goto out_err_free_key2;
-
-       p = simple_get_bytes(p, end, &ctx->owf_alg, sizeof(ctx->owf_alg));
-       if (IS_ERR(p))
-               goto out_err_free_key2;
+               goto out_err_free_intg_alg;
 
        if (p != end)
-               goto out_err_free_key2;
+               goto out_err_free_intg_key;
 
        ctx_id->internal_ctx_id = ctx;
 
        dprintk("Successfully imported new spkm context.\n");
        return 0;
 
-out_err_free_key2:
-       crypto_free_blkcipher(ctx->derived_integ_key);
-out_err_free_key1:
-       crypto_free_blkcipher(ctx->derived_conf_key);
-out_err_free_s_key:
-       kfree(ctx->share_key.data);
+out_err_free_intg_key:
+       kfree(ctx->derived_integ_key.data);
+out_err_free_intg_alg:
+       kfree(ctx->intg_alg.data);
+out_err_free_conf_key:
+       kfree(ctx->derived_conf_key.data);
+out_err_free_conf_alg:
+       kfree(ctx->conf_alg.data);
 out_err_free_mech:
        kfree(ctx->mech_used.data);
 out_err_free_ctx_id:
@@ -220,13 +160,16 @@ out_err:
 }
 
 static void
-gss_delete_sec_context_spkm3(void *internal_ctx) {
+gss_delete_sec_context_spkm3(void *internal_ctx)
+{
        struct spkm3_ctx *sctx = internal_ctx;
 
-       crypto_free_blkcipher(sctx->derived_integ_key);
-       crypto_free_blkcipher(sctx->derived_conf_key);
-       kfree(sctx->share_key.data);
+       kfree(sctx->derived_integ_key.data);
+       kfree(sctx->intg_alg.data);
+       kfree(sctx->derived_conf_key.data);
+       kfree(sctx->conf_alg.data);
        kfree(sctx->mech_used.data);
+       kfree(sctx->ctx_id.data);
        kfree(sctx);
 }
 
@@ -238,7 +181,6 @@ gss_verify_mic_spkm3(struct gss_ctx         *ctx,
        u32 maj_stat = 0;
        struct spkm3_ctx *sctx = ctx->internal_ctx_id;
 
-       dprintk("RPC: gss_verify_mic_spkm3 calling spkm3_read_token\n");
        maj_stat = spkm3_read_token(sctx, checksum, signbuf, SPKM_MIC_TOK);
 
        dprintk("RPC: gss_verify_mic_spkm3 returning %d\n", maj_stat);
@@ -253,10 +195,9 @@ gss_get_mic_spkm3(struct gss_ctx   *ctx,
        u32 err = 0;
        struct spkm3_ctx *sctx = ctx->internal_ctx_id;
 
-       dprintk("RPC: gss_get_mic_spkm3\n");
-
        err = spkm3_make_token(sctx, message_buffer,
-                             message_token, SPKM_MIC_TOK);
+                               message_token, SPKM_MIC_TOK);
+       dprintk("RPC: gss_get_mic_spkm3 returning %d\n", err);
        return err;
 }
 
index 18c7862bc234a989c14e26bee79212de3f2a63fc..b179d58c6249cf66b2004aa18fb0560011f0c8e5 100644 (file)
 #include <linux/sunrpc/gss_spkm3.h>
 #include <linux/random.h>
 #include <linux/crypto.h>
+#include <linux/pagemap.h>
+#include <linux/scatterlist.h>
+#include <linux/sunrpc/xdr.h>
 
 #ifdef RPC_DEBUG
 # define RPCDBG_FACILITY        RPCDBG_AUTH
 #endif
 
+/*
+ * Algorithm object identifiers, stored as xdr_netobj { length, bytes } and
+ * compared with g_OID_equal() against the context's intg_alg and conf_alg
+ * in spkm3_make_token():
+ *   hmac_md5_oid  - 1.3.6.1.5.5.8.1.1       (HMAC-MD5 integrity algorithm)
+ *   cast5_cbc_oid - 1.2.840.113533.7.66.10  (CAST5-CBC confidentiality alg.)
+ * The length field must equal the number of bytes in the string literal.
+ */
+const struct xdr_netobj hmac_md5_oid = { 8, "\x2B\x06\x01\x05\x05\x08\x01\x01"};
+const struct xdr_netobj cast5_cbc_oid = {9, "\x2A\x86\x48\x86\xF6\x7D\x07\x42\x0A"};
+
 /*
  * spkm3_make_token()
  *
@@ -66,29 +72,23 @@ spkm3_make_token(struct spkm3_ctx *ctx,
        int                     ctxelen = 0, ctxzbit = 0;
        int                     md5elen = 0, md5zbit = 0;
 
-       dprintk("RPC: spkm3_make_token\n");
-
        now = jiffies;
 
        if (ctx->ctx_id.len != 16) {
                dprintk("RPC: spkm3_make_token BAD ctx_id.len %d\n",
-                       ctx->ctx_id.len);
+                               ctx->ctx_id.len);
                goto out_err;
        }
-               
-       switch (ctx->intg_alg) {
-               case NID_md5:
-                       checksum_type = CKSUMTYPE_RSA_MD5;
-                       break;
-               default:
-                       dprintk("RPC: gss_spkm3_seal: ctx->signalg %d not"
-                               " supported\n", ctx->intg_alg);
-                       goto out_err;
-       }
-       /* XXX since we don't support WRAP, perhaps we don't care... */
-       if (ctx->conf_alg != NID_cast5_cbc) {
-               dprintk("RPC: gss_spkm3_seal: ctx->sealalg %d not supported\n",
-                       ctx->conf_alg);
+
+       if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
+               dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm."
+                               "only support hmac-md5 I-ALG.\n");
+               goto out_err;
+       } else
+               checksum_type = CKSUMTYPE_HMAC_MD5;
+
+       if (!g_OID_equal(&ctx->conf_alg, &cast5_cbc_oid)) {
+               dprintk("RPC: gss_spkm3_seal: unsupported C-ALG algorithm\n");
                goto out_err;
        }
 
@@ -96,10 +96,10 @@ spkm3_make_token(struct spkm3_ctx *ctx,
                /* Calculate checksum over the mic-header */
                asn1_bitstring_len(&ctx->ctx_id, &ctxelen, &ctxzbit);
                spkm3_mic_header(&mic_hdr.data, &mic_hdr.len, ctx->ctx_id.data,
-                                        ctxelen, ctxzbit);
-
-               if (make_checksum(checksum_type, mic_hdr.data, mic_hdr.len, 
-                                            text, 0, &md5cksum))
+                               ctxelen, ctxzbit);
+               if (make_spkm3_checksum(checksum_type, &ctx->derived_integ_key,
+                                       (char *)mic_hdr.data, mic_hdr.len,
+                                       text, 0, &md5cksum))
                        goto out_err;
 
                asn1_bitstring_len(&md5cksum, &md5elen, &md5zbit);
@@ -121,7 +121,66 @@ spkm3_make_token(struct spkm3_ctx *ctx,
 
        return  GSS_S_COMPLETE;
 out_err:
+       if (md5cksum.data)
+               kfree(md5cksum.data);
+
        token->data = NULL;
        token->len = 0;
        return GSS_S_FAILURE;
 }
+
+/*
+ * Per-segment callback handed to xdr_process_buf(): feeds the whole of one
+ * scatterlist entry into the running hash whose descriptor is passed as data.
+ * Returns whatever crypto_hash_update() returns.
+ */
+static int
+spkm3_checksummer(struct scatterlist *sg, void *data)
+{
+       struct hash_desc *hash = data;
+       unsigned int nbytes = sg->length;
+
+       return crypto_hash_update(hash, sg, nbytes);
+}
+
+/*
+ * make_spkm3_checksum()
+ *
+ * Compute a keyed checksum over hdrlen bytes of the token header followed by
+ * the plaintext of body, starting body_offset bytes into it.
+ *
+ * cksumtype   - checksum algorithm; only CKSUMTYPE_HMAC_MD5 is accepted
+ * key         - integrity key; must have non-NULL data and non-zero length
+ * header/hdrlen - token header bytes to hash first
+ * body/body_offset - message data to hash after the header
+ * cksum       - cksum->len is set to the digest size and the digest is
+ *               written through cksum->data, which is not allocated here
+ *
+ * Returns 0 on success and GSS_S_FAILURE if the algorithm is unsupported,
+ * the key is empty, or any crypto step fails.
+ */
+s32
+make_spkm3_checksum(s32 cksumtype, struct xdr_netobj *key, char *header,
+                   unsigned int hdrlen, struct xdr_buf *body,
+                   unsigned int body_offset, struct xdr_netobj *cksum)
+{
+       char                            *cksumname;
+       struct hash_desc                desc; /* XXX add to ctx? */
+       struct scatterlist              sg[1];
+       int err;
+
+       switch (cksumtype) {
+               case CKSUMTYPE_HMAC_MD5:
+                       /* must be the keyed transform: crypto_hash_setkey()
+                        * below cannot succeed on plain, unkeyed "md5" */
+                       cksumname = "hmac(md5)";
+                       break;
+               default:
+                       dprintk("RPC:      spkm3_make_checksum:"
+                                       " unsupported checksum %d", cksumtype);
+                       return GSS_S_FAILURE;
+       }
+
+       if (key->data == NULL || key->len == 0)
+               return GSS_S_FAILURE;
+
+       desc.tfm = crypto_alloc_hash(cksumname, 0, CRYPTO_ALG_ASYNC);
+       if (IS_ERR(desc.tfm))
+               return GSS_S_FAILURE;
+       cksum->len = crypto_hash_digestsize(desc.tfm);
+       desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+       err = crypto_hash_setkey(desc.tfm, key->data, key->len);
+       if (err)
+               goto out;
+
+       /* the hash state has to be initialised before the first update */
+       err = crypto_hash_init(&desc);
+       if (err)
+               goto out;
+
+       /* the last argument is a byte count, not a scatterlist entry count */
+       sg_set_buf(sg, header, hdrlen);
+       err = crypto_hash_update(&desc, sg, hdrlen);
+       if (err)
+               goto out;
+
+       err = xdr_process_buf(body, body_offset, body->len - body_offset,
+                       spkm3_checksummer, &desc);
+       if (err)
+               goto out;
+
+       err = crypto_hash_final(&desc, cksum->data);
+
+out:
+       /* the transform is released on success and on every failure path */
+       crypto_free_hash(desc.tfm);
+
+       return err ? GSS_S_FAILURE : 0;
+}
+
+EXPORT_SYMBOL(make_spkm3_checksum);
index 854a983ccf26fd7c5fbc7c9e439bbae19ad31b70..35188b6ea8f75a950709498fa5dc187e73b0c02f 100644 (file)
@@ -172,10 +172,10 @@ spkm3_mic_header(unsigned char **hdrbuf, unsigned int *hdrlen, unsigned char *ct
        *(u8 *)hptr++ = zbit;
        memcpy(hptr, ctxdata, elen);
        hptr += elen;
-       *hdrlen = hptr - top; 
+       *hdrlen = hptr - top;
 }
-               
-/* 
+
+/*
  * spkm3_mic_innercontext_token()
  *
  * *tokp points to the beginning of the SPKM_MIC token  described 
index 8537f581ef9b904f7abe5d821a6dcc7da1a41dec..e54581ca75702b43321aae63bced573ceffc73b8 100644 (file)
@@ -54,70 +54,70 @@ spkm3_read_token(struct spkm3_ctx *ctx,
                struct xdr_buf *message_buffer, /* signbuf */
                int toktype)
 {
+       s32                     checksum_type;
        s32                     code;
        struct xdr_netobj       wire_cksum = {.len =0, .data = NULL};
        char                    cksumdata[16];
        struct xdr_netobj       md5cksum = {.len = 0, .data = cksumdata};
        unsigned char           *ptr = (unsigned char *)read_token->data;
-       unsigned char           *cksum;
+       unsigned char           *cksum;
        int                     bodysize, md5elen;
        int                     mic_hdrlen;
        u32                     ret = GSS_S_DEFECTIVE_TOKEN;
 
-       dprintk("RPC: spkm3_read_token read_token->len %d\n", read_token->len);
-
        if (g_verify_token_header((struct xdr_netobj *) &ctx->mech_used,
                                        &bodysize, &ptr, read_token->len))
                goto out;
 
        /* decode the token */
 
-       if (toktype == SPKM_MIC_TOK) {
-
-               if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum))) 
-                       goto out;
-
-               if (*cksum++ != 0x03) {
-                       dprintk("RPC: spkm3_read_token BAD checksum type\n");
-                       goto out;
-               }
-               md5elen = *cksum++; 
-               cksum++;        /* move past the zbit */
-       
-               if(!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
-                       goto out;
-
-               /* HARD CODED FOR MD5 */
-
-               /* compute the checksum of the message.
-               *  ptr + 2 = start of header piece of checksum
-               *  mic_hdrlen + 2 = length of header piece of checksum
-               */
-               ret = GSS_S_DEFECTIVE_TOKEN;
-               code = make_checksum(CKSUMTYPE_RSA_MD5, ptr + 2, 
-                                       mic_hdrlen + 2, 
-                                       message_buffer, 0, &md5cksum);
-
-               if (code)
-                       goto out;
-
-               dprintk("RPC: spkm3_read_token: digest wire_cksum.len %d:\n", 
-                       wire_cksum.len);
-               dprintk("          md5cksum.data\n");
-               print_hexl((u32 *) md5cksum.data, 16, 0);
-               dprintk("          cksum.data:\n");
-               print_hexl((u32 *) wire_cksum.data, wire_cksum.len, 0);
-
-               ret = GSS_S_BAD_SIG;
-               code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
-               if (code)
-                       goto out;
-
-       } else { 
-               dprintk("RPC: BAD or UNSUPPORTED SPKM3 token type: %d\n",toktype);
+       if (toktype != SPKM_MIC_TOK) {
+               dprintk("RPC: BAD SPKM3 token type: %d\n", toktype);
+               goto out;
+       }
+
+       if ((ret = spkm3_verify_mic_token(&ptr, &mic_hdrlen, &cksum)))
+               goto out;
+
+       if (*cksum++ != 0x03) {
+               dprintk("RPC: spkm3_read_token BAD checksum type\n");
+               goto out;
+       }
+       md5elen = *cksum++;
+       cksum++;        /* move past the zbit */
+
+       if (!decode_asn1_bitstring(&wire_cksum, cksum, md5elen - 1, 16))
+               goto out;
+
+       /* HARD CODED FOR MD5 */
+
+       /* compute the checksum of the message.
+        * ptr + 2 = start of header piece of checksum
+        * mic_hdrlen + 2 = length of header piece of checksum
+        */
+       ret = GSS_S_DEFECTIVE_TOKEN;
+       if (!g_OID_equal(&ctx->intg_alg, &hmac_md5_oid)) {
+               dprintk("RPC: gss_spkm3_seal: unsupported I-ALG algorithm\n");
+               goto out;
+       }
+
+       checksum_type = CKSUMTYPE_HMAC_MD5;
+
+       code = make_spkm3_checksum(checksum_type,
+               &ctx->derived_integ_key, ptr + 2, mic_hdrlen + 2,
+               message_buffer, 0, &md5cksum);
+
+       if (code)
+               goto out;
+
+       ret = GSS_S_BAD_SIG;
+       code = memcmp(md5cksum.data, wire_cksum.data, wire_cksum.len);
+       if (code) {
+               dprintk("RPC: bad MIC checksum\n");
                goto out;
        }
 
+
        /* XXX: need to add expiration and sequencing */
        ret = GSS_S_COMPLETE;
 out:
index dfeea4fea95a7f6dfb4ea038491aaba39ced48dc..aba528b9ae769a8aa0fb5b5ece07ff704dab2291 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/types.h>
 #include <linux/mm.h>
 #include <linux/slab.h>
+#include <linux/smp_lock.h>
 #include <linux/utsname.h>
 #include <linux/workqueue.h>
 
@@ -141,6 +142,10 @@ static struct rpc_clnt * rpc_new_client(struct rpc_xprt *xprt, char *servname, s
        clnt->cl_vers     = version->number;
        clnt->cl_stats    = program->stats;
        clnt->cl_metrics  = rpc_alloc_iostats(clnt);
+       err = -ENOMEM;
+       if (clnt->cl_metrics == NULL)
+               goto out_no_stats;
+       clnt->cl_program  = program;
 
        if (!xprt_bound(clnt->cl_xprt))
                clnt->cl_autobind = 1;
@@ -173,6 +178,8 @@ out_no_auth:
                rpc_put_mount();
        }
 out_no_path:
+       rpc_free_iostats(clnt->cl_metrics);
+out_no_stats:
        if (clnt->cl_server != clnt->cl_inline_name)
                kfree(clnt->cl_server);
        kfree(clnt);
@@ -252,12 +259,19 @@ struct rpc_clnt *
 rpc_clone_client(struct rpc_clnt *clnt)
 {
        struct rpc_clnt *new;
+       int err = -ENOMEM;
 
        new = kmemdup(clnt, sizeof(*new), GFP_KERNEL);
        if (!new)
                goto out_no_clnt;
        atomic_set(&new->cl_count, 1);
        atomic_set(&new->cl_users, 0);
+       new->cl_metrics = rpc_alloc_iostats(clnt);
+       if (new->cl_metrics == NULL)
+               goto out_no_stats;
+       err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name);
+       if (err != 0)
+               goto out_no_path;
        new->cl_parent = clnt;
        atomic_inc(&clnt->cl_count);
        new->cl_xprt = xprt_get(clnt->cl_xprt);
@@ -265,16 +279,17 @@ rpc_clone_client(struct rpc_clnt *clnt)
        new->cl_autobind = 0;
        new->cl_oneshot = 0;
        new->cl_dead = 0;
-       if (!IS_ERR(new->cl_dentry))
-               dget(new->cl_dentry);
        rpc_init_rtt(&new->cl_rtt_default, clnt->cl_xprt->timeout.to_initval);
        if (new->cl_auth)
                atomic_inc(&new->cl_auth->au_count);
-       new->cl_metrics = rpc_alloc_iostats(clnt);
        return new;
+out_no_path:
+       rpc_free_iostats(new->cl_metrics);
+out_no_stats:
+       kfree(new);
 out_no_clnt:
-       printk(KERN_INFO "RPC: out of memory in %s\n", __FUNCTION__);
-       return ERR_PTR(-ENOMEM);
+       dprintk("RPC: %s returned error %d\n", __FUNCTION__, err);
+       return ERR_PTR(err);
 }
 
 /*
@@ -327,16 +342,14 @@ rpc_destroy_client(struct rpc_clnt *clnt)
                rpcauth_destroy(clnt->cl_auth);
                clnt->cl_auth = NULL;
        }
-       if (clnt->cl_parent != clnt) {
-               if (!IS_ERR(clnt->cl_dentry))
-                       dput(clnt->cl_dentry);
-               rpc_destroy_client(clnt->cl_parent);
-               goto out_free;
-       }
        if (!IS_ERR(clnt->cl_dentry)) {
                rpc_rmdir(clnt->cl_dentry);
                rpc_put_mount();
        }
+       if (clnt->cl_parent != clnt) {
+               rpc_destroy_client(clnt->cl_parent);
+               goto out_free;
+       }
        if (clnt->cl_server != clnt->cl_inline_name)
                kfree(clnt->cl_server);
 out_free:
@@ -466,10 +479,9 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 
        BUG_ON(flags & RPC_TASK_ASYNC);
 
-       status = -ENOMEM;
        task = rpc_new_task(clnt, flags, &rpc_default_ops, NULL);
        if (task == NULL)
-               goto out;
+               return -ENOMEM;
 
        /* Mask signals on RPC calls _and_ GSS_AUTH upcalls */
        rpc_task_sigmask(task, &oldset);
@@ -478,15 +490,17 @@ int rpc_call_sync(struct rpc_clnt *clnt, struct rpc_message *msg, int flags)
 
        /* Set up the call info struct and execute the task */
        status = task->tk_status;
-       if (status == 0) {
-               atomic_inc(&task->tk_count);
-               status = rpc_execute(task);
-               if (status == 0)
-                       status = task->tk_status;
+       if (status != 0) {
+               rpc_release_task(task);
+               goto out;
        }
-       rpc_restore_sigmask(&oldset);
-       rpc_release_task(task);
+       atomic_inc(&task->tk_count);
+       status = rpc_execute(task);
+       if (status == 0)
+               status = task->tk_status;
+       rpc_put_task(task);
 out:
+       rpc_restore_sigmask(&oldset);
        return status;
 }
 
@@ -528,8 +542,7 @@ rpc_call_async(struct rpc_clnt *clnt, struct rpc_message *msg, int flags,
        rpc_restore_sigmask(&oldset);           
        return status;
 out_release:
-       if (tk_ops->rpc_release != NULL)
-               tk_ops->rpc_release(data);
+       rpc_release_calldata(tk_ops, data);
        return status;
 }
 
@@ -581,7 +594,11 @@ EXPORT_SYMBOL_GPL(rpc_peeraddr);
 char *rpc_peeraddr2str(struct rpc_clnt *clnt, enum rpc_display_format_t format)
 {
        struct rpc_xprt *xprt = clnt->cl_xprt;
-       return xprt->ops->print_addr(xprt, format);
+
+       if (xprt->address_strings[format] != NULL)
+               return xprt->address_strings[format];
+       else
+               return "unprintable";
 }
 EXPORT_SYMBOL_GPL(rpc_peeraddr2str);
 
@@ -811,8 +828,10 @@ call_encode(struct rpc_task *task)
        if (encode == NULL)
                return;
 
+       lock_kernel();
        task->tk_status = rpcauth_wrap_req(task, encode, req, p,
                        task->tk_msg.rpc_argp);
+       unlock_kernel();
        if (task->tk_status == -ENOMEM) {
                /* XXX: Is this sane? */
                rpc_delay(task, 3*HZ);
@@ -1143,9 +1162,12 @@ call_decode(struct rpc_task *task)
 
        task->tk_action = rpc_exit_task;
 
-       if (decode)
+       if (decode) {
+               lock_kernel();
                task->tk_status = rpcauth_unwrap_resp(task, decode, req, p,
                                                      task->tk_msg.rpc_resp);
+               unlock_kernel();
+       }
        dprintk("RPC: %4d call_decode result %d\n", task->tk_pid,
                                        task->tk_status);
        return;
index e52afab413ded56212cad69a2801df58bc6b6d6c..3946ec3eb517a674881fc3b8d35277f2a5f3de44 100644 (file)
@@ -101,14 +101,14 @@ void rpc_getport(struct rpc_task *task)
        /* Autobind on cloned rpc clients is discouraged */
        BUG_ON(clnt->cl_parent != clnt);
 
+       status = -EACCES;               /* tell caller to check again */
+       if (xprt_test_and_set_binding(xprt))
+               goto bailout_nowake;
+
        /* Put self on queue before sending rpcbind request, in case
         * pmap_getport_done completes before we return from rpc_run_task */
        rpc_sleep_on(&xprt->binding, task, NULL, NULL);
 
-       status = -EACCES;               /* tell caller to check again */
-       if (xprt_test_and_set_binding(xprt))
-               goto bailout_nofree;
-
        /* Someone else may have bound if we slept */
        status = 0;
        if (xprt_bound(xprt))
@@ -134,7 +134,7 @@ void rpc_getport(struct rpc_task *task)
        child = rpc_run_task(pmap_clnt, RPC_TASK_ASYNC, &pmap_getport_ops, map);
        if (IS_ERR(child))
                goto bailout;
-       rpc_release_task(child);
+       rpc_put_task(child);
 
        task->tk_xprt->stat.bind_count++;
        return;
@@ -143,8 +143,9 @@ bailout:
        pmap_map_free(map);
        xprt_put(xprt);
 bailout_nofree:
-       task->tk_status = status;
        pmap_wake_portmap_waiters(xprt, status);
+bailout_nowake:
+       task->tk_status = status;
 }
 
 #ifdef CONFIG_ROOT_NFS
index 225e6510b523717b51d35b776e4e5fdb891249bd..79bc4cdf5d4861e70c2614012dfbec35d7409e72 100644 (file)
@@ -266,12 +266,28 @@ static int rpc_wait_bit_interruptible(void *word)
        return 0;
 }
 
+/*
+ * Mark an RPC task as active.  Only the caller that actually flips
+ * RPC_TASK_ACTIVE from clear to set carries on; it then, under
+ * rpc_sched_lock, appends the task to the global all_tasks list and (when
+ * RPC_DEBUG is defined) stamps tk_magic and hands out the next tk_pid.
+ * Calling this on a task whose bit is already set is a no-op.
+ */
+static void rpc_set_active(struct rpc_task *task)
+{
+       if (test_and_set_bit(RPC_TASK_ACTIVE, &task->tk_runstate) != 0)
+               return;
+       spin_lock(&rpc_sched_lock);
+#ifdef RPC_DEBUG
+       task->tk_magic = RPC_TASK_MAGIC_ID;
+       task->tk_pid = rpc_task_id++;
+#endif
+       /* Add to global list of all tasks */
+       list_add_tail(&task->tk_task, &all_tasks);
+       spin_unlock(&rpc_sched_lock);
+}
+
 /*
  * Mark an RPC call as having completed by clearing the 'active' bit
  */
-static inline void rpc_mark_complete_task(struct rpc_task *task)
+static void rpc_mark_complete_task(struct rpc_task *task)
 {
-       rpc_clear_active(task);
+       smp_mb__before_clear_bit();
+       clear_bit(RPC_TASK_ACTIVE, &task->tk_runstate);
+       smp_mb__after_clear_bit();
        wake_up_bit(&task->tk_runstate, RPC_TASK_ACTIVE);
 }
 
@@ -295,13 +311,15 @@ EXPORT_SYMBOL(__rpc_wait_for_completion_task);
  */
 static void rpc_make_runnable(struct rpc_task *task)
 {
-       int do_ret;
-
        BUG_ON(task->tk_timeout_fn);
-       do_ret = rpc_test_and_set_running(task);
        rpc_clear_queued(task);
-       if (do_ret)
+       if (rpc_test_and_set_running(task))
                return;
+       /* We might have raced */
+       if (RPC_IS_QUEUED(task)) {
+               rpc_clear_running(task);
+               return;
+       }
        if (RPC_IS_ASYNC(task)) {
                int status;
 
@@ -333,9 +351,6 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
                return;
        }
 
-       /* Mark the task as being activated if so needed */
-       rpc_set_active(task);
-
        __rpc_add_wait_queue(q, task);
 
        BUG_ON(task->tk_callback != NULL);
@@ -346,6 +361,9 @@ static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
 void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
                                rpc_action action, rpc_action timer)
 {
+       /* Mark the task as being activated if so needed */
+       rpc_set_active(task);
+
        /*
         * Protect the queue operations.
         */
@@ -409,16 +427,19 @@ __rpc_default_timer(struct rpc_task *task)
  */
 void rpc_wake_up_task(struct rpc_task *task)
 {
+       rcu_read_lock_bh();
        if (rpc_start_wakeup(task)) {
                if (RPC_IS_QUEUED(task)) {
                        struct rpc_wait_queue *queue = task->u.tk_wait.rpc_waitq;
 
-                       spin_lock_bh(&queue->lock);
+                       /* Note: we're already in a bh-safe context */
+                       spin_lock(&queue->lock);
                        __rpc_do_wake_up_task(task);
-                       spin_unlock_bh(&queue->lock);
+                       spin_unlock(&queue->lock);
                }
                rpc_finish_wakeup(task);
        }
+       rcu_read_unlock_bh();
 }
 
 /*
@@ -481,14 +502,16 @@ struct rpc_task * rpc_wake_up_next(struct rpc_wait_queue *queue)
        struct rpc_task *task = NULL;
 
        dprintk("RPC:      wake_up_next(%p \"%s\")\n", queue, rpc_qname(queue));
-       spin_lock_bh(&queue->lock);
+       rcu_read_lock_bh();
+       spin_lock(&queue->lock);
        if (RPC_IS_PRIORITY(queue))
                task = __rpc_wake_up_next_priority(queue);
        else {
                task_for_first(task, &queue->tasks[0])
                        __rpc_wake_up_task(task);
        }
-       spin_unlock_bh(&queue->lock);
+       spin_unlock(&queue->lock);
+       rcu_read_unlock_bh();
 
        return task;
 }
@@ -504,7 +527,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
        struct rpc_task *task, *next;
        struct list_head *head;
 
-       spin_lock_bh(&queue->lock);
+       rcu_read_lock_bh();
+       spin_lock(&queue->lock);
        head = &queue->tasks[queue->maxpriority];
        for (;;) {
                list_for_each_entry_safe(task, next, head, u.tk_wait.list)
@@ -513,7 +537,8 @@ void rpc_wake_up(struct rpc_wait_queue *queue)
                        break;
                head--;
        }
-       spin_unlock_bh(&queue->lock);
+       spin_unlock(&queue->lock);
+       rcu_read_unlock_bh();
 }
 
 /**
@@ -528,7 +553,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
        struct rpc_task *task, *next;
        struct list_head *head;
 
-       spin_lock_bh(&queue->lock);
+       rcu_read_lock_bh();
+       spin_lock(&queue->lock);
        head = &queue->tasks[queue->maxpriority];
        for (;;) {
                list_for_each_entry_safe(task, next, head, u.tk_wait.list) {
@@ -539,7 +565,8 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status)
                        break;
                head--;
        }
-       spin_unlock_bh(&queue->lock);
+       spin_unlock(&queue->lock);
+       rcu_read_unlock_bh();
 }
 
 static void __rpc_atrun(struct rpc_task *task)
@@ -561,7 +588,9 @@ void rpc_delay(struct rpc_task *task, unsigned long delay)
  */
 static void rpc_prepare_task(struct rpc_task *task)
 {
+       lock_kernel();
        task->tk_ops->rpc_call_prepare(task, task->tk_calldata);
+       unlock_kernel();
 }
 
 /*
@@ -571,7 +600,9 @@ void rpc_exit_task(struct rpc_task *task)
 {
        task->tk_action = NULL;
        if (task->tk_ops->rpc_call_done != NULL) {
+               lock_kernel();
                task->tk_ops->rpc_call_done(task, task->tk_calldata);
+               unlock_kernel();
                if (task->tk_action != NULL) {
                        WARN_ON(RPC_ASSASSINATED(task));
                        /* Always release the RPC slot and buffer memory */
@@ -581,6 +612,15 @@ void rpc_exit_task(struct rpc_task *task)
 }
 EXPORT_SYMBOL(rpc_exit_task);
 
+/*
+ * Run the rpc_release callback of @ops on @calldata, if the ops table
+ * provides one.  The callback is invoked between lock_kernel() and
+ * unlock_kernel(); without a callback nothing is locked or called.
+ */
+void rpc_release_calldata(const struct rpc_call_ops *ops, void *calldata)
+{
+       if (ops->rpc_release == NULL)
+               return;
+
+       lock_kernel();
+       ops->rpc_release(calldata);
+       unlock_kernel();
+}
+
 /*
  * This is the RPC `scheduler' (or rather, the finite state machine).
  */
@@ -615,9 +655,7 @@ static int __rpc_execute(struct rpc_task *task)
                         */
                        save_callback=task->tk_callback;
                        task->tk_callback=NULL;
-                       lock_kernel();
                        save_callback(task);
-                       unlock_kernel();
                }
 
                /*
@@ -628,9 +666,7 @@ static int __rpc_execute(struct rpc_task *task)
                if (!RPC_IS_QUEUED(task)) {
                        if (task->tk_action == NULL)
                                break;
-                       lock_kernel();
                        task->tk_action(task);
-                       unlock_kernel();
                }
 
                /*
@@ -671,8 +707,6 @@ static int __rpc_execute(struct rpc_task *task)
        }
 
        dprintk("RPC: %4d, return %d, status %d\n", task->tk_pid, status, task->tk_status);
-       /* Wake up anyone who is waiting for task completion */
-       rpc_mark_complete_task(task);
        /* Release all resources associated with the task */
        rpc_release_task(task);
        return status;
@@ -786,15 +820,6 @@ void rpc_init_task(struct rpc_task *task, struct rpc_clnt *clnt, int flags, cons
                        task->tk_flags |= RPC_TASK_NOINTR;
        }
 
-#ifdef RPC_DEBUG
-       task->tk_magic = RPC_TASK_MAGIC_ID;
-       task->tk_pid = rpc_task_id++;
-#endif
-       /* Add to global list of all tasks */
-       spin_lock(&rpc_sched_lock);
-       list_add_tail(&task->tk_task, &all_tasks);
-       spin_unlock(&rpc_sched_lock);
-
        BUG_ON(task->tk_ops == NULL);
 
        /* starting timestamp */
@@ -810,8 +835,9 @@ rpc_alloc_task(void)
        return (struct rpc_task *)mempool_alloc(rpc_task_mempool, GFP_NOFS);
 }
 
-static void rpc_free_task(struct rpc_task *task)
+static void rpc_free_task(struct rcu_head *rcu)
 {
+       struct rpc_task *task = container_of(rcu, struct rpc_task, u.tk_rcu);
        dprintk("RPC: %4d freeing task\n", task->tk_pid);
        mempool_free(task, rpc_task_mempool);
 }
@@ -847,16 +873,34 @@ cleanup:
        goto out;
 }
 
-void rpc_release_task(struct rpc_task *task)
+
+/*
+ * Drop one reference (tk_count) on an RPC task.  Nothing else happens
+ * unless this was the last reference; in that case the request slot, the
+ * bound credential and the client reference are released, a task flagged
+ * RPC_TASK_DYNAMIC is handed to call_rcu_bh() for freeing, and finally the
+ * release callback is run on the call data.
+ *
+ * tk_ops and tk_calldata are copied into locals on entry so that the last
+ * step does not read from the task after it has been queued for freeing.
+ */
+void rpc_put_task(struct rpc_task *task)
 {
        const struct rpc_call_ops *tk_ops = task->tk_ops;
        void *calldata = task->tk_calldata;
 
+       if (!atomic_dec_and_test(&task->tk_count))
+               return;
+       /* Release resources */
+       if (task->tk_rqstp)
+               xprt_release(task);
+       if (task->tk_msg.rpc_cred)
+               rpcauth_unbindcred(task);
+       if (task->tk_client) {
+               rpc_release_client(task->tk_client);
+               task->tk_client = NULL;
+       }
+       if (task->tk_flags & RPC_TASK_DYNAMIC)
+               call_rcu_bh(&task->u.tk_rcu, rpc_free_task);
+       rpc_release_calldata(tk_ops, calldata);
+}
+EXPORT_SYMBOL(rpc_put_task);
+
+void rpc_release_task(struct rpc_task *task)
+{
 #ifdef RPC_DEBUG
        BUG_ON(task->tk_magic != RPC_TASK_MAGIC_ID);
 #endif
-       if (!atomic_dec_and_test(&task->tk_count))
-               return;
        dprintk("RPC: %4d release task\n", task->tk_pid);
 
        /* Remove from global task list */
@@ -869,23 +913,13 @@ void rpc_release_task(struct rpc_task *task)
        /* Synchronously delete any running timer */
        rpc_delete_timer(task);
 
-       /* Release resources */
-       if (task->tk_rqstp)
-               xprt_release(task);
-       if (task->tk_msg.rpc_cred)
-               rpcauth_unbindcred(task);
-       if (task->tk_client) {
-               rpc_release_client(task->tk_client);
-               task->tk_client = NULL;
-       }
-
 #ifdef RPC_DEBUG
        task->tk_magic = 0;
 #endif
-       if (task->tk_flags & RPC_TASK_DYNAMIC)
-               rpc_free_task(task);
-       if (tk_ops->rpc_release)
-               tk_ops->rpc_release(calldata);
+       /* Wake up anyone who is waiting for task completion */
+       rpc_mark_complete_task(task);
+
+       rpc_put_task(task);
 }
 
 /**
@@ -902,8 +936,7 @@ struct rpc_task *rpc_run_task(struct rpc_clnt *clnt, int flags,
        struct rpc_task *task;
        task = rpc_new_task(clnt, flags, ops, data);
        if (task == NULL) {
-               if (ops->rpc_release != NULL)
-                       ops->rpc_release(data);
+               rpc_release_calldata(ops, data);
                return ERR_PTR(-ENOMEM);
        }
        atomic_inc(&task->tk_count);
index 2635c543ba067979f4d31b95227117d1451ded73..634885b0c04dcc5e6e4909297b7368e73898086d 100644 (file)
@@ -16,7 +16,7 @@
 
 
 /**
- * skb_read_bits - copy some data bits from skb to internal buffer
+ * xdr_skb_read_bits - copy some data bits from skb to internal buffer
  * @desc: sk_buff copy helper
  * @to: copy destination
  * @len: number of bytes to copy
  * Possibly called several times to iterate over an sk_buff and copy
  * data out of it.
  */
-static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len)
+size_t xdr_skb_read_bits(struct xdr_skb_reader *desc, void *to, size_t len)
 {
        if (len > desc->count)
                len = desc->count;
-       if (skb_copy_bits(desc->skb, desc->offset, to, len))
+       if (unlikely(skb_copy_bits(desc->skb, desc->offset, to, len)))
                return 0;
        desc->count -= len;
        desc->offset += len;
@@ -36,14 +36,14 @@ static size_t skb_read_bits(skb_reader_t *desc, void *to, size_t len)
 }
 
 /**
- * skb_read_and_csum_bits - copy and checksum from skb to buffer
+ * xdr_skb_read_and_csum_bits - copy and checksum from skb to buffer
  * @desc: sk_buff copy helper
  * @to: copy destination
  * @len: number of bytes to copy
  *
  * Same as xdr_skb_read_bits, but calculate a checksum at the same time.
  */
-static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
+static size_t xdr_skb_read_and_csum_bits(struct xdr_skb_reader *desc, void *to, size_t len)
 {
        unsigned int pos;
        __wsum csum2;
@@ -66,7 +66,7 @@ static size_t skb_read_and_csum_bits(skb_reader_t *desc, void *to, size_t len)
  * @copy_actor: virtual method for copying data
  *
  */
-ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, skb_reader_t *desc, skb_read_actor_t copy_actor)
+ssize_t xdr_partial_copy_from_skb(struct xdr_buf *xdr, unsigned int base, struct xdr_skb_reader *desc, xdr_skb_read_actor copy_actor)
 {
        struct page     **ppage = xdr->pages;
        unsigned int    len, pglen = xdr->page_len;
@@ -148,7 +148,7 @@ out:
  */
 int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
 {
-       skb_reader_t    desc;
+       struct xdr_skb_reader   desc;
 
        desc.skb = skb;
        desc.offset = sizeof(struct udphdr);
@@ -158,7 +158,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
                goto no_checksum;
 
        desc.csum = csum_partial(skb->data, desc.offset, skb->csum);
-       if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_and_csum_bits) < 0)
+       if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_and_csum_bits) < 0)
                return -1;
        if (desc.offset != skb->len) {
                __wsum csum2;
@@ -173,7 +173,7 @@ int csum_partial_copy_to_xdr(struct xdr_buf *xdr, struct sk_buff *skb)
                netdev_rx_csum_fault(skb->dev);
        return 0;
 no_checksum:
-       if (xdr_partial_copy_from_skb(xdr, 0, &desc, skb_read_bits) < 0)
+       if (xdr_partial_copy_from_skb(xdr, 0, &desc, xdr_skb_read_bits) < 0)
                return -1;
        if (desc.count)
                return -1;
index 192dff5dabcbc76258a378179d7132f5e3845bf8..d85fddeb6388f54e4293b85e1e64da965c2ebd76 100644 (file)
@@ -33,7 +33,6 @@ EXPORT_SYMBOL(rpciod_down);
 EXPORT_SYMBOL(rpciod_up);
 EXPORT_SYMBOL(rpc_new_task);
 EXPORT_SYMBOL(rpc_wake_up_status);
-EXPORT_SYMBOL(rpc_release_task);
 
 /* RPC client functions */
 EXPORT_SYMBOL(rpc_clone_client);
@@ -139,6 +138,8 @@ EXPORT_SYMBOL(nlm_debug);
 extern int register_rpc_pipefs(void);
 extern void unregister_rpc_pipefs(void);
 extern struct cache_detail ip_map_cache;
+extern int init_socket_xprt(void);
+extern void cleanup_socket_xprt(void);
 
 static int __init
 init_sunrpc(void)
@@ -156,6 +157,7 @@ init_sunrpc(void)
        rpc_proc_init();
 #endif
        cache_register(&ip_map_cache);
+       init_socket_xprt();
 out:
        return err;
 }
@@ -163,6 +165,7 @@ out:
 static void __exit
 cleanup_sunrpc(void)
 {
+       cleanup_socket_xprt();
        unregister_rpc_pipefs();
        rpc_destroy_mempool();
        if (cache_unregister(&ip_map_cache))
index d89b048ad6bba57a4964c72a216a93d3a3bff4fd..82b27528d0c45b50b2b25416b84b9f03b1d6ac74 100644 (file)
@@ -18,7 +18,6 @@
 #include <linux/sunrpc/types.h>
 #include <linux/sunrpc/sched.h>
 #include <linux/sunrpc/stats.h>
-#include <linux/sunrpc/xprt.h>
 
 /*
  * Declare the debug flags here
@@ -119,11 +118,6 @@ done:
 }
 
 
-static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
-static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
-static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
-static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
-
 static ctl_table debug_table[] = {
        {
                .ctl_name       = CTL_RPCDEBUG,
@@ -157,50 +151,6 @@ static ctl_table debug_table[] = {
                .mode           = 0644,
                .proc_handler   = &proc_dodebug
        }, 
-       {
-               .ctl_name       = CTL_SLOTTABLE_UDP,
-               .procname       = "udp_slot_table_entries",
-               .data           = &xprt_udp_slot_table_entries,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
-               .extra1         = &min_slot_table_size,
-               .extra2         = &max_slot_table_size
-       },
-       {
-               .ctl_name       = CTL_SLOTTABLE_TCP,
-               .procname       = "tcp_slot_table_entries",
-               .data           = &xprt_tcp_slot_table_entries,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
-               .extra1         = &min_slot_table_size,
-               .extra2         = &max_slot_table_size
-       },
-       {
-               .ctl_name       = CTL_MIN_RESVPORT,
-               .procname       = "min_resvport",
-               .data           = &xprt_min_resvport,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
-               .extra1         = &xprt_min_resvport_limit,
-               .extra2         = &xprt_max_resvport_limit
-       },
-       {
-               .ctl_name       = CTL_MAX_RESVPORT,
-               .procname       = "max_resvport",
-               .data           = &xprt_max_resvport,
-               .maxlen         = sizeof(unsigned int),
-               .mode           = 0644,
-               .proc_handler   = &proc_dointvec_minmax,
-               .strategy       = &sysctl_intvec,
-               .extra1         = &xprt_min_resvport_limit,
-               .extra2         = &xprt_max_resvport_limit
-       },
        { .ctl_name = 0 }
 };
 
index 9022eb8b37ed9d05ea2e1bd983747b49b4c0ff7b..a0af250ca319752a6c940ff6be4ef7beebc937f2 100644 (file)
@@ -640,41 +640,30 @@ xdr_buf_from_iov(struct kvec *iov, struct xdr_buf *buf)
        buf->buflen = buf->len = iov->iov_len;
 }
 
-/* Sets subiov to the intersection of iov with the buffer of length len
- * starting base bytes after iov.  Indicates empty intersection by setting
- * length of subiov to zero.  Decrements len by length of subiov, sets base
- * to zero (or decrements it by length of iov if subiov is empty). */
-static void
-iov_subsegment(struct kvec *iov, struct kvec *subiov, int *base, int *len)
-{
-       if (*base > iov->iov_len) {
-               subiov->iov_base = NULL;
-               subiov->iov_len = 0;
-               *base -= iov->iov_len;
-       } else {
-               subiov->iov_base = iov->iov_base + *base;
-               subiov->iov_len = min(*len, (int)iov->iov_len - *base);
-               *base = 0;
-       }
-       *len -= subiov->iov_len; 
-}
-
 /* Sets subbuf to the portion of buf of length len beginning base bytes
  * from the start of buf. Returns -1 if base or length are out of bounds. */
 int
 xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
-                       int base, int len)
+                       unsigned int base, unsigned int len)
 {
-       int i;
-
        subbuf->buflen = subbuf->len = len;
-       iov_subsegment(buf->head, subbuf->head, &base, &len);
+       if (base < buf->head[0].iov_len) {
+               subbuf->head[0].iov_base = buf->head[0].iov_base + base;
+               subbuf->head[0].iov_len = min_t(unsigned int, len,
+                                               buf->head[0].iov_len - base);
+               len -= subbuf->head[0].iov_len;
+               base = 0;
+       } else {
+               subbuf->head[0].iov_base = NULL;
+               subbuf->head[0].iov_len = 0;
+               base -= buf->head[0].iov_len;
+       }
 
        if (base < buf->page_len) {
-               i = (base + buf->page_base) >> PAGE_CACHE_SHIFT;
-               subbuf->pages = &buf->pages[i];
-               subbuf->page_base = (base + buf->page_base) & ~PAGE_CACHE_MASK;
-               subbuf->page_len = min((int)buf->page_len - base, len);
+               subbuf->page_len = min(buf->page_len - base, len);
+               base += buf->page_base;
+               subbuf->page_base = base & ~PAGE_CACHE_MASK;
+               subbuf->pages = &buf->pages[base >> PAGE_CACHE_SHIFT];
                len -= subbuf->page_len;
                base = 0;
        } else {
@@ -682,66 +671,85 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf,
                subbuf->page_len = 0;
        }
 
-       iov_subsegment(buf->tail, subbuf->tail, &base, &len);
+       if (base < buf->tail[0].iov_len) {
+               subbuf->tail[0].iov_base = buf->tail[0].iov_base + base;
+               subbuf->tail[0].iov_len = min_t(unsigned int, len,
+                                               buf->tail[0].iov_len - base);
+               len -= subbuf->tail[0].iov_len;
+               base = 0;
+       } else {
+               subbuf->tail[0].iov_base = NULL;
+               subbuf->tail[0].iov_len = 0;
+               base -= buf->tail[0].iov_len;
+       }
+
        if (base || len)
                return -1;
        return 0;
 }
 
-/* obj is assumed to point to allocated memory of size at least len: */
-int
-read_bytes_from_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
+static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
 {
-       struct xdr_buf subbuf;
-       int this_len;
-       int status;
+       unsigned int this_len;
 
-       status = xdr_buf_subsegment(buf, &subbuf, base, len);
-       if (status)
-               goto out;
-       this_len = min(len, (int)subbuf.head[0].iov_len);
-       memcpy(obj, subbuf.head[0].iov_base, this_len);
+       this_len = min_t(unsigned int, len, subbuf->head[0].iov_len);
+       memcpy(obj, subbuf->head[0].iov_base, this_len);
        len -= this_len;
        obj += this_len;
-       this_len = min(len, (int)subbuf.page_len);
+       this_len = min_t(unsigned int, len, subbuf->page_len);
        if (this_len)
-               _copy_from_pages(obj, subbuf.pages, subbuf.page_base, this_len);
+               _copy_from_pages(obj, subbuf->pages, subbuf->page_base, this_len);
        len -= this_len;
        obj += this_len;
-       this_len = min(len, (int)subbuf.tail[0].iov_len);
-       memcpy(obj, subbuf.tail[0].iov_base, this_len);
-out:
-       return status;
+       this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
+       memcpy(obj, subbuf->tail[0].iov_base, this_len);
 }
 
 /* obj is assumed to point to allocated memory of size at least len: */
-int
-write_bytes_to_xdr_buf(struct xdr_buf *buf, int base, void *obj, int len)
+int read_bytes_from_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
 {
        struct xdr_buf subbuf;
-       int this_len;
        int status;
 
        status = xdr_buf_subsegment(buf, &subbuf, base, len);
-       if (status)
-               goto out;
-       this_len = min(len, (int)subbuf.head[0].iov_len);
-       memcpy(subbuf.head[0].iov_base, obj, this_len);
+       if (status != 0)
+               return status;
+       __read_bytes_from_xdr_buf(&subbuf, obj, len);
+       return 0;
+}
+
+static void __write_bytes_to_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len)
+{
+       unsigned int this_len;
+
+       this_len = min_t(unsigned int, len, subbuf->head[0].iov_len);
+       memcpy(subbuf->head[0].iov_base, obj, this_len);
        len -= this_len;
        obj += this_len;
-       this_len = min(len, (int)subbuf.page_len);
+       this_len = min_t(unsigned int, len, subbuf->page_len);
        if (this_len)
-               _copy_to_pages(subbuf.pages, subbuf.page_base, obj, this_len);
+               _copy_to_pages(subbuf->pages, subbuf->page_base, obj, this_len);
        len -= this_len;
        obj += this_len;
-       this_len = min(len, (int)subbuf.tail[0].iov_len);
-       memcpy(subbuf.tail[0].iov_base, obj, this_len);
-out:
-       return status;
+       this_len = min_t(unsigned int, len, subbuf->tail[0].iov_len);
+       memcpy(subbuf->tail[0].iov_base, obj, this_len);
+}
+
+/* obj is assumed to point to allocated memory of size at least len: */
+int write_bytes_to_xdr_buf(struct xdr_buf *buf, unsigned int base, void *obj, unsigned int len)
+{
+       struct xdr_buf subbuf;
+       int status;
+
+       status = xdr_buf_subsegment(buf, &subbuf, base, len);
+       if (status != 0)
+               return status;
+       __write_bytes_to_xdr_buf(&subbuf, obj, len);
+       return 0;
 }
 
 int
-xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
+xdr_decode_word(struct xdr_buf *buf, unsigned int base, u32 *obj)
 {
        __be32  raw;
        int     status;
@@ -754,7 +762,7 @@ xdr_decode_word(struct xdr_buf *buf, int base, u32 *obj)
 }
 
 int
-xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
+xdr_encode_word(struct xdr_buf *buf, unsigned int base, u32 obj)
 {
        __be32  raw = htonl(obj);
 
@@ -765,44 +773,37 @@ xdr_encode_word(struct xdr_buf *buf, int base, u32 obj)
  * entirely in the head or the tail, set object to point to it; otherwise
  * try to find space for it at the end of the tail, copy it there, and
  * set obj to point to it. */
-int
-xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, int offset)
+int xdr_buf_read_netobj(struct xdr_buf *buf, struct xdr_netobj *obj, unsigned int offset)
 {
-       u32     tail_offset = buf->head[0].iov_len + buf->page_len;
-       u32     obj_end_offset;
+       struct xdr_buf subbuf;
 
        if (xdr_decode_word(buf, offset, &obj->len))
-               goto out;
-       obj_end_offset = offset + 4 + obj->len;
-
-       if (obj_end_offset <= buf->head[0].iov_len) {
-               /* The obj is contained entirely in the head: */
-               obj->data = buf->head[0].iov_base + offset + 4;
-       } else if (offset + 4 >= tail_offset) {
-               if (obj_end_offset - tail_offset
-                               > buf->tail[0].iov_len)
-                       goto out;
-               /* The obj is contained entirely in the tail: */
-               obj->data = buf->tail[0].iov_base
-                       + offset - tail_offset + 4;
-       } else {
-               /* use end of tail as storage for obj:
-                * (We don't copy to the beginning because then we'd have
-                * to worry about doing a potentially overlapping copy.
-                * This assumes the object is at most half the length of the
-                * tail.) */
-               if (obj->len > buf->tail[0].iov_len)
-                       goto out;
-               obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len - 
-                               obj->len;
-               if (read_bytes_from_xdr_buf(buf, offset + 4,
-                                       obj->data, obj->len))
-                       goto out;
+               return -EFAULT;
+       if (xdr_buf_subsegment(buf, &subbuf, offset + 4, obj->len))
+               return -EFAULT;
 
-       }
+       /* Is the obj contained entirely in the head? */
+       obj->data = subbuf.head[0].iov_base;
+       if (subbuf.head[0].iov_len == obj->len)
+               return 0;
+       /* ..or is the obj contained entirely in the tail? */
+       obj->data = subbuf.tail[0].iov_base;
+       if (subbuf.tail[0].iov_len == obj->len)
+               return 0;
+
+       /* use end of tail as storage for obj:
+        * (We don't copy to the beginning because then we'd have
+        * to worry about doing a potentially overlapping copy.
+        * This assumes the object is at most half the length of the
+        * tail.) */
+       if (obj->len > buf->buflen - buf->len)
+               return -ENOMEM;
+       if (buf->tail[0].iov_len != 0)
+               obj->data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+       else
+               obj->data = buf->head[0].iov_base + buf->head[0].iov_len;
+       __read_bytes_from_xdr_buf(&subbuf, obj->data, obj->len);
        return 0;
-out:
-       return -1;
 }
 
 /* Returns 0 on success, or else a negative error code. */
@@ -1020,3 +1021,71 @@ xdr_encode_array2(struct xdr_buf *buf, unsigned int base,
 
        return xdr_xcode_array2(buf, base, desc, 1);
 }
+
+int
+xdr_process_buf(struct xdr_buf *buf, unsigned int offset, unsigned int len,
+                int (*actor)(struct scatterlist *, void *), void *data)
+{
+       int i, ret = 0;
+       unsigned page_len, thislen, page_offset;
+       struct scatterlist      sg[1];
+
+       if (offset >= buf->head[0].iov_len) {
+               offset -= buf->head[0].iov_len;
+       } else {
+               thislen = buf->head[0].iov_len - offset;
+               if (thislen > len)
+                       thislen = len;
+               sg_set_buf(sg, buf->head[0].iov_base + offset, thislen);
+               ret = actor(sg, data);
+               if (ret)
+                       goto out;
+               offset = 0;
+               len -= thislen;
+       }
+       if (len == 0)
+               goto out;
+
+       if (offset >= buf->page_len) {
+               offset -= buf->page_len;
+       } else {
+               page_len = buf->page_len - offset;
+               if (page_len > len)
+                       page_len = len;
+               len -= page_len;
+               page_offset = (offset + buf->page_base) & (PAGE_CACHE_SIZE - 1);
+               i = (offset + buf->page_base) >> PAGE_CACHE_SHIFT;
+               thislen = PAGE_CACHE_SIZE - page_offset;
+               do {
+                       if (thislen > page_len)
+                               thislen = page_len;
+                       sg->page = buf->pages[i];
+                       sg->offset = page_offset;
+                       sg->length = thislen;
+                       ret = actor(sg, data);
+                       if (ret)
+                               goto out;
+                       page_len -= thislen;
+                       i++;
+                       page_offset = 0;
+                       thislen = PAGE_CACHE_SIZE;
+               } while (page_len != 0);
+               offset = 0;
+       }
+       if (len == 0)
+               goto out;
+       if (offset < buf->tail[0].iov_len) {
+               thislen = buf->tail[0].iov_len - offset;
+               if (thislen > len)
+                       thislen = len;
+               sg_set_buf(sg, buf->tail[0].iov_base + offset, thislen);
+               ret = actor(sg, data);
+               len -= thislen;
+       }
+       if (len != 0)
+               ret = -EINVAL;
+out:
+       return ret;
+}
+EXPORT_SYMBOL(xdr_process_buf);
+
index 4f9a5d9791fb11fabeff5d1eb48b191ac070d3d9..7a3999f0a4a2aabc7e156bdb459a68a5cab7d6a7 100644 (file)
@@ -459,7 +459,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
                if (to->to_maxval && req->rq_timeout >= to->to_maxval)
                        req->rq_timeout = to->to_maxval;
                req->rq_retries++;
-               pprintk("RPC: %lu retrans\n", jiffies);
        } else {
                req->rq_timeout = to->to_initval;
                req->rq_retries = 0;
@@ -468,7 +467,6 @@ int xprt_adjust_timeout(struct rpc_rqst *req)
                spin_lock_bh(&xprt->transport_lock);
                rpc_init_rtt(req->rq_task->tk_client->cl_rtt, to->to_initval);
                spin_unlock_bh(&xprt->transport_lock);
-               pprintk("RPC: %lu timeout\n", jiffies);
                status = -ETIMEDOUT;
        }
 
@@ -892,39 +890,25 @@ void xprt_set_timeout(struct rpc_timeout *to, unsigned int retr, unsigned long i
  */
 struct rpc_xprt *xprt_create_transport(int proto, struct sockaddr *ap, size_t size, struct rpc_timeout *to)
 {
-       int result;
        struct rpc_xprt *xprt;
        struct rpc_rqst *req;
 
-       if ((xprt = kzalloc(sizeof(struct rpc_xprt), GFP_KERNEL)) == NULL) {
-               dprintk("RPC:      xprt_create_transport: no memory\n");
-               return ERR_PTR(-ENOMEM);
-       }
-       if (size <= sizeof(xprt->addr)) {
-               memcpy(&xprt->addr, ap, size);
-               xprt->addrlen = size;
-       } else {
-               kfree(xprt);
-               dprintk("RPC:      xprt_create_transport: address too large\n");
-               return ERR_PTR(-EBADF);
-       }
-
        switch (proto) {
        case IPPROTO_UDP:
-               result = xs_setup_udp(xprt, to);
+               xprt = xs_setup_udp(ap, size, to);
                break;
        case IPPROTO_TCP:
-               result = xs_setup_tcp(xprt, to);
+               xprt = xs_setup_tcp(ap, size, to);
                break;
        default:
                printk(KERN_ERR "RPC: unrecognized transport protocol: %d\n",
                                proto);
                return ERR_PTR(-EIO);
        }
-       if (result) {
-               kfree(xprt);
-               dprintk("RPC:      xprt_create_transport: failed, %d\n", result);
-               return ERR_PTR(result);
+       if (IS_ERR(xprt)) {
+               dprintk("RPC:      xprt_create_transport: failed, %ld\n",
+                               -PTR_ERR(xprt));
+               return xprt;
        }
 
        kref_init(&xprt->kref);
@@ -970,8 +954,11 @@ static void xprt_destroy(struct kref *kref)
        dprintk("RPC:      destroying transport %p\n", xprt);
        xprt->shutdown = 1;
        del_timer_sync(&xprt->timer);
+
+       /*
+        * Tear down transport state and free the rpc_xprt
+        */
        xprt->ops->destroy(xprt);
-       kfree(xprt);
 }
 
 /**
index 2fc4a3123261b1b9c37ff3ef122da2d10e817129..49cabffd7fdb87a884a9416b9a46b5848d509506 100644 (file)
@@ -45,6 +45,92 @@ unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE;
 unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
 unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
 
+/*
+ * We can register our own files under /proc/sys/sunrpc by
+ * calling register_sysctl_table() again.  The files in that
+ * directory become the union of all files registered there.
+ *
+ * We simply need to make sure that we don't collide with
+ * someone else's file names!
+ */
+
+#ifdef RPC_DEBUG
+
+static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
+static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
+static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
+static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
+
+static struct ctl_table_header *sunrpc_table_header;
+
+/*
+ * FIXME: changing the UDP slot table size should also resize the UDP
+ *        socket buffers for existing UDP transports
+ */
+static ctl_table xs_tunables_table[] = {
+       {
+               .ctl_name       = CTL_SLOTTABLE_UDP,
+               .procname       = "udp_slot_table_entries",
+               .data           = &xprt_udp_slot_table_entries,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &min_slot_table_size,
+               .extra2         = &max_slot_table_size
+       },
+       {
+               .ctl_name       = CTL_SLOTTABLE_TCP,
+               .procname       = "tcp_slot_table_entries",
+               .data           = &xprt_tcp_slot_table_entries,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &min_slot_table_size,
+               .extra2         = &max_slot_table_size
+       },
+       {
+               .ctl_name       = CTL_MIN_RESVPORT,
+               .procname       = "min_resvport",
+               .data           = &xprt_min_resvport,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &xprt_min_resvport_limit,
+               .extra2         = &xprt_max_resvport_limit
+       },
+       {
+               .ctl_name       = CTL_MAX_RESVPORT,
+               .procname       = "max_resvport",
+               .data           = &xprt_max_resvport,
+               .maxlen         = sizeof(unsigned int),
+               .mode           = 0644,
+               .proc_handler   = &proc_dointvec_minmax,
+               .strategy       = &sysctl_intvec,
+               .extra1         = &xprt_min_resvport_limit,
+               .extra2         = &xprt_max_resvport_limit
+       },
+       {
+               .ctl_name = 0,
+       },
+};
+
+static ctl_table sunrpc_table[] = {
+       {
+               .ctl_name       = CTL_SUNRPC,
+               .procname       = "sunrpc",
+               .mode           = 0555,
+               .child          = xs_tunables_table
+       },
+       {
+               .ctl_name = 0,
+       },
+};
+
+#endif
+
 /*
  * How many times to try sending a request on a socket before waiting
  * for the socket buffer to clear.
@@ -125,6 +211,55 @@ static inline void xs_pktdump(char *msg, u32 *packet, unsigned int count)
 }
 #endif
 
+struct sock_xprt {
+       struct rpc_xprt         xprt;
+
+       /*
+        * Network layer
+        */
+       struct socket *         sock;
+       struct sock *           inet;
+
+       /*
+        * State of TCP reply receive
+        */
+       __be32                  tcp_fraghdr,
+                               tcp_xid;
+
+       u32                     tcp_offset,
+                               tcp_reclen;
+
+       unsigned long           tcp_copied,
+                               tcp_flags;
+
+       /*
+        * Connection of transports
+        */
+       struct delayed_work     connect_worker;
+       unsigned short          port;
+
+       /*
+        * UDP socket buffer size parameters
+        */
+       size_t                  rcvsize,
+                               sndsize;
+
+       /*
+        * Saved socket callback addresses
+        */
+       void                    (*old_data_ready)(struct sock *, int);
+       void                    (*old_state_change)(struct sock *);
+       void                    (*old_write_space)(struct sock *);
+};
+
+/*
+ * TCP receive state flags
+ */
+#define TCP_RCV_LAST_FRAG      (1UL << 0)
+#define TCP_RCV_COPY_FRAGHDR   (1UL << 1)
+#define TCP_RCV_COPY_XID       (1UL << 2)
+#define TCP_RCV_COPY_DATA      (1UL << 3)
+
 static void xs_format_peer_addresses(struct rpc_xprt *xprt)
 {
        struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
@@ -168,37 +303,52 @@ static void xs_free_peer_addresses(struct rpc_xprt *xprt)
 
 #define XS_SENDMSG_FLAGS       (MSG_DONTWAIT | MSG_NOSIGNAL)
 
-static inline int xs_send_head(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, unsigned int len)
+static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen, struct kvec *vec, unsigned int base, int more)
 {
-       struct kvec iov = {
-               .iov_base       = xdr->head[0].iov_base + base,
-               .iov_len        = len - base,
-       };
        struct msghdr msg = {
                .msg_name       = addr,
                .msg_namelen    = addrlen,
-               .msg_flags      = XS_SENDMSG_FLAGS,
+               .msg_flags      = XS_SENDMSG_FLAGS | (more ? MSG_MORE : 0),
+       };
+       struct kvec iov = {
+               .iov_base       = vec->iov_base + base,
+               .iov_len        = vec->iov_len - base,
        };
 
-       if (xdr->len > len)
-               msg.msg_flags |= MSG_MORE;
-
-       if (likely(iov.iov_len))
+       if (iov.iov_len != 0)
                return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
        return kernel_sendmsg(sock, &msg, NULL, 0, 0);
 }
 
-static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int base, unsigned int len)
+static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more)
 {
-       struct kvec iov = {
-               .iov_base       = xdr->tail[0].iov_base + base,
-               .iov_len        = len - base,
-       };
-       struct msghdr msg = {
-               .msg_flags      = XS_SENDMSG_FLAGS,
-       };
+       struct page **ppage;
+       unsigned int remainder;
+       int err, sent = 0;
+
+       remainder = xdr->page_len - base;
+       base += xdr->page_base;
+       ppage = xdr->pages + (base >> PAGE_SHIFT);
+       base &= ~PAGE_MASK;
+       for(;;) {
+               unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
+               int flags = XS_SENDMSG_FLAGS;
 
-       return kernel_sendmsg(sock, &msg, &iov, 1, iov.iov_len);
+               remainder -= len;
+               if (remainder != 0 || more)
+                       flags |= MSG_MORE;
+               err = sock->ops->sendpage(sock, *ppage, base, len, flags);
+               if (remainder == 0 || err != len)
+                       break;
+               sent += err;
+               ppage++;
+               base = 0;
+       }
+       if (sent == 0)
+               return err;
+       if (err > 0)
+               sent += err;
+       return sent;
 }
 
 /**
@@ -210,76 +360,51 @@ static int xs_send_tail(struct socket *sock, struct xdr_buf *xdr, unsigned int b
  * @base: starting position in the buffer
  *
  */
-static inline int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
 {
-       struct page **ppage = xdr->pages;
-       unsigned int len, pglen = xdr->page_len;
-       int err, ret = 0;
+       unsigned int remainder = xdr->len - base;
+       int err, sent = 0;
 
        if (unlikely(!sock))
                return -ENOTCONN;
 
        clear_bit(SOCK_ASYNC_NOSPACE, &sock->flags);
+       if (base != 0) {
+               addr = NULL;
+               addrlen = 0;
+       }
 
-       len = xdr->head[0].iov_len;
-       if (base < len || (addr != NULL && base == 0)) {
-               err = xs_send_head(sock, addr, addrlen, xdr, base, len);
-               if (ret == 0)
-                       ret = err;
-               else if (err > 0)
-                       ret += err;
-               if (err != (len - base))
+       if (base < xdr->head[0].iov_len || addr != NULL) {
+               unsigned int len = xdr->head[0].iov_len - base;
+               remainder -= len;
+               err = xs_send_kvec(sock, addr, addrlen, &xdr->head[0], base, remainder != 0);
+               if (remainder == 0 || err != len)
                        goto out;
+               sent += err;
                base = 0;
        } else
-               base -= len;
-
-       if (unlikely(pglen == 0))
-               goto copy_tail;
-       if (unlikely(base >= pglen)) {
-               base -= pglen;
-               goto copy_tail;
-       }
-       if (base || xdr->page_base) {
-               pglen -= base;
-               base += xdr->page_base;
-               ppage += base >> PAGE_CACHE_SHIFT;
-               base &= ~PAGE_CACHE_MASK;
-       }
-
-       do {
-               int flags = XS_SENDMSG_FLAGS;
-
-               len = PAGE_CACHE_SIZE;
-               if (base)
-                       len -= base;
-               if (pglen < len)
-                       len = pglen;
-
-               if (pglen != len || xdr->tail[0].iov_len != 0)
-                       flags |= MSG_MORE;
+               base -= xdr->head[0].iov_len;
 
-               err = kernel_sendpage(sock, *ppage, base, len, flags);
-               if (ret == 0)
-                       ret = err;
-               else if (err > 0)
-                       ret += err;
-               if (err != len)
+       if (base < xdr->page_len) {
+               unsigned int len = xdr->page_len - base;
+               remainder -= len;
+               err = xs_send_pagedata(sock, xdr, base, remainder != 0);
+               if (remainder == 0 || err != len)
                        goto out;
+               sent += err;
                base = 0;
-               ppage++;
-       } while ((pglen -= len) != 0);
-copy_tail:
-       len = xdr->tail[0].iov_len;
-       if (base < len) {
-               err = xs_send_tail(sock, xdr, base, len);
-               if (ret == 0)
-                       ret = err;
-               else if (err > 0)
-                       ret += err;
-       }
+       } else
+               base -= xdr->page_len;
+
+       if (base >= xdr->tail[0].iov_len)
+               return sent;
+       err = xs_send_kvec(sock, NULL, 0, &xdr->tail[0], base, 0);
 out:
-       return ret;
+       if (sent == 0)
+               return err;
+       if (err > 0)
+               sent += err;
+       return sent;
 }
 
 /**
@@ -291,19 +416,20 @@ static void xs_nospace(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
        dprintk("RPC: %4d xmit incomplete (%u left of %u)\n",
                        task->tk_pid, req->rq_slen - req->rq_bytes_sent,
                        req->rq_slen);
 
-       if (test_bit(SOCK_ASYNC_NOSPACE, &xprt->sock->flags)) {
+       if (test_bit(SOCK_ASYNC_NOSPACE, &transport->sock->flags)) {
                /* Protect against races with write_space */
                spin_lock_bh(&xprt->transport_lock);
 
                /* Don't race with disconnect */
                if (!xprt_connected(xprt))
                        task->tk_status = -ENOTCONN;
-               else if (test_bit(SOCK_NOSPACE, &xprt->sock->flags))
+               else if (test_bit(SOCK_NOSPACE, &transport->sock->flags))
                        xprt_wait_for_buffer_space(task);
 
                spin_unlock_bh(&xprt->transport_lock);
@@ -327,6 +453,7 @@ static int xs_udp_send_request(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct xdr_buf *xdr = &req->rq_snd_buf;
        int status;
 
@@ -335,8 +462,10 @@ static int xs_udp_send_request(struct rpc_task *task)
                                req->rq_svec->iov_len);
 
        req->rq_xtime = jiffies;
-       status = xs_sendpages(xprt->sock, (struct sockaddr *) &xprt->addr,
-                               xprt->addrlen, xdr, req->rq_bytes_sent);
+       status = xs_sendpages(transport->sock,
+                             (struct sockaddr *) &xprt->addr,
+                             xprt->addrlen, xdr,
+                             req->rq_bytes_sent);
 
        dprintk("RPC:      xs_udp_send_request(%u) = %d\n",
                        xdr->len - req->rq_bytes_sent, status);
@@ -392,6 +521,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
 {
        struct rpc_rqst *req = task->tk_rqstp;
        struct rpc_xprt *xprt = req->rq_xprt;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct xdr_buf *xdr = &req->rq_snd_buf;
        int status, retry = 0;
 
@@ -406,8 +536,8 @@ static int xs_tcp_send_request(struct rpc_task *task)
         * called sendmsg(). */
        while (1) {
                req->rq_xtime = jiffies;
-               status = xs_sendpages(xprt->sock, NULL, 0, xdr,
-                                               req->rq_bytes_sent);
+               status = xs_sendpages(transport->sock,
+                                       NULL, 0, xdr, req->rq_bytes_sent);
 
                dprintk("RPC:      xs_tcp_send_request(%u) = %d\n",
                                xdr->len - req->rq_bytes_sent, status);
@@ -485,8 +615,9 @@ out_release:
  */
 static void xs_close(struct rpc_xprt *xprt)
 {
-       struct socket *sock = xprt->sock;
-       struct sock *sk = xprt->inet;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       struct socket *sock = transport->sock;
+       struct sock *sk = transport->inet;
 
        if (!sk)
                goto clear_close_wait;
@@ -494,13 +625,13 @@ static void xs_close(struct rpc_xprt *xprt)
        dprintk("RPC:      xs_close xprt %p\n", xprt);
 
        write_lock_bh(&sk->sk_callback_lock);
-       xprt->inet = NULL;
-       xprt->sock = NULL;
+       transport->inet = NULL;
+       transport->sock = NULL;
 
        sk->sk_user_data = NULL;
-       sk->sk_data_ready = xprt->old_data_ready;
-       sk->sk_state_change = xprt->old_state_change;
-       sk->sk_write_space = xprt->old_write_space;
+       sk->sk_data_ready = transport->old_data_ready;
+       sk->sk_state_change = transport->old_state_change;
+       sk->sk_write_space = transport->old_write_space;
        write_unlock_bh(&sk->sk_callback_lock);
 
        sk->sk_no_check = 0;
@@ -519,15 +650,18 @@ clear_close_wait:
  */
 static void xs_destroy(struct rpc_xprt *xprt)
 {
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
        dprintk("RPC:      xs_destroy xprt %p\n", xprt);
 
-       cancel_delayed_work(&xprt->connect_worker);
+       cancel_delayed_work(&transport->connect_worker);
        flush_scheduled_work();
 
        xprt_disconnect(xprt);
        xs_close(xprt);
        xs_free_peer_addresses(xprt);
        kfree(xprt->slot);
+       kfree(xprt);
 }
 
 static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -603,91 +737,75 @@ static void xs_udp_data_ready(struct sock *sk, int len)
        read_unlock(&sk->sk_callback_lock);
 }
 
-static inline size_t xs_tcp_copy_data(skb_reader_t *desc, void *p, size_t len)
-{
-       if (len > desc->count)
-               len = desc->count;
-       if (skb_copy_bits(desc->skb, desc->offset, p, len)) {
-               dprintk("RPC:      failed to copy %zu bytes from skb. %zu bytes remain\n",
-                               len, desc->count);
-               return 0;
-       }
-       desc->offset += len;
-       desc->count -= len;
-       dprintk("RPC:      copied %zu bytes from skb. %zu bytes remain\n",
-                       len, desc->count);
-       return len;
-}
-
-static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
 {
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        size_t len, used;
        char *p;
 
-       p = ((char *) &xprt->tcp_recm) + xprt->tcp_offset;
-       len = sizeof(xprt->tcp_recm) - xprt->tcp_offset;
-       used = xs_tcp_copy_data(desc, p, len);
-       xprt->tcp_offset += used;
+       p = ((char *) &transport->tcp_fraghdr) + transport->tcp_offset;
+       len = sizeof(transport->tcp_fraghdr) - transport->tcp_offset;
+       used = xdr_skb_read_bits(desc, p, len);
+       transport->tcp_offset += used;
        if (used != len)
                return;
 
-       xprt->tcp_reclen = ntohl(xprt->tcp_recm);
-       if (xprt->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
-               xprt->tcp_flags |= XPRT_LAST_FRAG;
+       transport->tcp_reclen = ntohl(transport->tcp_fraghdr);
+       if (transport->tcp_reclen & RPC_LAST_STREAM_FRAGMENT)
+               transport->tcp_flags |= TCP_RCV_LAST_FRAG;
        else
-               xprt->tcp_flags &= ~XPRT_LAST_FRAG;
-       xprt->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
+               transport->tcp_flags &= ~TCP_RCV_LAST_FRAG;
+       transport->tcp_reclen &= RPC_FRAGMENT_SIZE_MASK;
 
-       xprt->tcp_flags &= ~XPRT_COPY_RECM;
-       xprt->tcp_offset = 0;
+       transport->tcp_flags &= ~TCP_RCV_COPY_FRAGHDR;
+       transport->tcp_offset = 0;
 
        /* Sanity check of the record length */
-       if (unlikely(xprt->tcp_reclen < 4)) {
+       if (unlikely(transport->tcp_reclen < 4)) {
                dprintk("RPC:      invalid TCP record fragment length\n");
                xprt_disconnect(xprt);
                return;
        }
        dprintk("RPC:      reading TCP record fragment of length %d\n",
-                       xprt->tcp_reclen);
+                       transport->tcp_reclen);
 }
 
-static void xs_tcp_check_recm(struct rpc_xprt *xprt)
+static void xs_tcp_check_fraghdr(struct sock_xprt *transport)
 {
-       dprintk("RPC:      xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u, tcp_flags = %lx\n",
-                       xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen, xprt->tcp_flags);
-       if (xprt->tcp_offset == xprt->tcp_reclen) {
-               xprt->tcp_flags |= XPRT_COPY_RECM;
-               xprt->tcp_offset = 0;
-               if (xprt->tcp_flags & XPRT_LAST_FRAG) {
-                       xprt->tcp_flags &= ~XPRT_COPY_DATA;
-                       xprt->tcp_flags |= XPRT_COPY_XID;
-                       xprt->tcp_copied = 0;
+       if (transport->tcp_offset == transport->tcp_reclen) {
+               transport->tcp_flags |= TCP_RCV_COPY_FRAGHDR;
+               transport->tcp_offset = 0;
+               if (transport->tcp_flags & TCP_RCV_LAST_FRAG) {
+                       transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+                       transport->tcp_flags |= TCP_RCV_COPY_XID;
+                       transport->tcp_copied = 0;
                }
        }
 }
 
-static inline void xs_tcp_read_xid(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_xid(struct sock_xprt *transport, struct xdr_skb_reader *desc)
 {
        size_t len, used;
        char *p;
 
-       len = sizeof(xprt->tcp_xid) - xprt->tcp_offset;
+       len = sizeof(transport->tcp_xid) - transport->tcp_offset;
        dprintk("RPC:      reading XID (%Zu bytes)\n", len);
-       p = ((char *) &xprt->tcp_xid) + xprt->tcp_offset;
-       used = xs_tcp_copy_data(desc, p, len);
-       xprt->tcp_offset += used;
+       p = ((char *) &transport->tcp_xid) + transport->tcp_offset;
+       used = xdr_skb_read_bits(desc, p, len);
+       transport->tcp_offset += used;
        if (used != len)
                return;
-       xprt->tcp_flags &= ~XPRT_COPY_XID;
-       xprt->tcp_flags |= XPRT_COPY_DATA;
-       xprt->tcp_copied = 4;
+       transport->tcp_flags &= ~TCP_RCV_COPY_XID;
+       transport->tcp_flags |= TCP_RCV_COPY_DATA;
+       transport->tcp_copied = 4;
        dprintk("RPC:      reading reply for XID %08x\n",
-                                               ntohl(xprt->tcp_xid));
-       xs_tcp_check_recm(xprt);
+                       ntohl(transport->tcp_xid));
+       xs_tcp_check_fraghdr(transport);
 }
 
-static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_request(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
 {
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct rpc_rqst *req;
        struct xdr_buf *rcvbuf;
        size_t len;
@@ -695,116 +813,118 @@ static inline void xs_tcp_read_request(struct rpc_xprt *xprt, skb_reader_t *desc
 
        /* Find and lock the request corresponding to this xid */
        spin_lock(&xprt->transport_lock);
-       req = xprt_lookup_rqst(xprt, xprt->tcp_xid);
+       req = xprt_lookup_rqst(xprt, transport->tcp_xid);
        if (!req) {
-               xprt->tcp_flags &= ~XPRT_COPY_DATA;
+               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
                dprintk("RPC:      XID %08x request not found!\n",
-                               ntohl(xprt->tcp_xid));
+                               ntohl(transport->tcp_xid));
                spin_unlock(&xprt->transport_lock);
                return;
        }
 
        rcvbuf = &req->rq_private_buf;
        len = desc->count;
-       if (len > xprt->tcp_reclen - xprt->tcp_offset) {
-               skb_reader_t my_desc;
+       if (len > transport->tcp_reclen - transport->tcp_offset) {
+               struct xdr_skb_reader my_desc;
 
-               len = xprt->tcp_reclen - xprt->tcp_offset;
+               len = transport->tcp_reclen - transport->tcp_offset;
                memcpy(&my_desc, desc, sizeof(my_desc));
                my_desc.count = len;
-               r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
-                                         &my_desc, xs_tcp_copy_data);
+               r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+                                         &my_desc, xdr_skb_read_bits);
                desc->count -= r;
                desc->offset += r;
        } else
-               r = xdr_partial_copy_from_skb(rcvbuf, xprt->tcp_copied,
-                                         desc, xs_tcp_copy_data);
+               r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied,
+                                         desc, xdr_skb_read_bits);
 
        if (r > 0) {
-               xprt->tcp_copied += r;
-               xprt->tcp_offset += r;
+               transport->tcp_copied += r;
+               transport->tcp_offset += r;
        }
        if (r != len) {
                /* Error when copying to the receive buffer,
                 * usually because we weren't able to allocate
                 * additional buffer pages. All we can do now
-                * is turn off XPRT_COPY_DATA, so the request
+                * is turn off TCP_RCV_COPY_DATA, so the request
                 * will not receive any additional updates,
                 * and time out.
                 * Any remaining data from this record will
                 * be discarded.
                 */
-               xprt->tcp_flags &= ~XPRT_COPY_DATA;
+               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
                dprintk("RPC:      XID %08x truncated request\n",
-                               ntohl(xprt->tcp_xid));
+                               ntohl(transport->tcp_xid));
                dprintk("RPC:      xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
-                               xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
+                               xprt, transport->tcp_copied, transport->tcp_offset,
+                                       transport->tcp_reclen);
                goto out;
        }
 
        dprintk("RPC:      XID %08x read %Zd bytes\n",
-                       ntohl(xprt->tcp_xid), r);
+                       ntohl(transport->tcp_xid), r);
        dprintk("RPC:      xprt = %p, tcp_copied = %lu, tcp_offset = %u, tcp_reclen = %u\n",
-                       xprt, xprt->tcp_copied, xprt->tcp_offset, xprt->tcp_reclen);
-
-       if (xprt->tcp_copied == req->rq_private_buf.buflen)
-               xprt->tcp_flags &= ~XPRT_COPY_DATA;
-       else if (xprt->tcp_offset == xprt->tcp_reclen) {
-               if (xprt->tcp_flags & XPRT_LAST_FRAG)
-                       xprt->tcp_flags &= ~XPRT_COPY_DATA;
+                       xprt, transport->tcp_copied, transport->tcp_offset,
+                               transport->tcp_reclen);
+
+       if (transport->tcp_copied == req->rq_private_buf.buflen)
+               transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
+       else if (transport->tcp_offset == transport->tcp_reclen) {
+               if (transport->tcp_flags & TCP_RCV_LAST_FRAG)
+                       transport->tcp_flags &= ~TCP_RCV_COPY_DATA;
        }
 
 out:
-       if (!(xprt->tcp_flags & XPRT_COPY_DATA))
-               xprt_complete_rqst(req->rq_task, xprt->tcp_copied);
+       if (!(transport->tcp_flags & TCP_RCV_COPY_DATA))
+               xprt_complete_rqst(req->rq_task, transport->tcp_copied);
        spin_unlock(&xprt->transport_lock);
-       xs_tcp_check_recm(xprt);
+       xs_tcp_check_fraghdr(transport);
 }
 
-static inline void xs_tcp_read_discard(struct rpc_xprt *xprt, skb_reader_t *desc)
+static inline void xs_tcp_read_discard(struct sock_xprt *transport, struct xdr_skb_reader *desc)
 {
        size_t len;
 
-       len = xprt->tcp_reclen - xprt->tcp_offset;
+       len = transport->tcp_reclen - transport->tcp_offset; /* bytes left before tcp_offset reaches tcp_reclen */
        if (len > desc->count)
                len = desc->count;
        desc->count -= len;
        desc->offset += len;
-       xprt->tcp_offset += len;
+       transport->tcp_offset += len; /* account for the skipped bytes; nothing is copied */
        dprintk("RPC:      discarded %Zu bytes\n", len);
-       xs_tcp_check_recm(xprt);
+       xs_tcp_check_fraghdr(transport);
 }
 
 static int xs_tcp_data_recv(read_descriptor_t *rd_desc, struct sk_buff *skb, unsigned int offset, size_t len)
 {
        struct rpc_xprt *xprt = rd_desc->arg.data;
-       skb_reader_t desc = {
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       struct xdr_skb_reader desc = {
                .skb    = skb,
                .offset = offset,
                .count  = len,
-               .csum   = 0
        };
 
        dprintk("RPC:      xs_tcp_data_recv started\n");
        do {
                /* Read in a new fragment marker if necessary */
                /* Can we ever really expect to get completely empty fragments? */
-               if (xprt->tcp_flags & XPRT_COPY_RECM) {
+               if (transport->tcp_flags & TCP_RCV_COPY_FRAGHDR) {
                        xs_tcp_read_fraghdr(xprt, &desc);
                        continue;
                }
                /* Read in the xid if necessary */
-               if (xprt->tcp_flags & XPRT_COPY_XID) {
-                       xs_tcp_read_xid(xprt, &desc);
+               if (transport->tcp_flags & TCP_RCV_COPY_XID) {
+                       xs_tcp_read_xid(transport, &desc);
                        continue;
                }
                /* Read in the request data */
-               if (xprt->tcp_flags & XPRT_COPY_DATA) {
+               if (transport->tcp_flags & TCP_RCV_COPY_DATA) {
                        xs_tcp_read_request(xprt, &desc);
                        continue;
                }
                /* Skip over any trailing bytes on short reads */
-               xs_tcp_read_discard(xprt, &desc);
+               xs_tcp_read_discard(transport, &desc);
        } while (desc.count);
        dprintk("RPC:      xs_tcp_data_recv done\n");
        return len - desc.count;
@@ -858,11 +978,16 @@ static void xs_tcp_state_change(struct sock *sk)
        case TCP_ESTABLISHED:
                spin_lock_bh(&xprt->transport_lock);
                if (!xprt_test_and_set_connected(xprt)) {
+                       struct sock_xprt *transport = container_of(xprt,
+                                       struct sock_xprt, xprt);
+
                        /* Reset TCP record info */
-                       xprt->tcp_offset = 0;
-                       xprt->tcp_reclen = 0;
-                       xprt->tcp_copied = 0;
-                       xprt->tcp_flags = XPRT_COPY_RECM | XPRT_COPY_XID;
+                       transport->tcp_offset = 0;
+                       transport->tcp_reclen = 0;
+                       transport->tcp_copied = 0;
+                       transport->tcp_flags =
+                               TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
+
                        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
                        xprt_wake_pending_tasks(xprt, 0);
                }
@@ -951,15 +1076,16 @@ static void xs_tcp_write_space(struct sock *sk)
 
 static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
 {
-       struct sock *sk = xprt->inet;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+       struct sock *sk = transport->inet; /* NOTE(review): dereferenced below without a NULL check - confirm callers only run once the socket is set */
 
-       if (xprt->rcvsize) {
+       if (transport->rcvsize) { /* zero leaves the receive buffer settings untouched */
                sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-               sk->sk_rcvbuf = xprt->rcvsize * xprt->max_reqs *  2;
+               sk->sk_rcvbuf = transport->rcvsize * xprt->max_reqs * 2; /* per-request size x slot count x 2 */
        }
-       if (xprt->sndsize) {
+       if (transport->sndsize) { /* zero leaves the send buffer settings untouched */
                sk->sk_userlocks |= SOCK_SNDBUF_LOCK;
-               sk->sk_sndbuf = xprt->sndsize * xprt->max_reqs * 2;
+               sk->sk_sndbuf = transport->sndsize * xprt->max_reqs * 2; /* per-request size x slot count x 2 */
                sk->sk_write_space(sk);
        }
 }
@@ -974,12 +1100,14 @@ static void xs_udp_do_set_buffer_size(struct rpc_xprt *xprt)
  */
 static void xs_udp_set_buffer_size(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize)
 {
-       xprt->sndsize = 0;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
+       transport->sndsize = 0; /* stays 0 when the caller passes sndsize == 0 */
        if (sndsize)
-               xprt->sndsize = sndsize + 1024;
-       xprt->rcvsize = 0;
+               transport->sndsize = sndsize + 1024; /* NOTE(review): the extra 1024 is presumably header slack - confirm */
+       transport->rcvsize = 0; /* stays 0 when the caller passes rcvsize == 0 */
        if (rcvsize)
-               xprt->rcvsize = rcvsize + 1024;
+               transport->rcvsize = rcvsize + 1024; /* same 1024-byte addition as for sndsize */
 
        xs_udp_do_set_buffer_size(xprt);
 }
@@ -1002,19 +1130,6 @@ static unsigned short xs_get_random_port(void)
        return rand + xprt_min_resvport;
 }
 
-/**
- * xs_print_peer_address - format an IPv4 address for printing
- * @xprt: generic transport
- * @format: flags field indicating which parts of the address to render
- */
-static char *xs_print_peer_address(struct rpc_xprt *xprt, enum rpc_display_format_t format)
-{
-       if (xprt->address_strings[format] != NULL)
-               return xprt->address_strings[format];
-       else
-               return "unprintable";
-}
-
 /**
  * xs_set_port - reset the port number in the remote endpoint address
  * @xprt: generic transport
@@ -1030,20 +1145,20 @@ static void xs_set_port(struct rpc_xprt *xprt, unsigned short port)
        sap->sin_port = htons(port);
 }
 
-static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
+static int xs_bindresvport(struct sock_xprt *transport, struct socket *sock)
 {
        struct sockaddr_in myaddr = {
                .sin_family = AF_INET,
        };
        int err;
-       unsigned short port = xprt->port;
+       unsigned short port = transport->port;
 
        do {
                myaddr.sin_port = htons(port);
                err = kernel_bind(sock, (struct sockaddr *) &myaddr,
                                                sizeof(myaddr));
                if (err == 0) {
-                       xprt->port = port;
+                       transport->port = port;
                        dprintk("RPC:      xs_bindresvport bound to port %u\n",
                                        port);
                        return 0;
@@ -1052,7 +1167,7 @@ static int xs_bindresvport(struct rpc_xprt *xprt, struct socket *sock)
                        port = xprt_max_resvport;
                else
                        port--;
-       } while (err == -EADDRINUSE && port != xprt->port);
+       } while (err == -EADDRINUSE && port != transport->port);
 
        dprintk("RPC:      can't bind to reserved port (%d).\n", -err);
        return err;
@@ -1095,9 +1210,10 @@ static inline void xs_reclassify_socket(struct socket *sock)
  */
 static void xs_udp_connect_worker(struct work_struct *work)
 {
-       struct rpc_xprt *xprt =
-               container_of(work, struct rpc_xprt, connect_worker.work);
-       struct socket *sock = xprt->sock;
+       struct sock_xprt *transport =
+               container_of(work, struct sock_xprt, connect_worker.work);
+       struct rpc_xprt *xprt = &transport->xprt;
+       struct socket *sock = transport->sock;
        int err, status = -EIO;
 
        if (xprt->shutdown || !xprt_bound(xprt))
@@ -1112,23 +1228,23 @@ static void xs_udp_connect_worker(struct work_struct *work)
        }
        xs_reclassify_socket(sock);
 
-       if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
+       if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
                sock_release(sock);
                goto out;
        }
 
        dprintk("RPC:      worker connecting xprt %p to address: %s\n",
-                       xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+                       xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
 
-       if (!xprt->inet) {
+       if (!transport->inet) {
                struct sock *sk = sock->sk;
 
                write_lock_bh(&sk->sk_callback_lock);
 
                sk->sk_user_data = xprt;
-               xprt->old_data_ready = sk->sk_data_ready;
-               xprt->old_state_change = sk->sk_state_change;
-               xprt->old_write_space = sk->sk_write_space;
+               transport->old_data_ready = sk->sk_data_ready;
+               transport->old_state_change = sk->sk_state_change;
+               transport->old_write_space = sk->sk_write_space;
                sk->sk_data_ready = xs_udp_data_ready;
                sk->sk_write_space = xs_udp_write_space;
                sk->sk_no_check = UDP_CSUM_NORCV;
@@ -1137,8 +1253,8 @@ static void xs_udp_connect_worker(struct work_struct *work)
                xprt_set_connected(xprt);
 
                /* Reset to new socket */
-               xprt->sock = sock;
-               xprt->inet = sk;
+               transport->sock = sock;
+               transport->inet = sk;
 
                write_unlock_bh(&sk->sk_callback_lock);
        }
@@ -1156,7 +1272,7 @@ out:
 static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
 {
        int result;
-       struct socket *sock = xprt->sock;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        struct sockaddr any;
 
        dprintk("RPC:      disconnecting xprt %p to reuse port\n", xprt);
@@ -1167,7 +1283,7 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
         */
        memset(&any, 0, sizeof(any));
        any.sa_family = AF_UNSPEC;
-       result = kernel_connect(sock, &any, sizeof(any), 0);
+       result = kernel_connect(transport->sock, &any, sizeof(any), 0);
        if (result)
                dprintk("RPC:      AF_UNSPEC connect return code %d\n",
                                result);
@@ -1181,15 +1297,16 @@ static void xs_tcp_reuse_connection(struct rpc_xprt *xprt)
  */
 static void xs_tcp_connect_worker(struct work_struct *work)
 {
-       struct rpc_xprt *xprt =
-               container_of(work, struct rpc_xprt, connect_worker.work);
-       struct socket *sock = xprt->sock;
+       struct sock_xprt *transport =
+               container_of(work, struct sock_xprt, connect_worker.work);
+       struct rpc_xprt *xprt = &transport->xprt;
+       struct socket *sock = transport->sock;
        int err, status = -EIO;
 
        if (xprt->shutdown || !xprt_bound(xprt))
                goto out;
 
-       if (!xprt->sock) {
+       if (!sock) {
                /* start from scratch */
                if ((err = sock_create_kern(PF_INET, SOCK_STREAM, IPPROTO_TCP, &sock)) < 0) {
                        dprintk("RPC:      can't create TCP transport socket (%d).\n", -err);
@@ -1197,7 +1314,7 @@ static void xs_tcp_connect_worker(struct work_struct *work)
                }
                xs_reclassify_socket(sock);
 
-               if (xprt->resvport && xs_bindresvport(xprt, sock) < 0) {
+               if (xprt->resvport && xs_bindresvport(transport, sock) < 0) {
                        sock_release(sock);
                        goto out;
                }
@@ -1206,17 +1323,17 @@ static void xs_tcp_connect_worker(struct work_struct *work)
                xs_tcp_reuse_connection(xprt);
 
        dprintk("RPC:      worker connecting xprt %p to address: %s\n",
-                       xprt, xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+                       xprt, xprt->address_strings[RPC_DISPLAY_ALL]);
 
-       if (!xprt->inet) {
+       if (!transport->inet) {
                struct sock *sk = sock->sk;
 
                write_lock_bh(&sk->sk_callback_lock);
 
                sk->sk_user_data = xprt;
-               xprt->old_data_ready = sk->sk_data_ready;
-               xprt->old_state_change = sk->sk_state_change;
-               xprt->old_write_space = sk->sk_write_space;
+               transport->old_data_ready = sk->sk_data_ready;
+               transport->old_state_change = sk->sk_state_change;
+               transport->old_write_space = sk->sk_write_space;
                sk->sk_data_ready = xs_tcp_data_ready;
                sk->sk_state_change = xs_tcp_state_change;
                sk->sk_write_space = xs_tcp_write_space;
@@ -1231,8 +1348,8 @@ static void xs_tcp_connect_worker(struct work_struct *work)
                xprt_clear_connected(xprt);
 
                /* Reset to new socket */
-               xprt->sock = sock;
-               xprt->inet = sk;
+               transport->sock = sock;
+               transport->inet = sk;
 
                write_unlock_bh(&sk->sk_callback_lock);
        }
@@ -1281,21 +1398,22 @@ out_clear:
 static void xs_connect(struct rpc_task *task)
 {
        struct rpc_xprt *xprt = task->tk_xprt;
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 
        if (xprt_test_and_set_connecting(xprt))
                return;
 
-       if (xprt->sock != NULL) {
+       if (transport->sock != NULL) {
                dprintk("RPC:      xs_connect delayed xprt %p for %lu seconds\n",
                                xprt, xprt->reestablish_timeout / HZ);
-               schedule_delayed_work(&xprt->connect_worker,
+               schedule_delayed_work(&transport->connect_worker,
                                        xprt->reestablish_timeout);
                xprt->reestablish_timeout <<= 1;
                if (xprt->reestablish_timeout > XS_TCP_MAX_REEST_TO)
                        xprt->reestablish_timeout = XS_TCP_MAX_REEST_TO;
        } else {
                dprintk("RPC:      xs_connect scheduled xprt %p\n", xprt);
-               schedule_delayed_work(&xprt->connect_worker, 0);
+               schedule_delayed_work(&transport->connect_worker, 0);
 
                /* flush_scheduled_work can sleep... */
                if (!RPC_IS_ASYNC(task))
@@ -1311,8 +1429,10 @@ static void xs_connect(struct rpc_task *task)
  */
 static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 {
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
+
        seq_printf(seq, "\txprt:\tudp %u %lu %lu %lu %lu %Lu %Lu\n",
-                       xprt->port,
+                       transport->port,
                        xprt->stat.bind_count,
                        xprt->stat.sends,
                        xprt->stat.recvs,
@@ -1329,13 +1449,14 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
  */
 static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 {
+       struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
        long idle_time = 0;
 
        if (xprt_connected(xprt))
                idle_time = (long)(jiffies - xprt->last_used) / HZ;
 
        seq_printf(seq, "\txprt:\ttcp %u %lu %lu %lu %ld %lu %lu %lu %Lu %Lu\n",
-                       xprt->port,
+                       transport->port,
                        xprt->stat.bind_count,
                        xprt->stat.connect_count,
                        xprt->stat.connect_time,
@@ -1349,7 +1470,6 @@ static void xs_tcp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 
 static struct rpc_xprt_ops xs_udp_ops = {
        .set_buffer_size        = xs_udp_set_buffer_size,
-       .print_addr             = xs_print_peer_address,
        .reserve_xprt           = xprt_reserve_xprt_cong,
        .release_xprt           = xprt_release_xprt_cong,
        .rpcbind                = rpc_getport,
@@ -1367,7 +1487,6 @@ static struct rpc_xprt_ops xs_udp_ops = {
 };
 
 static struct rpc_xprt_ops xs_tcp_ops = {
-       .print_addr             = xs_print_peer_address,
        .reserve_xprt           = xprt_reserve_xprt,
        .release_xprt           = xs_tcp_release_xprt,
        .rpcbind                = rpc_getport,
@@ -1382,33 +1501,64 @@ static struct rpc_xprt_ops xs_tcp_ops = {
        .print_stats            = xs_tcp_print_stats,
 };
 
+static struct rpc_xprt *xs_setup_xprt(struct sockaddr *addr, size_t addrlen, unsigned int slot_table_size)
+{
+       struct rpc_xprt *xprt;
+       struct sock_xprt *new;
+
+       if (addrlen > sizeof(xprt->addr)) {
+               dprintk("RPC:      xs_setup_xprt: address too large\n");
+               return ERR_PTR(-EBADF);
+       }
+       /* One allocation holds the socket state and the embedded generic rpc_xprt. */
+       new = kzalloc(sizeof(*new), GFP_KERNEL);
+       if (new == NULL) {
+               dprintk("RPC:      xs_setup_xprt: couldn't allocate rpc_xprt\n");
+               return ERR_PTR(-ENOMEM);
+       }
+       xprt = &new->xprt;
+
+       xprt->max_reqs = slot_table_size;
+       xprt->slot = kcalloc(xprt->max_reqs, sizeof(struct rpc_rqst), GFP_KERNEL);
+       if (xprt->slot == NULL) {
+               kfree(new); /* free the pointer kzalloc returned, not the embedded member */
+               dprintk("RPC:      xs_setup_xprt: couldn't allocate slot table\n");
+               return ERR_PTR(-ENOMEM);
+       }
+       /* Record the peer address; the random port is where xs_bindresvport starts. */
+       memcpy(&xprt->addr, addr, addrlen);
+       xprt->addrlen = addrlen;
+       new->port = xs_get_random_port();
+
+       return xprt;
+}
+
 /**
  * xs_setup_udp - Set up transport to use a UDP socket
- * @xprt: transport to set up
+ * @addr: address of remote server
+ * @addrlen: length of address in bytes
  * @to:   timeout parameters
  *
  */
-int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
+struct rpc_xprt *xs_setup_udp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
 {
-       size_t slot_table_size;
-       struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+       struct rpc_xprt *xprt;
+       struct sock_xprt *transport;
 
-       xprt->max_reqs = xprt_udp_slot_table_entries;
-       slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
-       xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
-       if (xprt->slot == NULL)
-               return -ENOMEM;
+       xprt = xs_setup_xprt(addr, addrlen, xprt_udp_slot_table_entries);
+       if (IS_ERR(xprt))
+               return xprt;
+       transport = container_of(xprt, struct sock_xprt, xprt);
 
-       if (ntohs(addr->sin_port) != 0)
+       if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
                xprt_set_bound(xprt);
-       xprt->port = xs_get_random_port();
 
        xprt->prot = IPPROTO_UDP;
        xprt->tsh_size = 0;
        /* XXX: header size can vary due to auth type, IPv6, etc. */
        xprt->max_payload = (1U << 16) - (MAX_HEADER << 3);
 
-       INIT_DELAYED_WORK(&xprt->connect_worker, xs_udp_connect_worker);
+       INIT_DELAYED_WORK(&transport->connect_worker, xs_udp_connect_worker);
        xprt->bind_timeout = XS_BIND_TO;
        xprt->connect_timeout = XS_UDP_CONN_TO;
        xprt->reestablish_timeout = XS_UDP_REEST_TO;
@@ -1423,37 +1573,36 @@ int xs_setup_udp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 
        xs_format_peer_addresses(xprt);
        dprintk("RPC:      set up transport to address %s\n",
-                       xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+                       xprt->address_strings[RPC_DISPLAY_ALL]);
 
-       return 0;
+       return xprt;
 }
 
 /**
  * xs_setup_tcp - Set up transport to use a TCP socket
- * @xprt: transport to set up
+ * @addr: address of remote server
+ * @addrlen: length of address in bytes
  * @to: timeout parameters
  *
  */
-int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
+struct rpc_xprt *xs_setup_tcp(struct sockaddr *addr, size_t addrlen, struct rpc_timeout *to)
 {
-       size_t slot_table_size;
-       struct sockaddr_in *addr = (struct sockaddr_in *) &xprt->addr;
+       struct rpc_xprt *xprt;
+       struct sock_xprt *transport;
 
-       xprt->max_reqs = xprt_tcp_slot_table_entries;
-       slot_table_size = xprt->max_reqs * sizeof(xprt->slot[0]);
-       xprt->slot = kzalloc(slot_table_size, GFP_KERNEL);
-       if (xprt->slot == NULL)
-               return -ENOMEM;
+       xprt = xs_setup_xprt(addr, addrlen, xprt_tcp_slot_table_entries);
+       if (IS_ERR(xprt))
+               return xprt;
+       transport = container_of(xprt, struct sock_xprt, xprt);
 
-       if (ntohs(addr->sin_port) != 0)
+       if (ntohs(((struct sockaddr_in *)addr)->sin_port) != 0)
                xprt_set_bound(xprt);
-       xprt->port = xs_get_random_port();
 
        xprt->prot = IPPROTO_TCP;
        xprt->tsh_size = sizeof(rpc_fraghdr) / sizeof(u32);
        xprt->max_payload = RPC_MAX_FRAGMENT_SIZE;
 
-       INIT_DELAYED_WORK(&xprt->connect_worker, xs_tcp_connect_worker);
+       INIT_DELAYED_WORK(&transport->connect_worker, xs_tcp_connect_worker);
        xprt->bind_timeout = XS_BIND_TO;
        xprt->connect_timeout = XS_TCP_CONN_TO;
        xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
@@ -1468,7 +1617,40 @@ int xs_setup_tcp(struct rpc_xprt *xprt, struct rpc_timeout *to)
 
        xs_format_peer_addresses(xprt);
        dprintk("RPC:      set up transport to address %s\n",
-                       xs_print_peer_address(xprt, RPC_DISPLAY_ALL));
+                       xprt->address_strings[RPC_DISPLAY_ALL]);
+
+       return xprt;
+}
+
+/**
+ * init_socket_xprt - set up xprtsock's sysctls
+ * Registers sunrpc_table once, and only when RPC_DEBUG is defined; always returns 0.
+ */
+int init_socket_xprt(void)
+{
+#ifdef RPC_DEBUG
+       if (!sunrpc_table_header) {
+               sunrpc_table_header = register_sysctl_table(sunrpc_table, 1);
+#ifdef CONFIG_PROC_FS
+               if (sunrpc_table[0].de)
+                       sunrpc_table[0].de->owner = THIS_MODULE;
+#endif
+       }
+#endif
 
        return 0;
 }
+
+/**
+ * cleanup_socket_xprt - remove xprtsock's sysctls
+ * Unregisters the table only if sunrpc_table_header is set, then clears the pointer.
+ */
+void cleanup_socket_xprt(void)
+{
+#ifdef RPC_DEBUG
+       if (sunrpc_table_header) {
+               unregister_sysctl_table(sunrpc_table_header);
+               sunrpc_table_header = NULL;
+       }
+#endif
+}