Merge branches 'irq-core-for-linus' and 'core-locking-for-linus' of git://git.kernel...

[net-next-2.6.git] / fs / btrfs / ioctl.c
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c

index 3fe15e435b5cbc5a541718be67f05bebd55ed759..463d91b4dd3a720ece2afb592b2edefca855aaba 100644 (file)
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -224,7 +224,8 @@ static int btrfs_ioctl_getversion(struct file *file, int __user *arg)
  
  static noinline int create_subvol(struct btrfs_root *root,
                                   struct dentry *dentry,
-                                 char *name, int namelen)
+                                 char *name, int namelen,
+                                 u64 *async_transid)
  {
         struct btrfs_trans_handle *trans;
         struct btrfs_key key;
@@ -338,13 +339,19 @@ static noinline int create_subvol(struct btrfs_root *root,
  
         d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
  fail:
-       err = btrfs_commit_transaction(trans, root);
+       if (async_transid) {
+               *async_transid = trans->transid;
+               err = btrfs_commit_transaction_async(trans, root, 1);
+       } else {
+               err = btrfs_commit_transaction(trans, root);
+       }
         if (err && !ret)
                 ret = err;
         return ret;
  }
  
-static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
+static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+                          char *name, int namelen, u64 *async_transid)
  {
         struct inode *inode;
         struct btrfs_pending_snapshot *pending_snapshot;
@@ -373,7 +380,14 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry)
  
         list_add(&pending_snapshot->list,
                  &trans->transaction->pending_snapshots);
-       ret = btrfs_commit_transaction(trans, root->fs_info->extent_root);
+       if (async_transid) {
+               *async_transid = trans->transid;
+               ret = btrfs_commit_transaction_async(trans,
+                                    root->fs_info->extent_root, 1);
+       } else {
+               ret = btrfs_commit_transaction(trans,
+                                              root->fs_info->extent_root);
+       }
         BUG_ON(ret);
  
         ret = pending_snapshot->error;
@@ -395,6 +409,76 @@ fail:
         return ret;
  }
  
+/*
+ * Local copy of check_sticky() from fs/namei.c.
+ *
+ * Returns 0 when the sticky bit on @dir does not prevent the caller from
+ * acting on @inode, and non-zero when it does.  Kept inline so the cost
+ * stays minimal for filesystems that don't use the sticky bit.
+ */
+static inline int btrfs_check_sticky(struct inode *dir, struct inode *inode)
+{
+       uid_t caller = current_fsuid();
+
+       /* No sticky bit on the directory: nothing to enforce. */
+       if ((dir->i_mode & S_ISVTX) == 0)
+               return 0;
+
+       /* Owning either the entry or the directory is sufficient. */
+       if (inode->i_uid == caller || dir->i_uid == caller)
+               return 0;
+
+       /* Otherwise only CAP_FOWNER lifts the restriction. */
+       return capable(CAP_FOWNER) ? 0 : 1;
+}
+
+/*
+ * Local copy of may_delete() from fs/namei.c.
+ *
+ * Decide whether the link @victim may be removed from directory @dir, and
+ * whether @victim has the type the caller expects (@isdir).
+ *
+ *  1. We can't do it if dir is read-only (done in permission())
+ *  2. We should have write and exec permissions on dir
+ *  3. We can't remove anything from an append-only dir
+ *  4. We can't do anything with an immutable dir (done in permission())
+ *  5. If the sticky bit on dir is set we should either
+ *     a. be owner of dir, or
+ *     b. be owner of victim, or
+ *     c. have CAP_FOWNER capability
+ *  6. If the victim is append-only or immutable we can't do anything with
+ *     links pointing to it.
+ *  7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
+ *  8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
+ *  9. We can't remove a root or mountpoint.
+ * 10. We don't allow removal of NFS sillyrenamed files; it's handled by
+ *     nfs_async_unlink().
+ *
+ * Returns 0 when removal is allowed, otherwise a negative errno.  The
+ * checks run in a fixed order, so the first failing rule picks the errno.
+ */
+
+static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
+{
+       int error;
+
+       /* A dentry without an inode has nothing to delete. */
+       if (victim->d_inode == NULL)
+               return -ENOENT;
+
+       BUG_ON(victim->d_parent->d_inode != dir);
+       audit_inode_child(victim, dir);
+
+       /* Rule 2: write + exec permission on the parent directory. */
+       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       if (error != 0)
+               return error;
+
+       /* Rule 3: append-only parent. */
+       if (IS_APPEND(dir))
+               return -EPERM;
+
+       /* Rule 5: sticky-bit ownership check. */
+       if (btrfs_check_sticky(dir, victim->d_inode))
+               return -EPERM;
+
+       /* Rule 6, plus swap files, which are refused the same way. */
+       if (IS_APPEND(victim->d_inode))
+               return -EPERM;
+       if (IS_IMMUTABLE(victim->d_inode))
+               return -EPERM;
+       if (IS_SWAPFILE(victim->d_inode))
+               return -EPERM;
+
+       /* Rules 7-9: the victim's type must match what was asked for. */
+       if (isdir && !S_ISDIR(victim->d_inode->i_mode))
+               return -ENOTDIR;
+       if (isdir && IS_ROOT(victim))
+               return -EBUSY;
+       if (!isdir && S_ISDIR(victim->d_inode->i_mode))
+               return -EISDIR;
+
+       if (IS_DEADDIR(dir))
+               return -ENOENT;
+
+       /* Rule 10: NFS sillyrenamed entries. */
+       if (victim->d_flags & DCACHE_NFSFS_RENAMED)
+               return -EBUSY;
+
+       return 0;
+}
+
  /* copy of may_create in fs/namei.c() */
  static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
  {
@@ -412,7 +496,8 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
   */
  static noinline int btrfs_mksubvol(struct path *parent,
                                    char *name, int namelen,
-                                  struct btrfs_root *snap_src)
+                                  struct btrfs_root *snap_src,
+                                  u64 *async_transid)
  {
         struct inode *dir  = parent->dentry->d_inode;
         struct dentry *dentry;
@@ -443,10 +528,11 @@ static noinline int btrfs_mksubvol(struct path *parent,
                 goto out_up_read;
  
         if (snap_src) {
-               error = create_snapshot(snap_src, dentry);
+               error = create_snapshot(snap_src, dentry,
+                                       name, namelen, async_transid);
         } else {
                 error = create_subvol(BTRFS_I(dir)->root, dentry,
-                                     name, namelen);
+                                     name, namelen, async_transid);
         }
         if (!error)
                 fsnotify_mkdir(dir, dentry);
@@ -799,11 +885,13 @@ out_unlock:
         return ret;
  }
  
-static noinline int btrfs_ioctl_snap_create(struct file *file,
-                                           void __user *arg, int subvol)
+static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
+                                                   char *name,
+                                                   unsigned long fd,
+                                                   int subvol,
+                                                   u64 *transid)
  {
         struct btrfs_root *root = BTRFS_I(fdentry(file)->d_inode)->root;
-       struct btrfs_ioctl_vol_args *vol_args;
         struct file *src_file;
         int namelen;
         int ret = 0;
@@ -811,23 +899,18 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
         if (root->fs_info->sb->s_flags & MS_RDONLY)
                 return -EROFS;
  
-       vol_args = memdup_user(arg, sizeof(*vol_args));
-       if (IS_ERR(vol_args))
-               return PTR_ERR(vol_args);
-
-       vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
-       namelen = strlen(vol_args->name);
-       if (strchr(vol_args->name, '/')) {
+       namelen = strlen(name);
+       if (strchr(name, '/')) {
                 ret = -EINVAL;
                 goto out;
         }
  
         if (subvol) {
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
-                                    NULL);
+               ret = btrfs_mksubvol(&file->f_path, name, namelen,
+                                    NULL, transid);
         } else {
                 struct inode *src_inode;
-               src_file = fget(vol_args->fd);
+               src_file = fget(fd);
                 if (!src_file) {
                         ret = -EINVAL;
                         goto out;
@@ -841,12 +924,56 @@ static noinline int btrfs_ioctl_snap_create(struct file *file,
                         fput(src_file);
                         goto out;
                 }
-               ret = btrfs_mksubvol(&file->f_path, vol_args->name, namelen,
-                                    BTRFS_I(src_inode)->root);
+               ret = btrfs_mksubvol(&file->f_path, name, namelen,
+                                    BTRFS_I(src_inode)->root,
+                                    transid);
                 fput(src_file);
         }
  out:
+       return ret;
+}
+
+/*
+ * Common entry point for the snapshot/subvolume creation ioctls.
+ *
+ * @file:   file the ioctl was issued on
+ * @arg:    user pointer to a struct btrfs_ioctl_vol_args, or to a
+ *          struct btrfs_ioctl_async_vol_args when @async is set
+ * @subvol: non-zero creates a new subvolume; zero snapshots the subvolume
+ *          behind the fd carried in the argument structure
+ * @async:  non-zero selects the async argument layout and asks for the
+ *          transaction id to be copied back to the user's transid field
+ *
+ * Returns the result of btrfs_ioctl_snap_create_transid(), the error from
+ * memdup_user(), or -EFAULT when the transid cannot be copied back.
+ */
+static noinline int btrfs_ioctl_snap_create(struct file *file,
+                                           void __user *arg, int subvol,
+                                           int async)
+{
+       struct btrfs_ioctl_vol_args *vol_args = NULL;
+       struct btrfs_ioctl_async_vol_args *async_vol_args = NULL;
+       char *name;
+       u64 fd;
+       u64 transid = 0;
+       u64 *transid_ptr = NULL;
+       int ret;
+
+       if (async) {
+               async_vol_args = memdup_user(arg, sizeof(*async_vol_args));
+               if (IS_ERR(async_vol_args))
+                       return PTR_ERR(async_vol_args);
+
+               /* User space may not have terminated the name. */
+               async_vol_args->name[BTRFS_SNAPSHOT_NAME_MAX] = '\0';
+               name = async_vol_args->name;
+               fd = async_vol_args->fd;
+               transid_ptr = &transid;
+       } else {
+               vol_args = memdup_user(arg, sizeof(*vol_args));
+               if (IS_ERR(vol_args))
+                       return PTR_ERR(vol_args);
+
+               /* User space may not have terminated the name. */
+               vol_args->name[BTRFS_PATH_NAME_MAX] = '\0';
+               name = vol_args->name;
+               fd = vol_args->fd;
+       }
+
+       /*
+        * A non-NULL transid pointer makes create_subvol() and
+        * create_snapshot() commit asynchronously, so only hand one down
+        * when the caller really asked for the async variant.
+        */
+       ret = btrfs_ioctl_snap_create_transid(file, name, fd,
+                                             subvol, transid_ptr);
+
+       /*
+        * Record a failed copy-out in ret rather than returning directly,
+        * so the argument copy is always freed below.
+        */
+       if (!ret && transid_ptr) {
+               if (copy_to_user(arg +
+                               offsetof(struct btrfs_ioctl_async_vol_args,
+                               transid), &transid, sizeof(transid)))
+                       ret = -EFAULT;
+       }
+
+       kfree(vol_args);
+       kfree(async_vol_args);
+
+       return ret;
+}
  
@@ -1217,9 +1344,6 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
         int ret;
         int err = 0;
  
-       if (!capable(CAP_SYS_ADMIN))
-               return -EPERM;
-
         vol_args = memdup_user(arg, sizeof(*vol_args));
         if (IS_ERR(vol_args))
                 return PTR_ERR(vol_args);
@@ -1249,13 +1373,51 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
         }
  
         inode = dentry->d_inode;
+       dest = BTRFS_I(inode)->root;
+       if (!capable(CAP_SYS_ADMIN)){
+               /*
+                * Regular user.  Only allow this with a special mount
+                * option, when the user has write+exec access to the
+                * subvol root, and when rmdir(2) would have been
+                * allowed.
+                *
+                * Note that this is _not_ a check that the subvol is
+                * empty or doesn't contain data that we wouldn't
+                * otherwise be able to delete.
+                *
+                * Users who want to delete empty subvols should try
+                * rmdir(2).
+                */
+               err = -EPERM;
+               if (!btrfs_test_opt(root, USER_SUBVOL_RM_ALLOWED))
+                       goto out_dput;
+
+               /*
+                * Do not allow deletion if the parent dir is the same
+                * as the dir to be deleted.  That means the ioctl
+                * must be called on the dentry referencing the root
+                * of the subvol, not a random directory contained
+                * within it.
+                */
+               err = -EINVAL;
+               if (root == dest)
+                       goto out_dput;
+
+               err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+               if (err)
+                       goto out_dput;
+
+               /* check if subvolume may be deleted by a non-root user */
+               err = btrfs_may_delete(dir, dentry, 1);
+               if (err)
+                       goto out_dput;
+       }
+
         if (inode->i_ino != BTRFS_FIRST_FREE_OBJECTID) {
                 err = -EINVAL;
                 goto out_dput;
         }
  
-       dest = BTRFS_I(inode)->root;
-
         mutex_lock(&inode->i_mutex);
         err = d_invalidate(dentry);
         if (err)
@@ -1294,7 +1456,7 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                 BUG_ON(ret);
         }
  
-       ret = btrfs_commit_transaction(trans, root);
+       ret = btrfs_end_transaction(trans, root);
         BUG_ON(ret);
         inode->i_flags |= S_DEAD;
  out_up_write:
@@ -1492,11 +1654,11 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
         path->reada = 2;
  
         if (inode < src) {
-               mutex_lock(&inode->i_mutex);
-               mutex_lock(&src->i_mutex);
+               mutex_lock_nested(&inode->i_mutex, I_MUTEX_PARENT);
+               mutex_lock_nested(&src->i_mutex, I_MUTEX_CHILD);
         } else {
-               mutex_lock(&src->i_mutex);
-               mutex_lock(&inode->i_mutex);
+               mutex_lock_nested(&src->i_mutex, I_MUTEX_PARENT);
+               mutex_lock_nested(&inode->i_mutex, I_MUTEX_CHILD);
         }
  
         /* determine range to clone */
@@ -2028,6 +2190,36 @@ long btrfs_ioctl_trans_end(struct file *file)
         return 0;
  }
  
+/*
+ * BTRFS_IOC_START_SYNC: start a transaction on the file's root, hand it to
+ * btrfs_commit_transaction_async(), and report its id.
+ *
+ * @file: file the ioctl was issued on
+ * @argp: optional user pointer to a u64 that receives the transaction id;
+ *        may be NULL when the caller does not need it
+ *
+ * Returns 0 on success, the error from starting or committing the
+ * transaction, or -EFAULT when the id cannot be copied to user space.
+ */
+static noinline long btrfs_ioctl_start_sync(struct file *file, void __user *argp)
+{
+       struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+       struct btrfs_trans_handle *trans;
+       u64 transid;
+       int ret;
+
+       /*
+        * NOTE(review): btrfs_start_transaction() is expected to report
+        * failure as an ERR_PTR-encoded handle - confirm against
+        * transaction.c.  Dereferencing it unchecked would oops.
+        */
+       trans = btrfs_start_transaction(root, 0);
+       if (IS_ERR(trans))
+               return PTR_ERR(trans);
+
+       /* Read the id before the handle is passed to the async commit. */
+       transid = trans->transid;
+       ret = btrfs_commit_transaction_async(trans, root, 0);
+       /*
+        * NOTE(review): who owns trans when the async commit fails is not
+        * visible here; the error is propagated as create_subvol() does.
+        */
+       if (ret)
+               return ret;
+
+       if (argp && copy_to_user(argp, &transid, sizeof(transid)))
+               return -EFAULT;
+       return 0;
+}
+
+/*
+ * BTRFS_IOC_WAIT_SYNC: wait for a transaction to commit.
+ *
+ * @file: file the ioctl was issued on
+ * @argp: optional user pointer to a u64 transaction id; when NULL, id 0
+ *        is used, which stands for the current transaction
+ *
+ * Returns -EFAULT if the id cannot be read from user space, otherwise the
+ * result of btrfs_wait_for_commit().
+ */
+static noinline long btrfs_ioctl_wait_sync(struct file *file, void __user *argp)
+{
+       struct btrfs_root *root = BTRFS_I(file->f_dentry->d_inode)->root;
+       u64 transid = 0;  /* current trans unless the caller supplies one */
+
+       if (argp && copy_from_user(&transid, argp, sizeof(transid)))
+               return -EFAULT;
+
+       return btrfs_wait_for_commit(root, transid);
+}
+
  long btrfs_ioctl(struct file *file, unsigned int
                 cmd, unsigned long arg)
  {
@@ -2042,9 +2234,11 @@ long btrfs_ioctl(struct file *file, unsigned int
         case FS_IOC_GETVERSION:
                 return btrfs_ioctl_getversion(file, argp);
         case BTRFS_IOC_SNAP_CREATE:
-               return btrfs_ioctl_snap_create(file, argp, 0);
+               return btrfs_ioctl_snap_create(file, argp, 0, 0);
+       case BTRFS_IOC_SNAP_CREATE_ASYNC:
+               return btrfs_ioctl_snap_create(file, argp, 0, 1);
         case BTRFS_IOC_SUBVOL_CREATE:
-               return btrfs_ioctl_snap_create(file, argp, 1);
+               return btrfs_ioctl_snap_create(file, argp, 1, 0);
         case BTRFS_IOC_SNAP_DESTROY:
                 return btrfs_ioctl_snap_destroy(file, argp);
         case BTRFS_IOC_DEFAULT_SUBVOL:
@@ -2078,6 +2272,10 @@ long btrfs_ioctl(struct file *file, unsigned int
         case BTRFS_IOC_SYNC:
                 btrfs_sync_fs(file->f_dentry->d_sb, 1);
                 return 0;
+       case BTRFS_IOC_START_SYNC:
+               return btrfs_ioctl_start_sync(file, argp);
+       case BTRFS_IOC_WAIT_SYNC:
+               return btrfs_ioctl_wait_sync(file, argp);
         }
  
         return -ENOTTY;