]> bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
xfs: don't use vfs writeback for pure metadata modifications
authorDave Chinner <dchinner@redhat.com>
Tue, 28 Sep 2010 02:27:25 +0000 (12:27 +1000)
committerAlex Elder <aelder@sgi.com>
Mon, 18 Oct 2010 20:07:45 +0000 (15:07 -0500)
Under heavy multi-way parallel create workloads, the VFS struggles
to write back all the inodes that have been changed in age order.
The bdi flusher thread becomes CPU bound, spending 85% of it's time
in the VFS code, mostly traversing the superblock dirty inode list
to separate dirty inodes old enough to flush.

We already keep an index of all metadata changes in age order - in
the AIL - and continued log pressure will do age ordered writeback
without any extra overhead at all. If there is no pressure on the
log, the xfssyncd will periodically write back metadata in ascending
disk address offset order so will be very efficient.

Hence we can stop marking VFS inodes dirty during transaction commit
or when changing timestamps during transactions. This will keep the
inodes in the superblock dirty list to those containing data or
unlogged metadata changes.

However, the timstamp changes are slightly more complex than this -
there are a couple of places that do unlogged updates of the
timestamps, and the VFS need to be informed of these. Hence add a
new function xfs_trans_ichgtime() for transactional changes,
and leave xfs_ichgtime() for the non-transactional changes.

Signed-off-by: Dave Chinner <dchinner@redhat.com>
Reviewed-by: Alex Elder <aelder@sgi.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
12 files changed:
fs/xfs/linux-2.6/xfs_ioctl.c
fs/xfs/linux-2.6/xfs_iops.c
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/quota/xfs_qm_syscalls.c
fs/xfs/xfs_attr.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_inode_item.c
fs/xfs/xfs_rename.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_trans_inode.c
fs/xfs/xfs_utils.c
fs/xfs/xfs_vnodeops.c

index 03aa908a9cb9eeed8241c5e30f5182b3284616c0..10206be7a077f4866420bee8f905607f7fc9cada 100644 (file)
@@ -1088,8 +1088,8 @@ xfs_ioctl_setattr(
                xfs_diflags_to_linux(ip);
        }
 
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
 
        XFS_STATS_INC(xs_ig_attrchg);
 
index b1fc2a6bfe834667cefe95e9a842527ba886944d..a788f016d1fa33bd1a15562f138ef96a980c195f 100644 (file)
@@ -94,41 +94,6 @@ xfs_mark_inode_dirty(
                mark_inode_dirty(inode);
 }
 
-/*
- * Change the requested timestamp in the given inode.
- * We don't lock across timestamp updates, and we don't log them but
- * we do record the fact that there is dirty information in core.
- */
-void
-xfs_ichgtime(
-       xfs_inode_t     *ip,
-       int             flags)
-{
-       struct inode    *inode = VFS_I(ip);
-       timespec_t      tv;
-       int             sync_it = 0;
-
-       tv = current_fs_time(inode->i_sb);
-
-       if ((flags & XFS_ICHGTIME_MOD) &&
-           !timespec_equal(&inode->i_mtime, &tv)) {
-               inode->i_mtime = tv;
-               sync_it = 1;
-       }
-       if ((flags & XFS_ICHGTIME_CHG) &&
-           !timespec_equal(&inode->i_ctime, &tv)) {
-               inode->i_ctime = tv;
-               sync_it = 1;
-       }
-
-       /*
-        * Update complete - now make sure everyone knows that the inode
-        * is dirty.
-        */
-       if (sync_it)
-               xfs_mark_inode_dirty_sync(ip);
-}
-
 /*
  * Hook in SELinux.  This is not quite correct yet, what we really need
  * here (as we do for default ACLs) is a mechanism by which creation of
index a4e07974955be3025ebbbe3839264b9bb1acba74..83154c0a31752b2711b1e0ca4245c4119fed9371 100644 (file)
@@ -972,12 +972,7 @@ xfs_fs_inode_init_once(
 
 /*
  * Dirty the XFS inode when mark_inode_dirty_sync() is called so that
- * we catch unlogged VFS level updates to the inode. Care must be taken
- * here - the transaction code calls mark_inode_dirty_sync() to mark the
- * VFS inode dirty in a transaction and clears the i_update_core field;
- * it must clear the field after calling mark_inode_dirty_sync() to
- * correctly indicate that the dirty state has been propagated into the
- * inode log item.
+ * we catch unlogged VFS level updates to the inode.
  *
  * We need the barrier() to maintain correct ordering between unlogged
  * updates and the transaction commit code that clears the i_update_core
index 45e5849df238c1b80f6ea19b438f065f1c185d79..7a71336f792291efe1fdc3dce87d941857ddbea4 100644 (file)
@@ -276,7 +276,7 @@ xfs_qm_scall_trunc_qfile(
                goto out_unlock;
        }
 
-       xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        error = xfs_trans_commit(tp, XFS_TRANS_RELEASE_LOG_RES);
 
 out_unlock:
index c2568242a9015eaf0bd571104bfc2c468fc2e6bb..905d390c1e5c533d6b2274397ad7ccfe7863c91f 100644 (file)
@@ -355,16 +355,15 @@ xfs_attr_set_int(
                        if (mp->m_flags & XFS_MOUNT_WSYNC) {
                                xfs_trans_set_sync(args.trans);
                        }
+
+                       if (!error && (flags & ATTR_KERNOTIME) == 0) {
+                               xfs_trans_ichgtime(args.trans, dp,
+                                                       XFS_ICHGTIME_CHG);
+                       }
                        err2 = xfs_trans_commit(args.trans,
                                                 XFS_TRANS_RELEASE_LOG_RES);
                        xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
-                       /*
-                        * Hit the inode change time.
-                        */
-                       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-                               xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-                       }
                        return(error == 0 ? err2 : error);
                }
 
@@ -420,6 +419,9 @@ xfs_attr_set_int(
                xfs_trans_set_sync(args.trans);
        }
 
+       if ((flags & ATTR_KERNOTIME) == 0)
+               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
+
        /*
         * Commit the last in the sequence of transactions.
         */
@@ -427,13 +429,6 @@ xfs_attr_set_int(
        error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
-       /*
-        * Hit the inode change time.
-        */
-       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-               xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-       }
-
        return(error);
 
 out:
@@ -567,6 +562,9 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
                xfs_trans_set_sync(args.trans);
        }
 
+       if ((flags & ATTR_KERNOTIME) == 0)
+               xfs_trans_ichgtime(args.trans, dp, XFS_ICHGTIME_CHG);
+
        /*
         * Commit the last in the sequence of transactions.
         */
@@ -574,13 +572,6 @@ xfs_attr_remove_int(xfs_inode_t *dp, struct xfs_name *name, int flags)
        error = xfs_trans_commit(args.trans, XFS_TRANS_RELEASE_LOG_RES);
        xfs_iunlock(dp, XFS_ILOCK_EXCL);
 
-       /*
-        * Hit the inode change time.
-        */
-       if (!error && (flags & ATTR_KERNOTIME) == 0) {
-               xfs_ichgtime(dp, XFS_ICHGTIME_CHG);
-       }
-
        return(error);
 
 out:
index 0898c5417d12b53476594d5cfdfaee665de56533..54781fcd322a74e7fdda5e3d6566c985f4bbd74c 100644 (file)
@@ -471,7 +471,6 @@ int         xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
 void           xfs_iext_realloc(xfs_inode_t *, int, int);
 void           xfs_iunpin_wait(xfs_inode_t *);
 int            xfs_iflush(xfs_inode_t *, uint);
-void           xfs_ichgtime(xfs_inode_t *, int);
 void           xfs_lock_inodes(xfs_inode_t **, int, uint);
 void           xfs_lock_two_inodes(xfs_inode_t *, xfs_inode_t *, uint);
 
index fe00777e2796317146ce1e20142cc7f4e3679635..c7ac020705df1fbad86550c58784ba34cf2ee3ba 100644 (file)
@@ -222,15 +222,6 @@ xfs_inode_item_format(
        vecp++;
        nvecs        = 1;
 
-       /*
-        * Make sure the linux inode is dirty. We do this before
-        * clearing i_update_core as the VFS will call back into
-        * XFS here and set i_update_core, so we need to dirty the
-        * inode first so that the ordering of i_update_core and
-        * unlogged modifications still works as described below.
-        */
-       xfs_mark_inode_dirty_sync(ip);
-
        /*
         * Clear i_update_core if the timestamps (or any other
         * non-transactional modification) need flushing/logging
index 8fca957200dfcba000f1f0eb4da51eb1d4f8516e..9028733f7ed8eb49a730ad54ef8af5d478fe8b12 100644 (file)
@@ -211,7 +211,9 @@ xfs_rename(
                        goto error_return;
                if (error)
                        goto abort_return;
-               xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+               xfs_trans_ichgtime(tp, target_dp,
+                                       XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
                if (new_parent && src_is_directory) {
                        error = xfs_bumplink(tp, target_dp);
@@ -249,7 +251,9 @@ xfs_rename(
                                        &first_block, &free_list, spaceres);
                if (error)
                        goto abort_return;
-               xfs_ichgtime(target_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+
+               xfs_trans_ichgtime(tp, target_dp,
+                                       XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
                /*
                 * Decrement the link count on the target since the target
@@ -292,7 +296,7 @@ xfs_rename(
         * inode isn't really being changed, but old unix file systems did
         * it and some incremental backup programs won't work without it.
         */
-       xfs_ichgtime(src_ip, XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, src_ip, XFS_ICHGTIME_CHG);
 
        /*
         * Adjust the link count on src_dp.  This is necessary when
@@ -315,7 +319,7 @@ xfs_rename(
        if (error)
                goto abort_return;
 
-       xfs_ichgtime(src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, src_dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, src_dp, XFS_ILOG_CORE);
        if (new_parent)
                xfs_trans_log_inode(tp, target_dp, XFS_ILOG_CORE);
index c13c0f97b4945d7ff80990aeb7d4c0a8e12d0139..e2cbe4da7d8e41fcfe85ca98b04bf8e99261c33f 100644 (file)
@@ -473,6 +473,7 @@ void                xfs_trans_dquot_buf(xfs_trans_t *, struct xfs_buf *, uint);
 void           xfs_trans_inode_alloc_buf(xfs_trans_t *, struct xfs_buf *);
 int            xfs_trans_iget(struct xfs_mount *, xfs_trans_t *,
                               xfs_ino_t , uint, uint, struct xfs_inode **);
+void           xfs_trans_ichgtime(struct xfs_trans *, struct xfs_inode *, int);
 void           xfs_trans_ijoin_ref(struct xfs_trans *, struct xfs_inode *, uint);
 void           xfs_trans_ijoin(struct xfs_trans *, struct xfs_inode *);
 void           xfs_trans_log_buf(xfs_trans_t *, struct xfs_buf *, uint, uint);
index cdc53a1050c56ac365a2d61114f65d1cc03fbc26..ccb34532768bd65a1c5baa14a4a0a143df1ea2cb 100644 (file)
@@ -117,6 +117,36 @@ xfs_trans_ijoin_ref(
        ip->i_itemp->ili_lock_flags = lock_flags;
 }
 
+/*
+ * Transactional inode timestamp update. Requires the inode to be locked and
+ * joined to the transaction supplied. Relies on the transaction subsystem to
+ * track dirty state and update/writeback the inode accordingly.
+ */
+void
+xfs_trans_ichgtime(
+       struct xfs_trans        *tp,
+       struct xfs_inode        *ip,
+       int                     flags)
+{
+       struct inode            *inode = VFS_I(ip);
+       timespec_t              tv;
+
+       ASSERT(tp);
+       ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
+       ASSERT(ip->i_transp == tp);
+
+       tv = current_fs_time(inode->i_sb);
+
+       if ((flags & XFS_ICHGTIME_MOD) &&
+           !timespec_equal(&inode->i_mtime, &tv)) {
+               inode->i_mtime = tv;
+       }
+       if ((flags & XFS_ICHGTIME_CHG) &&
+           !timespec_equal(&inode->i_ctime, &tv)) {
+               inode->i_ctime = tv;
+       }
+}
+
 /*
  * This is called to mark the fields indicated in fieldmask as needing
  * to be logged when the transaction is committed.  The inode must
index b7d5769d2df04a6d5599fec813ce59693275c701..4c2ba6f6adc8bf1d02384b123e32469d4beeac48 100644 (file)
@@ -235,7 +235,7 @@ xfs_droplink(
 {
        int     error;
 
-       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
        ASSERT (ip->i_d.di_nlink > 0);
        ip->i_d.di_nlink--;
@@ -299,7 +299,7 @@ xfs_bumplink(
 {
        if (ip->i_d.di_nlink >= XFS_MAXLINK)
                return XFS_ERROR(EMLINK);
-       xfs_ichgtime(ip, XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
 
        ASSERT(ip->i_d.di_nlink > 0);
        ip->i_d.di_nlink++;
index dc6e4fb8bbc9a7902b4a38097bfc6dde17d098dc..a230cd4d077432b76e7f191436f265dd7715ba0b 100644 (file)
@@ -184,8 +184,11 @@ xfs_setattr(
                    ip->i_size == 0 && ip->i_d.di_nextents == 0) {
                        xfs_iunlock(ip, XFS_ILOCK_EXCL);
                        lock_flags &= ~XFS_ILOCK_EXCL;
-                       if (mask & ATTR_CTIME)
-                               xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+                       if (mask & ATTR_CTIME) {
+                               inode->i_mtime = inode->i_ctime =
+                                               current_fs_time(inode->i_sb);
+                               xfs_mark_inode_dirty_sync(ip);
+                       }
                        code = 0;
                        goto error_return;
                }
@@ -1391,7 +1394,7 @@ xfs_create(
                ASSERT(error != ENOSPC);
                goto out_trans_abort;
        }
-       xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
        if (is_dir) {
@@ -1742,7 +1745,7 @@ xfs_remove(
                ASSERT(error != ENOENT);
                goto out_bmap_cancel;
        }
-       xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
 
        if (is_dir) {
                /*
@@ -1895,7 +1898,7 @@ xfs_link(
                                        &first_block, &free_list, resblks);
        if (error)
                goto abort_return;
-       xfs_ichgtime(tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, tdp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, tdp, XFS_ILOG_CORE);
 
        error = xfs_bumplink(tp, sip);
@@ -2129,7 +2132,7 @@ xfs_symlink(
                                        &first_block, &free_list, resblks);
        if (error)
                goto error1;
-       xfs_ichgtime(dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+       xfs_trans_ichgtime(tp, dp, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE);
 
        /*
@@ -2833,7 +2836,7 @@ xfs_change_file_space(
                if (ip->i_d.di_mode & S_IXGRP)
                        ip->i_d.di_mode &= ~S_ISGID;
 
-               xfs_ichgtime(ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
+               xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
        }
        if (setprealloc)
                ip->i_d.di_flags |= XFS_DIFLAG_PREALLOC;