Merge branch 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 Aug 2010 15:41:18 +0000 (08:41 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 25 Aug 2010 15:41:18 +0000 (08:41 -0700)
* 'hwmon-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jdelvare/staging:
  MAINTAINERS: hwmon/coretemp: Change maintainers
  hwmon: (k8temp) Differentiate between AM2 and ASB1
  hwmon: (ads7871) Fix ads7871_probe error paths
  hwmon: (coretemp) Fix harmless build warning

18 files changed:
arch/x86/include/asm/tsc.h
arch/x86/kernel/tsc.c
arch/x86/power/cpu.c
drivers/xen/events.c
fs/xfs/linux-2.6/xfs_aops.c
fs/xfs/linux-2.6/xfs_super.c
fs/xfs/linux-2.6/xfs_sync.c
fs/xfs/xfs_fsops.c
fs/xfs/xfs_fsops.h
fs/xfs/xfs_ialloc.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_log.c
fs/xfs/xfs_log_cil.c
fs/xfs/xfs_log_priv.h
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans_priv.h
kernel/sched_fair.c
mm/page-writeback.c

diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
index c0427295e8f58956e32f833c78c9ad75676778d2..1ca132fc0d039cbc8c3b7f605fa4dbbd91db7291 100644
@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cpu);
 extern void check_tsc_sync_target(void);
 
 extern int notsc_setup(char *);
+extern void save_sched_clock_state(void);
+extern void restore_sched_clock_state(void);
 
 #endif /* _ASM_X86_TSC_H */
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index ce8e50239332470ba329dd3045692fee42f69a16..d632934cb6386947352650f262745eb3c93c68ce 100644
@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
        local_irq_restore(flags);
 }
 
+static unsigned long long cyc2ns_suspend;
+
+void save_sched_clock_state(void)
+{
+       if (!sched_clock_stable)
+               return;
+
+       cyc2ns_suspend = sched_clock();
+}
+
+/*
+ * Even on processors with an invariant TSC, the TSC gets reset in some of
+ * the ACPI system sleep states. On some systems the BIOS seems to reinit the
+ * TSC to an arbitrary value (still sync'd across CPUs) during resume from
+ * such sleep states. To cope with this, recompute the cyc2ns_offset for each
+ * cpu so that sched_clock() continues from the point where it was left off
+ * during suspend.
+ */
+void restore_sched_clock_state(void)
+{
+       unsigned long long offset;
+       unsigned long flags;
+       int cpu;
+
+       if (!sched_clock_stable)
+               return;
+
+       local_irq_save(flags);
+
+       get_cpu_var(cyc2ns_offset) = 0;
+       offset = cyc2ns_suspend - sched_clock();
+
+       for_each_possible_cpu(cpu)
+               per_cpu(cyc2ns_offset, cpu) = offset;
+
+       local_irq_restore(flags);
+}
+
 #ifdef CONFIG_CPU_FREQ
 
 /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
index e7e8c5f549563a6b65a4b139d8202ee878c80c74..87bb35e34ef175d0a8b3beedd5c4d76010c9169d 100644
@@ -113,6 +113,7 @@ static void __save_processor_state(struct saved_context *ctxt)
 void save_processor_state(void)
 {
        __save_processor_state(&saved_context);
+       save_sched_clock_state();
 }
 #ifdef CONFIG_X86_32
 EXPORT_SYMBOL(save_processor_state);
@@ -229,6 +230,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
 void restore_processor_state(void)
 {
        __restore_processor_state(&saved_context);
+       restore_sched_clock_state();
 }
 #ifdef CONFIG_X86_32
 EXPORT_SYMBOL(restore_processor_state);
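
Taken together, the tsc.c and cpu.c hunks above make sched_clock() continue across suspend/resume by recomputing the per-CPU cyc2ns offset. The arithmetic can be modelled outside the kernel; the sketch below is plain userspace C with made-up numbers and helper names, not kernel code.

#include <stdio.h>

/* Illustrative model: ns = (cycles * scale) >> 10, plus a per-CPU offset. */
static unsigned long long tsc;          /* pretend TSC value */
static unsigned long long scale = 3;    /* pretend cyc2ns scale factor */
static unsigned long long offset;       /* models per_cpu(cyc2ns_offset) */

static unsigned long long model_sched_clock(void)
{
	return ((tsc * scale) >> 10) + offset;
}

int main(void)
{
	unsigned long long saved;

	tsc = 5000000ULL;                       /* clock runs before suspend */
	saved = model_sched_clock();            /* save_sched_clock_state() */

	tsc = 12345ULL;                         /* firmware reset the TSC in sleep */

	offset = 0;                             /* restore_sched_clock_state(): */
	offset = saved - model_sched_clock();   /* recompute the offset */

	/* the clock resumes from the saved value instead of jumping backwards */
	printf("resumed clock: %llu (saved %llu)\n", model_sched_clock(), saved);
	return 0;
}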
diff --git a/drivers/xen/events.c b/drivers/xen/events.c
index 72f91bff29c7d836d86844c224225e1353c76648..13365ba3521853eb738f6fa9b8eb68a010c3b04b 100644
@@ -112,6 +112,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
 #define VALID_EVTCHN(chn)      ((chn) != 0)
 
 static struct irq_chip xen_dynamic_chip;
+static struct irq_chip xen_percpu_chip;
 
 /* Constructor for packed IRQ information. */
 static struct irq_info mk_unbound_info(void)
@@ -377,7 +378,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
                irq = find_unbound_irq();
 
                set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-                                             handle_level_irq, "event");
+                                             handle_edge_irq, "event");
 
                evtchn_to_irq[evtchn] = irq;
                irq_info[irq] = mk_evtchn_info(evtchn);
@@ -403,8 +404,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
                if (irq < 0)
                        goto out;
 
-               set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-                                             handle_level_irq, "ipi");
+               set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
+                                             handle_percpu_irq, "ipi");
 
                bind_ipi.vcpu = cpu;
                if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
@@ -444,8 +445,8 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
 
                irq = find_unbound_irq();
 
-               set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
-                                             handle_level_irq, "virq");
+               set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
+                                             handle_percpu_irq, "virq");
 
                evtchn_to_irq[evtchn] = irq;
                irq_info[irq] = mk_virq_info(evtchn, virq);
@@ -964,6 +965,16 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
        .retrigger      = retrigger_dynirq,
 };
 
+static struct irq_chip xen_percpu_chip __read_mostly = {
+       .name           = "xen-percpu",
+
+       .disable        = disable_dynirq,
+       .mask           = disable_dynirq,
+       .unmask         = enable_dynirq,
+
+       .ack            = ack_dynirq,
+};
+
 int xen_set_callback_via(uint64_t via)
 {
        struct xen_hvm_param a;
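
For context on how these per-CPU channels are consumed: drivers still bind VIRQs and IPIs through the usual helpers, and the new xen_percpu_chip/handle_percpu_irq pair is what ends up servicing them. A rough sketch of a driver binding a per-CPU virtual IRQ (the handler and device name below are hypothetical) might look like this:

#include <linux/interrupt.h>
#include <xen/events.h>

/* hypothetical handler: only ever runs on the CPU the VIRQ is bound to */
static irqreturn_t demo_virq_handler(int irq, void *dev_id)
{
	/* per-CPU work only; no cross-CPU masking is needed, which is
	 * why handle_percpu_irq (not handle_level_irq) fits here */
	return IRQ_HANDLED;
}

static int demo_bind_virq(unsigned int cpu)
{
	int irq;

	/* bind_virq_to_irqhandler() ends up in bind_virq_to_irq() above,
	 * which now installs xen_percpu_chip/handle_percpu_irq */
	irq = bind_virq_to_irqhandler(VIRQ_DEBUG, cpu, demo_virq_handler,
				      IRQF_PERCPU, "demo-virq", NULL);
	return irq < 0 ? irq : 0;
}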
diff --git a/fs/xfs/linux-2.6/xfs_aops.c b/fs/xfs/linux-2.6/xfs_aops.c
index 15412fe15c3a47f3cc744af36cecfe2b01e5a5a3..b552f816de15942095f82ad0302b8656f7db09ae 100644
@@ -852,8 +852,8 @@ xfs_convert_page(
                SetPageUptodate(page);
 
        if (count) {
-               wbc->nr_to_write--;
-               if (wbc->nr_to_write <= 0)
+               if (--wbc->nr_to_write <= 0 &&
+                   wbc->sync_mode == WB_SYNC_NONE)
                        done = 1;
        }
        xfs_start_page_writeback(page, !page_dirty, count);
@@ -1068,7 +1068,7 @@ xfs_vm_writepage(
         * by themselves.
         */
        if ((current->flags & (PF_MEMALLOC|PF_KSWAPD)) == PF_MEMALLOC)
-               goto out_fail;
+               goto redirty;
 
        /*
         * We need a transaction if there are delalloc or unwritten buffers
@@ -1080,7 +1080,7 @@ xfs_vm_writepage(
         */
        xfs_count_page_state(page, &delalloc, &unwritten);
        if ((current->flags & PF_FSTRANS) && (delalloc || unwritten))
-               goto out_fail;
+               goto redirty;
 
        /* Is this page beyond the end of the file? */
        offset = i_size_read(inode);
@@ -1245,12 +1245,15 @@ error:
        if (iohead)
                xfs_cancel_ioend(iohead);
 
+       if (err == -EAGAIN)
+               goto redirty;
+
        xfs_aops_discard_page(page);
        ClearPageUptodate(page);
        unlock_page(page);
        return err;
 
-out_fail:
+redirty:
        redirty_page_for_writepage(wbc, page);
        unlock_page(page);
        return 0;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c
index 15c35b62ff14ba46e0dd31d09cb5b0dfb54cf195..a4e07974955be3025ebbbe3839264b9bb1acba74 100644
@@ -1226,6 +1226,7 @@ xfs_fs_statfs(
        struct xfs_inode        *ip = XFS_I(dentry->d_inode);
        __uint64_t              fakeinos, id;
        xfs_extlen_t            lsize;
+       __int64_t               ffree;
 
        statp->f_type = XFS_SB_MAGIC;
        statp->f_namelen = MAXNAMELEN - 1;
@@ -1249,7 +1250,11 @@ xfs_fs_statfs(
                statp->f_files = min_t(typeof(statp->f_files),
                                        statp->f_files,
                                        mp->m_maxicount);
-       statp->f_ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+
+       /* make sure statp->f_ffree does not underflow */
+       ffree = statp->f_files - (sbp->sb_icount - sbp->sb_ifree);
+       statp->f_ffree = max_t(__int64_t, ffree, 0);
+
        spin_unlock(&mp->m_sb_lock);
 
        if ((ip->i_d.di_flags & XFS_DIFLAG_PROJINHERIT) ||
@@ -1402,7 +1407,7 @@ xfs_fs_freeze(
 
        xfs_save_resvblks(mp);
        xfs_quiesce_attr(mp);
-       return -xfs_fs_log_dummy(mp);
+       return -xfs_fs_log_dummy(mp, SYNC_WAIT);
 }
 
 STATIC int
diff --git a/fs/xfs/linux-2.6/xfs_sync.c b/fs/xfs/linux-2.6/xfs_sync.c
index dfcbd98d15997e62e7d8a433fa71e5b8a9912609..d59c4a65d492c9b6b0713accaec1ab1c2ba7ea5f 100644
@@ -34,6 +34,7 @@
 #include "xfs_inode_item.h"
 #include "xfs_quota.h"
 #include "xfs_trace.h"
+#include "xfs_fsops.h"
 
 #include <linux/kthread.h>
 #include <linux/freezer.h>
@@ -340,38 +341,6 @@ xfs_sync_attr(
                                     XFS_ICI_NO_TAG, 0, NULL);
 }
 
-STATIC int
-xfs_commit_dummy_trans(
-       struct xfs_mount        *mp,
-       uint                    flags)
-{
-       struct xfs_inode        *ip = mp->m_rootip;
-       struct xfs_trans        *tp;
-       int                     error;
-
-       /*
-        * Put a dummy transaction in the log to tell recovery
-        * that all others are OK.
-        */
-       tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1);
-       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
-       if (error) {
-               xfs_trans_cancel(tp, 0);
-               return error;
-       }
-
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       xfs_trans_ijoin(tp, ip);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       error = xfs_trans_commit(tp, 0);
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-
-       /* the log force ensures this transaction is pushed to disk */
-       xfs_log_force(mp, (flags & SYNC_WAIT) ? XFS_LOG_SYNC : 0);
-       return error;
-}
-
 STATIC int
 xfs_sync_fsdata(
        struct xfs_mount        *mp)
@@ -432,7 +401,7 @@ xfs_quiesce_data(
 
        /* mark the log as covered if needed */
        if (xfs_log_need_covered(mp))
-               error2 = xfs_commit_dummy_trans(mp, SYNC_WAIT);
+               error2 = xfs_fs_log_dummy(mp, SYNC_WAIT);
 
        /* flush data-only devices */
        if (mp->m_rtdev_targp)
@@ -563,7 +532,7 @@ xfs_flush_inodes(
 /*
  * Every sync period we need to unpin all items, reclaim inodes and sync
  * disk quotas.  We might need to cover the log to indicate that the
- * filesystem is idle.
+ * filesystem is idle and not frozen.
  */
 STATIC void
 xfs_sync_worker(
@@ -577,8 +546,9 @@ xfs_sync_worker(
                xfs_reclaim_inodes(mp, 0);
                /* dgc: errors ignored here */
                error = xfs_qm_sync(mp, SYNC_TRYLOCK);
-               if (xfs_log_need_covered(mp))
-                       error = xfs_commit_dummy_trans(mp, 0);
+               if (mp->m_super->s_frozen == SB_UNFROZEN &&
+                   xfs_log_need_covered(mp))
+                       error = xfs_fs_log_dummy(mp, 0);
        }
        mp->m_sync_seq++;
        wake_up(&mp->m_wait_single_sync_task);
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index dbca5f5c37bad18fc220ce21f551d17b58f0c13a..43b1d56993350ba3af58c53757be99dfa936c017 100644
@@ -604,31 +604,36 @@ out:
        return 0;
 }
 
+/*
+ * Dump a transaction into the log that contains no real change. This is needed
+ * to be able to make the log dirty or stamp the current tail LSN into the log
+ * during the covering operation.
+ *
+ * We cannot use an inode here for this - that will push dirty state back up
+ * into the VFS and then periodic inode flushing will prevent log covering from
+ * making progress. Hence we log a field in the superblock instead.
+ */
 int
 xfs_fs_log_dummy(
-       xfs_mount_t     *mp)
+       xfs_mount_t     *mp,
+       int             flags)
 {
        xfs_trans_t     *tp;
-       xfs_inode_t     *ip;
        int             error;
 
        tp = _xfs_trans_alloc(mp, XFS_TRANS_DUMMY1, KM_SLEEP);
-       error = xfs_trans_reserve(tp, 0, XFS_ICHANGE_LOG_RES(mp), 0, 0, 0);
+       error = xfs_trans_reserve(tp, 0, mp->m_sb.sb_sectsize + 128, 0, 0,
+                                       XFS_DEFAULT_LOG_COUNT);
        if (error) {
                xfs_trans_cancel(tp, 0);
                return error;
        }
 
-       ip = mp->m_rootip;
-       xfs_ilock(ip, XFS_ILOCK_EXCL);
-
-       xfs_trans_ijoin(tp, ip);
-       xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
-       xfs_trans_set_sync(tp);
-       error = xfs_trans_commit(tp, 0);
-
-       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-       return error;
+       /* log the UUID because it is an unchanging field */
+       xfs_mod_sb(tp, XFS_SB_UUID);
+       if (flags & SYNC_WAIT)
+               xfs_trans_set_sync(tp);
+       return xfs_trans_commit(tp, 0);
 }
 
 int
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 88435e0a77c9b2f67035344e8f3afc58532d4d5c..a786c5212c1e478677e46f2725105a010ebd1262 100644
@@ -25,6 +25,6 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
 extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
                                xfs_fsop_resblks_t *outval);
 extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(xfs_mount_t *mp);
+extern int xfs_fs_log_dummy(xfs_mount_t *mp, int flags);
 
 #endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_ialloc.c b/fs/xfs/xfs_ialloc.c
index abf80ae1e95bed43db56f17dd8a7ffc06039a60a..5371d2dc360ebde33776d7f44d87e4c79afe210e 100644
@@ -1213,7 +1213,6 @@ xfs_imap_lookup(
        struct xfs_inobt_rec_incore rec;
        struct xfs_btree_cur    *cur;
        struct xfs_buf          *agbp;
-       xfs_agino_t             startino;
        int                     error;
        int                     i;
 
@@ -1227,13 +1226,13 @@ xfs_imap_lookup(
        }
 
        /*
-        * derive and lookup the exact inode record for the given agino. If the
-        * record cannot be found, then it's an invalid inode number and we
-        * should abort.
+        * Lookup the inode record for the given agino. If the record cannot be
+        * found, then it's an invalid inode number and we should abort. Once
+        * we have a record, we need to ensure it contains the inode number
+        * we are looking up.
         */
        cur = xfs_inobt_init_cursor(mp, tp, agbp, agno);
-       startino = agino & ~(XFS_IALLOC_INODES(mp) - 1);
-       error = xfs_inobt_lookup(cur, startino, XFS_LOOKUP_EQ, &i);
+       error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &i);
        if (!error) {
                if (i)
                        error = xfs_inobt_get_rec(cur, &rec, &i);
@@ -1246,6 +1245,11 @@ xfs_imap_lookup(
        if (error)
                return error;
 
+       /* check that the returned record contains the required inode */
+       if (rec.ir_startino > agino ||
+           rec.ir_startino + XFS_IALLOC_INODES(mp) <= agino)
+               return EINVAL;
+
        /* for untrusted inodes check it is allocated first */
        if ((flags & XFS_IGET_UNTRUSTED) &&
            (rec.ir_free & XFS_INOBT_MASK(agino - rec.ir_startino)))
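
The rewritten lookup above relies on a general btree idiom: find the record whose start is the largest value not exceeding the key (LOOKUP_LE), then verify the key really falls inside that record. A stand-alone sketch of the containment check, with illustrative types and a fixed chunk size in place of XFS_IALLOC_INODES(mp):

#include <stdio.h>

#define CHUNK_INODES 64   /* stand-in for XFS_IALLOC_INODES(mp) */

struct ino_rec {
	unsigned int startino;    /* first inode number in the chunk */
};

/*
 * Model of the LOOKUP_LE + containment check: the record only answers for
 * inode numbers in [startino, startino + CHUNK_INODES).
 */
static int rec_contains(const struct ino_rec *rec, unsigned int agino)
{
	if (rec->startino > agino ||
	    rec->startino + CHUNK_INODES <= agino)
		return 0;    /* xfs_imap_lookup() returns EINVAL here */
	return 1;
}

int main(void)
{
	struct ino_rec rec = { .startino = 128 };

	printf("%d %d %d\n",
	       rec_contains(&rec, 130),   /* 1: inside the chunk */
	       rec_contains(&rec, 127),   /* 0: before the chunk */
	       rec_contains(&rec, 192));  /* 0: past the chunk */
	return 0;
}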
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 68415cb4f23cab39861119c68838b1b099029df2..34798f391c49349018f04a47d625c6aafa035bea 100644
@@ -1914,6 +1914,11 @@ xfs_iunlink_remove(
        return 0;
 }
 
+/*
+ * A big issue when freeing the inode cluster is that we _cannot_ skip any
+ * inodes that are in memory - they all must be marked stale and attached to
+ * the cluster buffer.
+ */
 STATIC void
 xfs_ifree_cluster(
        xfs_inode_t     *free_ip,
@@ -1945,8 +1950,6 @@ xfs_ifree_cluster(
        }
 
        for (j = 0; j < nbufs; j++, inum += ninodes) {
-               int     found = 0;
-
                blkno = XFS_AGB_TO_DADDR(mp, XFS_INO_TO_AGNO(mp, inum),
                                         XFS_INO_TO_AGBNO(mp, inum));
 
@@ -1965,7 +1968,9 @@ xfs_ifree_cluster(
                /*
                 * Walk the inodes already attached to the buffer and mark them
                 * stale. These will all have the flush locks held, so an
-                * in-memory inode walk can't lock them.
+                * in-memory inode walk can't lock them. By marking them all
+                * stale first, we will not attempt to lock them in the loop
+                * below as the XFS_ISTALE flag will be set.
                 */
                lip = XFS_BUF_FSPRIVATE(bp, xfs_log_item_t *);
                while (lip) {
@@ -1977,11 +1982,11 @@ xfs_ifree_cluster(
                                                        &iip->ili_flush_lsn,
                                                        &iip->ili_item.li_lsn);
                                xfs_iflags_set(iip->ili_inode, XFS_ISTALE);
-                               found++;
                        }
                        lip = lip->li_bio_list;
                }
 
+
                /*
                 * For each inode in memory attempt to add it to the inode
                 * buffer and set it up for being staled on buffer IO
@@ -1993,6 +1998,7 @@ xfs_ifree_cluster(
                 * even trying to lock them.
                 */
                for (i = 0; i < ninodes; i++) {
+retry:
                        read_lock(&pag->pag_ici_lock);
                        ip = radix_tree_lookup(&pag->pag_ici_root,
                                        XFS_INO_TO_AGINO(mp, (inum + i)));
@@ -2003,38 +2009,36 @@ xfs_ifree_cluster(
                                continue;
                        }
 
-                       /* don't try to lock/unlock the current inode */
+                       /*
+                        * Don't try to lock/unlock the current inode, but we
+                        * _cannot_ skip the other inodes that we did not find
+                        * in the list attached to the buffer and are not
+                        * already marked stale. If we can't lock it, back off
+                        * and retry.
+                        */
                        if (ip != free_ip &&
                            !xfs_ilock_nowait(ip, XFS_ILOCK_EXCL)) {
                                read_unlock(&pag->pag_ici_lock);
-                               continue;
+                               delay(1);
+                               goto retry;
                        }
                        read_unlock(&pag->pag_ici_lock);
 
-                       if (!xfs_iflock_nowait(ip)) {
-                               if (ip != free_ip)
-                                       xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                               continue;
-                       }
-
+                       xfs_iflock(ip);
                        xfs_iflags_set(ip, XFS_ISTALE);
-                       if (xfs_inode_clean(ip)) {
-                               ASSERT(ip != free_ip);
-                               xfs_ifunlock(ip);
-                               xfs_iunlock(ip, XFS_ILOCK_EXCL);
-                               continue;
-                       }
 
+                       /*
+                        * we don't need to attach clean inodes or those only
+                        * with unlogged changes (which we throw away, anyway).
+                        */
                        iip = ip->i_itemp;
-                       if (!iip) {
-                               /* inode with unlogged changes only */
+                       if (!iip || xfs_inode_clean(ip)) {
                                ASSERT(ip != free_ip);
                                ip->i_update_core = 0;
                                xfs_ifunlock(ip);
                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                                continue;
                        }
-                       found++;
 
                        iip->ili_last_fields = iip->ili_format.ilf_fields;
                        iip->ili_format.ilf_fields = 0;
@@ -2049,8 +2053,7 @@ xfs_ifree_cluster(
                                xfs_iunlock(ip, XFS_ILOCK_EXCL);
                }
 
-               if (found)
-                       xfs_trans_stale_inode_buf(tp, bp);
+               xfs_trans_stale_inode_buf(tp, bp);
                xfs_trans_binval(tp, bp);
        }
 
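
The retry loop added above must not skip any in-memory inode, so a failed trylock backs off for a tick and retries instead of continuing. The same pattern, reduced to a minimal pthread sketch with illustrative names:

#include <pthread.h>
#include <unistd.h>

struct obj {
	pthread_mutex_t lock;
	int stale;
};

/*
 * Mark every object stale; none may be skipped. If an object is busy,
 * back off briefly and retry rather than moving on (mirrors the
 * xfs_ilock_nowait()/delay(1)/goto retry loop above).
 */
static void mark_all_stale(struct obj *objs, int count)
{
	int i;

	for (i = 0; i < count; i++) {
retry:
		if (pthread_mutex_trylock(&objs[i].lock) != 0) {
			usleep(1000);		/* delay(1) equivalent */
			goto retry;
		}
		objs[i].stale = 1;
		pthread_mutex_unlock(&objs[i].lock);
	}
}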
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 925d572bf0f405e9a94be11b74549c5f760b3ec4..33f718f92a4849df234880dfe190069791d7bcb6 100644
@@ -3015,7 +3015,8 @@ _xfs_log_force(
 
        XFS_STATS_INC(xs_log_force);
 
-       xlog_cil_push(log, 1);
+       if (log->l_cilp)
+               xlog_cil_force(log);
 
        spin_lock(&log->l_icloglock);
 
@@ -3167,7 +3168,7 @@ _xfs_log_force_lsn(
        XFS_STATS_INC(xs_log_force);
 
        if (log->l_cilp) {
-               lsn = xlog_cil_push_lsn(log, lsn);
+               lsn = xlog_cil_force_lsn(log, lsn);
                if (lsn == NULLCOMMITLSN)
                        return 0;
        }
@@ -3724,7 +3725,7 @@ xfs_log_force_umount(
         * call below.
         */
        if (!logerror && (mp->m_flags & XFS_MOUNT_DELAYLOG))
-               xlog_cil_push(log, 1);
+               xlog_cil_force(log);
 
        /*
         * We must hold both the GRANT lock and the LOG lock,
diff --git a/fs/xfs/xfs_log_cil.c b/fs/xfs/xfs_log_cil.c
index 31e4ea2d19acfc08f069813dffa367e6b2420973..ed575fb4b49597806200f676680ff787d5be9d12 100644
@@ -68,6 +68,7 @@ xlog_cil_init(
        ctx->sequence = 1;
        ctx->cil = cil;
        cil->xc_ctx = ctx;
+       cil->xc_current_sequence = ctx->sequence;
 
        cil->xc_log = log;
        log->l_cilp = cil;
@@ -269,15 +270,10 @@ xlog_cil_insert(
 static void
 xlog_cil_format_items(
        struct log              *log,
-       struct xfs_log_vec      *log_vector,
-       struct xlog_ticket      *ticket,
-       xfs_lsn_t               *start_lsn)
+       struct xfs_log_vec      *log_vector)
 {
        struct xfs_log_vec *lv;
 
-       if (start_lsn)
-               *start_lsn = log->l_cilp->xc_ctx->sequence;
-
        ASSERT(log_vector);
        for (lv = log_vector; lv; lv = lv->lv_next) {
                void    *ptr;
@@ -301,9 +297,24 @@ xlog_cil_format_items(
                        ptr += vec->i_len;
                }
                ASSERT(ptr == lv->lv_buf + lv->lv_buf_len);
+       }
+}
+
+static void
+xlog_cil_insert_items(
+       struct log              *log,
+       struct xfs_log_vec      *log_vector,
+       struct xlog_ticket      *ticket,
+       xfs_lsn_t               *start_lsn)
+{
+       struct xfs_log_vec *lv;
+
+       if (start_lsn)
+               *start_lsn = log->l_cilp->xc_ctx->sequence;
 
+       ASSERT(log_vector);
+       for (lv = log_vector; lv; lv = lv->lv_next)
                xlog_cil_insert(log, ticket, lv->lv_item, lv);
-       }
 }
 
 static void
@@ -320,80 +331,6 @@ xlog_cil_free_logvec(
        }
 }
 
-/*
- * Commit a transaction with the given vector to the Committed Item List.
- *
- * To do this, we need to format the item, pin it in memory if required and
- * account for the space used by the transaction. Once we have done that we
- * need to release the unused reservation for the transaction, attach the
- * transaction to the checkpoint context so we carry the busy extents through
- * to checkpoint completion, and then unlock all the items in the transaction.
- *
- * For more specific information about the order of operations in
- * xfs_log_commit_cil() please refer to the comments in
- * xfs_trans_commit_iclog().
- *
- * Called with the context lock already held in read mode to lock out
- * background commit, returns without it held once background commits are
- * allowed again.
- */
-int
-xfs_log_commit_cil(
-       struct xfs_mount        *mp,
-       struct xfs_trans        *tp,
-       struct xfs_log_vec      *log_vector,
-       xfs_lsn_t               *commit_lsn,
-       int                     flags)
-{
-       struct log              *log = mp->m_log;
-       int                     log_flags = 0;
-       int                     push = 0;
-
-       if (flags & XFS_TRANS_RELEASE_LOG_RES)
-               log_flags = XFS_LOG_REL_PERM_RESERV;
-
-       if (XLOG_FORCED_SHUTDOWN(log)) {
-               xlog_cil_free_logvec(log_vector);
-               return XFS_ERROR(EIO);
-       }
-
-       /* lock out background commit */
-       down_read(&log->l_cilp->xc_ctx_lock);
-       xlog_cil_format_items(log, log_vector, tp->t_ticket, commit_lsn);
-
-       /* check we didn't blow the reservation */
-       if (tp->t_ticket->t_curr_res < 0)
-               xlog_print_tic_res(log->l_mp, tp->t_ticket);
-
-       /* attach the transaction to the CIL if it has any busy extents */
-       if (!list_empty(&tp->t_busy)) {
-               spin_lock(&log->l_cilp->xc_cil_lock);
-               list_splice_init(&tp->t_busy,
-                                       &log->l_cilp->xc_ctx->busy_extents);
-               spin_unlock(&log->l_cilp->xc_cil_lock);
-       }
-
-       tp->t_commit_lsn = *commit_lsn;
-       xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
-       xfs_trans_unreserve_and_mod_sb(tp);
-
-       /* check for background commit before unlock */
-       if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
-               push = 1;
-       up_read(&log->l_cilp->xc_ctx_lock);
-
-       /*
-        * We need to push CIL every so often so we don't cache more than we
-        * can fit in the log. The limit really is that a checkpoint can't be
-        * more than half the log (the current checkpoint is not allowed to
-        * overwrite the previous checkpoint), but commit latency and memory
-        * usage limit this to a smaller size in most cases.
-        */
-       if (push)
-               xlog_cil_push(log, 0);
-       return 0;
-}
-
 /*
  * Mark all items committed and clear busy extents. We free the log vector
  * chains in a separate pass so that we unpin the log items as quickly as
@@ -427,13 +364,23 @@ xlog_cil_committed(
 }
 
 /*
- * Push the Committed Item List to the log. If the push_now flag is not set,
- * then it is a background flush and so we can chose to ignore it.
+ * Push the Committed Item List to the log. If @push_seq is zero, then it
+ * is a background flush and so we can choose to ignore it. Otherwise, if the
+ * current sequence is the same as @push_seq we need to do a flush. If
+ * @push_seq is less than the current sequence, then it has already been
+ * flushed and we don't need to do anything - the caller will wait for it to
+ * complete if necessary.
+ *
+ * @push_seq is a value rather than a flag because that allows us to do an
+ * unlocked check of the sequence number for a match. Hence we can allow log
+ * forces to run racily and not issue pushes for the same sequence twice. If we
+ * get a race between multiple pushes for the same sequence they will block on
+ * the first one and then abort, hence avoiding needless pushes.
  */
-int
+STATIC int
 xlog_cil_push(
        struct log              *log,
-       int                     push_now)
+       xfs_lsn_t               push_seq)
 {
        struct xfs_cil          *cil = log->l_cilp;
        struct xfs_log_vec      *lv;
@@ -453,12 +400,14 @@ xlog_cil_push(
        if (!cil)
                return 0;
 
+       ASSERT(!push_seq || push_seq <= cil->xc_ctx->sequence);
+
        new_ctx = kmem_zalloc(sizeof(*new_ctx), KM_SLEEP|KM_NOFS);
        new_ctx->ticket = xlog_cil_ticket_alloc(log);
 
        /* lock out transaction commit, but don't block on background push */
        if (!down_write_trylock(&cil->xc_ctx_lock)) {
-               if (!push_now)
+               if (!push_seq)
                        goto out_free_ticket;
                down_write(&cil->xc_ctx_lock);
        }
@@ -469,7 +418,11 @@ xlog_cil_push(
                goto out_skip;
 
        /* check for spurious background flush */
-       if (!push_now && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+       if (!push_seq && cil->xc_ctx->space_used < XLOG_CIL_SPACE_LIMIT(log))
+               goto out_skip;
+
+       /* check for a previously pushed sequence */
+       if (push_seq < cil->xc_ctx->sequence)
                goto out_skip;
 
        /*
@@ -514,6 +467,13 @@ xlog_cil_push(
        new_ctx->cil = cil;
        cil->xc_ctx = new_ctx;
 
+       /*
+        * mirror the new sequence into the cil structure so that we can do
+        * unlocked checks against the current sequence in log forces without
+        * risking dereferencing a freed context pointer.
+        */
+       cil->xc_current_sequence = new_ctx->sequence;
+
        /*
         * The switch is now done, so we can drop the context lock and move out
         * of a shared context. We can't just go straight to the commit record,
@@ -625,6 +585,102 @@ out_abort:
        return XFS_ERROR(EIO);
 }
 
+/*
+ * Commit a transaction with the given vector to the Committed Item List.
+ *
+ * To do this, we need to format the item, pin it in memory if required and
+ * account for the space used by the transaction. Once we have done that we
+ * need to release the unused reservation for the transaction, attach the
+ * transaction to the checkpoint context so we carry the busy extents through
+ * to checkpoint completion, and then unlock all the items in the transaction.
+ *
+ * For more specific information about the order of operations in
+ * xfs_log_commit_cil() please refer to the comments in
+ * xfs_trans_commit_iclog().
+ *
+ * Called with the context lock already held in read mode to lock out
+ * background commit, returns without it held once background commits are
+ * allowed again.
+ */
+int
+xfs_log_commit_cil(
+       struct xfs_mount        *mp,
+       struct xfs_trans        *tp,
+       struct xfs_log_vec      *log_vector,
+       xfs_lsn_t               *commit_lsn,
+       int                     flags)
+{
+       struct log              *log = mp->m_log;
+       int                     log_flags = 0;
+       int                     push = 0;
+
+       if (flags & XFS_TRANS_RELEASE_LOG_RES)
+               log_flags = XFS_LOG_REL_PERM_RESERV;
+
+       if (XLOG_FORCED_SHUTDOWN(log)) {
+               xlog_cil_free_logvec(log_vector);
+               return XFS_ERROR(EIO);
+       }
+
+       /*
+        * do all the hard work of formatting items (including memory
+        * allocation) outside the CIL context lock. This prevents stalling CIL
+        * pushes when we are low on memory and a transaction commit spends a
+        * lot of time in memory reclaim.
+        */
+       xlog_cil_format_items(log, log_vector);
+
+       /* lock out background commit */
+       down_read(&log->l_cilp->xc_ctx_lock);
+       xlog_cil_insert_items(log, log_vector, tp->t_ticket, commit_lsn);
+
+       /* check we didn't blow the reservation */
+       if (tp->t_ticket->t_curr_res < 0)
+               xlog_print_tic_res(log->l_mp, tp->t_ticket);
+
+       /* attach the transaction to the CIL if it has any busy extents */
+       if (!list_empty(&tp->t_busy)) {
+               spin_lock(&log->l_cilp->xc_cil_lock);
+               list_splice_init(&tp->t_busy,
+                                       &log->l_cilp->xc_ctx->busy_extents);
+               spin_unlock(&log->l_cilp->xc_cil_lock);
+       }
+
+       tp->t_commit_lsn = *commit_lsn;
+       xfs_log_done(mp, tp->t_ticket, NULL, log_flags);
+       xfs_trans_unreserve_and_mod_sb(tp);
+
+       /*
+        * Once all the items of the transaction have been copied to the CIL,
+        * the items can be unlocked and freed.
+        *
+        * This needs to be done before we drop the CIL context lock because we
+        * have to update state in the log items and unlock them before they go
+        * to disk. If we don't, then the CIL checkpoint can race with us and
+        * we can run checkpoint completion before we've updated and unlocked
+        * the log items. This affects (at least) processing of stale buffers,
+        * inodes and EFIs.
+        */
+       xfs_trans_free_items(tp, *commit_lsn, 0);
+
+       /* check for background commit before unlock */
+       if (log->l_cilp->xc_ctx->space_used > XLOG_CIL_SPACE_LIMIT(log))
+               push = 1;
+
+       up_read(&log->l_cilp->xc_ctx_lock);
+
+       /*
+        * We need to push CIL every so often so we don't cache more than we
+        * can fit in the log. The limit really is that a checkpoint can't be
+        * more than half the log (the current checkpoint is not allowed to
+        * overwrite the previous checkpoint), but commit latency and memory
+        * usage limit this to a smaller size in most cases.
+        */
+       if (push)
+               xlog_cil_push(log, 0);
+       return 0;
+}
+
 /*
  * Conditionally push the CIL based on the sequence passed in.
  *
@@ -639,39 +695,34 @@ out_abort:
  * commit lsn is there. It'll be empty, so this is broken for now.
  */
 xfs_lsn_t
-xlog_cil_push_lsn(
+xlog_cil_force_lsn(
        struct log      *log,
-       xfs_lsn_t       push_seq)
+       xfs_lsn_t       sequence)
 {
        struct xfs_cil          *cil = log->l_cilp;
        struct xfs_cil_ctx      *ctx;
        xfs_lsn_t               commit_lsn = NULLCOMMITLSN;
 
-restart:
-       down_write(&cil->xc_ctx_lock);
-       ASSERT(push_seq <= cil->xc_ctx->sequence);
-
-       /* check to see if we need to force out the current context */
-       if (push_seq == cil->xc_ctx->sequence) {
-               up_write(&cil->xc_ctx_lock);
-               xlog_cil_push(log, 1);
-               goto restart;
-       }
+       ASSERT(sequence <= cil->xc_current_sequence);
+
+       /*
+        * check to see if we need to force out the current context.
+        * xlog_cil_push() handles racing pushes for the same sequence,
+        * so no need to deal with it here.
+        */
+       if (sequence == cil->xc_current_sequence)
+               xlog_cil_push(log, sequence);
 
        /*
         * See if we can find a previous sequence still committing.
-        * We can drop the flush lock as soon as we have the cil lock
-        * because we are now only comparing contexts protected by
-        * the cil lock.
-        *
         * We need to wait for all previous sequence commits to complete
         * before allowing the force of push_seq to go ahead. Hence block
         * on commits for those as well.
         */
+restart:
        spin_lock(&cil->xc_cil_lock);
-       up_write(&cil->xc_ctx_lock);
        list_for_each_entry(ctx, &cil->xc_committing, committing) {
-               if (ctx->sequence > push_seq)
+               if (ctx->sequence > sequence)
                        continue;
                if (!ctx->commit_lsn) {
                        /*
@@ -681,7 +732,7 @@ restart:
                        sv_wait(&cil->xc_commit_wait, 0, &cil->xc_cil_lock, 0);
                        goto restart;
                }
-               if (ctx->sequence != push_seq)
+               if (ctx->sequence != sequence)
                        continue;
                /* found it! */
                commit_lsn = ctx->commit_lsn;
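
The xfs_log_cil.c changes above replace a push flag with a sequence number so that log forces can check the current sequence without taking the context lock, push only when it still matches, and then wait for older sequences to finish committing. A simplified sketch of that pattern, using pthread primitives and made-up names in place of the XFS locking:

#include <pthread.h>

struct cil_model {
	pthread_mutex_t lock;
	pthread_cond_t	committed;
	unsigned long	current_seq;	/* models xc_current_sequence */
	unsigned long	completed_seq;	/* highest fully committed sequence */
};

/* stand-in for xlog_cil_push(): pushing a sequence opens the next one */
static void push_current(struct cil_model *cil)
{
	pthread_mutex_lock(&cil->lock);
	if (cil->completed_seq < cil->current_seq) {
		cil->completed_seq = cil->current_seq;
		cil->current_seq++;
		pthread_cond_broadcast(&cil->committed);
	}
	pthread_mutex_unlock(&cil->lock);
}

/*
 * Force everything up to @seq. The first check is deliberately unlocked, as
 * in the patch: racing callers may both call push_current(), but the push
 * itself recognises an already-pushed sequence and does nothing twice.
 */
static void force_seq(struct cil_model *cil, unsigned long seq)
{
	if (seq == cil->current_seq)
		push_current(cil);

	pthread_mutex_lock(&cil->lock);
	while (cil->completed_seq < seq)
		pthread_cond_wait(&cil->committed, &cil->lock);
	pthread_mutex_unlock(&cil->lock);
}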
diff --git a/fs/xfs/xfs_log_priv.h b/fs/xfs/xfs_log_priv.h
index 8c072618965caf80476a562e7e38cae026089f88..ced52b98b322e3eb1be0e0c7dfc70f6096d0cd80 100644
@@ -422,6 +422,7 @@ struct xfs_cil {
        struct rw_semaphore     xc_ctx_lock;
        struct list_head        xc_committing;
        sv_t                    xc_commit_wait;
+       xfs_lsn_t               xc_current_sequence;
 };
 
 /*
@@ -562,8 +563,16 @@ int        xlog_cil_init(struct log *log);
 void   xlog_cil_init_post_recovery(struct log *log);
 void   xlog_cil_destroy(struct log *log);
 
-int    xlog_cil_push(struct log *log, int push_now);
-xfs_lsn_t xlog_cil_push_lsn(struct log *log, xfs_lsn_t push_sequence);
+/*
+ * CIL force routines
+ */
+xfs_lsn_t xlog_cil_force_lsn(struct log *log, xfs_lsn_t sequence);
+
+static inline void
+xlog_cil_force(struct log *log)
+{
+       xlog_cil_force_lsn(log, log->l_cilp->xc_current_sequence);
+}
 
 /*
  * Unmount record type is used as a pseudo transaction type for the ticket.
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index fdca7416c754636a26dae9a312003932f7306d9f..1c47edaea0d28f4def851e87f664bd9339ac19b7 100644
@@ -1167,7 +1167,7 @@ xfs_trans_del_item(
  * Unlock all of the items of a transaction and free all the descriptors
  * of that transaction.
  */
-STATIC void
+void
 xfs_trans_free_items(
        struct xfs_trans        *tp,
        xfs_lsn_t               commit_lsn,
@@ -1653,9 +1653,6 @@ xfs_trans_commit_cil(
                return error;
 
        current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS);
-
-       /* xfs_trans_free_items() unlocks them first */
-       xfs_trans_free_items(tp, *commit_lsn, 0);
        xfs_trans_free(tp);
        return 0;
 }
diff --git a/fs/xfs/xfs_trans_priv.h b/fs/xfs/xfs_trans_priv.h
index e2d93d8ead7b68b9b6ab74b1869a3f8e10721981..62da86c90de53bb36b3fd2928d539ae9fc339478 100644
@@ -25,7 +25,8 @@ struct xfs_trans;
 
 void   xfs_trans_add_item(struct xfs_trans *, struct xfs_log_item *);
 void   xfs_trans_del_item(struct xfs_log_item *);
-
+void   xfs_trans_free_items(struct xfs_trans *tp, xfs_lsn_t commit_lsn,
+                               int flags);
 void   xfs_trans_item_committed(struct xfs_log_item *lip,
                                xfs_lsn_t commit_lsn, int aborted);
 void   xfs_trans_unreserve_and_mod_sb(struct xfs_trans *tp);
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index 806d1b227a21060aac100994a8992266c00b59b5..ab661ebc4895a8471ecc808825477cf0c3558444 100644
@@ -3752,6 +3752,8 @@ static void task_fork_fair(struct task_struct *p)
 
        raw_spin_lock_irqsave(&rq->lock, flags);
 
+       update_rq_clock(rq);
+
        if (unlikely(task_cpu(p) != this_cpu))
                __set_task_cpu(p, this_cpu);
 
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index c09ef5219cbe36f267a37f55d6fc670815082522..a803f5e33471fe6092a11cf233fc29d354d9c2b4 100644
@@ -985,22 +985,16 @@ continue_unlock:
                                }
                        }
 
-                       if (wbc->nr_to_write > 0) {
-                               if (--wbc->nr_to_write == 0 &&
-                                   wbc->sync_mode == WB_SYNC_NONE) {
-                                       /*
-                                        * We stop writing back only if we are
-                                        * not doing integrity sync. In case of
-                                        * integrity sync we have to keep going
-                                        * because someone may be concurrently
-                                        * dirtying pages, and we might have
-                                        * synced a lot of newly appeared dirty
-                                        * pages, but have not synced all of the
-                                        * old dirty pages.
-                                        */
-                                       done = 1;
-                                       break;
-                               }
+                       /*
+                        * We stop writing back only if we are not doing
+                        * integrity sync. In case of integrity sync we have to
+                        * keep going until we have written all the pages
+                        * we tagged for writeback prior to entering this loop.
+                        */
+                       if (--wbc->nr_to_write <= 0 &&
+                           wbc->sync_mode == WB_SYNC_NONE) {
+                               done = 1;
+                               break;
                        }
                }
                pagevec_release(&pvec);
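
This hunk and the xfs_convert_page() change earlier encode the same rule: the nr_to_write budget only terminates background writeback (WB_SYNC_NONE), never an integrity sync. Reduced to a small helper (illustrative only, not the kernel's actual structure):

#include <stdbool.h>

enum sync_mode { WB_SYNC_NONE, WB_SYNC_ALL };	/* background vs. integrity */

struct wb_control {
	long nr_to_write;		/* remaining page budget */
	enum sync_mode sync_mode;
};

/*
 * Account one written page and report whether the walk may stop.
 * WB_SYNC_ALL keeps going even with an exhausted budget, because an
 * integrity sync must write every page tagged before the loop started.
 */
static bool wrote_page_and_done(struct wb_control *wbc)
{
	return --wbc->nr_to_write <= 0 && wbc->sync_mode == WB_SYNC_NONE;
}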