Merge branch 'master' into for-2.6.35

author Jens Axboe <jens.axboe@oracle.com>

Tue, 13 Apr 2010 18:03:21 +0000 (20:03 +0200)

committer Jens Axboe <jens.axboe@oracle.com>

Tue, 13 Apr 2010 18:03:21 +0000 (20:03 +0200)
author Jens Axboe <jens.axboe@oracle.com>
Tue, 13 Apr 2010 18:03:21 +0000 (20:03 +0200)
committer Jens Axboe <jens.axboe@oracle.com>
Tue, 13 Apr 2010 18:03:21 +0000 (20:03 +0200)
diff --git a/Documentation/cgroups/blkio-controller.txt b/Documentation/cgroups/blkio-controller.txt

index 630879cd9a42693cbf2ac9748a6a2b80182d0612..d422b410a995908c1d5c5439963b64253c1b6ffd 100644 (file)
--- a/Documentation/cgroups/blkio-controller.txt
+++ b/Documentation/cgroups/blkio-controller.txt
@@ -76,10 +76,38 @@ CONFIG_DEBUG_BLK_CGROUP
  Details of cgroup files
  =======================
  - blkio.weight
-       - Specifies per cgroup weight.
-
+       - Specifies per cgroup weight. This is the default weight of the group
+         on all devices unless it is overridden by a per-device rule
+         (see blkio.weight_device).
           Currently allowed range of weights is from 100 to 1000.
  
+- blkio.weight_device
+       - One can specify per cgroup per device rules using this interface.
+         These rules override the default value of group weight as specified
+         by blkio.weight.
+
+         Following is the format.
+
+         # echo dev_maj:dev_minor weight > /path/to/cgroup/blkio.weight_device
+         Configure weight=300 on /dev/sdb (8:16) in this cgroup
+         # echo 8:16 300 > blkio.weight_device
+         # cat blkio.weight_device
+         dev     weight
+         8:16    300
+
+         Configure weight=500 on /dev/sda (8:0) in this cgroup
+         # echo 8:0 500 > blkio.weight_device
+         # cat blkio.weight_device
+         dev     weight
+         8:0     500
+         8:16    300
+
+         Remove specific weight for /dev/sda in this cgroup
+         # echo 8:0 0 > blkio.weight_device
+         # cat blkio.weight_device
+         dev     weight
+         8:16    300
+
  - blkio.time
         - disk time allocated to cgroup per device in milliseconds. First
           two fields specify the major and minor number of the device and
@@ -92,6 +120,94 @@ Details of cgroup files
           third field specifies the number of sectors transferred by the
           group to/from the device.
  
+- blkio.io_service_bytes
+       - Number of bytes transferred to/from the disk by the group. These
+         are further divided by the type of operation - read or write, sync
+         or async. First two fields specify the major and minor number of the
+         device, third field specifies the operation type and the fourth field
+         specifies the number of bytes.
+
+- blkio.io_serviced
+       - Number of IOs completed to/from the disk by the group. These
+         are further divided by the type of operation - read or write, sync
+         or async. First two fields specify the major and minor number of the
+         device, third field specifies the operation type and the fourth field
+         specifies the number of IOs.
+
+- blkio.io_service_time
+       - Total amount of time between request dispatch and request completion
+         for the IOs done by this cgroup. This is in nanoseconds to make it
+         meaningful for flash devices too. For devices with queue depth of 1,
+         this time represents the actual service time. When queue_depth > 1,
+         that is no longer true as requests may be served out of order. This
+         may cause the service time for a given IO to include the service time
+         of multiple IOs when served out of order which may result in total
+         io_service_time > actual time elapsed. This time is further divided by
+         the type of operation - read or write, sync or async. First two fields
+         specify the major and minor number of the device, third field
+         specifies the operation type and the fourth field specifies the
+         io_service_time in ns.
+
+- blkio.io_wait_time
+       - Total amount of time the IOs for this cgroup spent waiting in the
+         scheduler queues for service. This can be greater than the total time
+         elapsed since it is cumulative io_wait_time for all IOs. It is not a
+         measure of total time the cgroup spent waiting but rather a measure of
+         the wait_time for its individual IOs. For devices with queue_depth > 1
+         this metric does not include the time an IO spends waiting between
+         being dispatched to the device and actually being serviced
+         (there might be a time lag here due to re-ordering of requests by the
+         device). This is in nanoseconds to make it meaningful for flash
+         devices too. This time is further divided by the type of operation -
+         read or write, sync or async. First two fields specify the major and
+         minor number of the device, third field specifies the operation type
+         and the fourth field specifies the io_wait_time in ns.
+
+- blkio.io_merged
+       - Total number of bios/requests merged into requests belonging to this
+         cgroup. This is further divided by the type of operation - read or
+         write, sync or async.
+
+- blkio.io_queued
+       - Total number of requests queued up at any given instant for this
+         cgroup. This is further divided by the type of operation - read or
+         write, sync or async.
+
+- blkio.avg_queue_size
+       - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y.
+         The average queue size for this cgroup over the entire time of this
+         cgroup's existence. Queue size samples are taken each time one of the
+         queues of this cgroup gets a timeslice.
+
+- blkio.group_wait_time
+       - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y.
+         This is the amount of time the cgroup had to wait since it became busy
+         (i.e., went from 0 to 1 request queued) to get a timeslice for one of
+         its queues. This is different from the io_wait_time which is the
+         cumulative total of the amount of time spent by each IO in that cgroup
+         waiting in the scheduler queue. This is in nanoseconds. If this is
+         read when the cgroup is in a waiting (for timeslice) state, the stat
+         will only report the group_wait_time accumulated till the last time it
+         got a timeslice and will not include the current delta.
+
+- blkio.empty_time
+       - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y.
+         This is the amount of time a cgroup spends without any pending
+         requests when not being served, i.e., it does not include any time
+         spent idling for one of the queues of the cgroup. This is in
+         nanoseconds. If this is read when the cgroup is in an empty state,
+         the stat will only report the empty_time accumulated till the last
+         time it had a pending request and will not include the current delta.
+
+- blkio.idle_time
+       - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y.
+         This is the amount of time spent by the IO scheduler idling for a
+         given cgroup in anticipation of a better request than the existing ones
+         from other queues/cgroups. This is in nanoseconds. If this is read
+         when the cgroup is in an idling state, the stat will only report the
+         idle_time accumulated till the last idle period and will not include
+         the current delta.
+
  - blkio.dequeue
         - Debugging aid only enabled if CONFIG_DEBUG_CFQ_IOSCHED=y. This
           gives the statistics about how many a times a group was dequeued
@@ -99,6 +215,10 @@ Details of cgroup files
           and minor number of the device and third field specifies the number
           of times a group was dequeued from a particular device.
  
+- blkio.reset_stats
+       - Writing an int to this file will result in resetting all the stats
+         for that cgroup.
+
  CFQ sysfs tunable
  =================
  /sys/block/<disk>/queue/iosched/group_isolation
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c

index 5fe03def34b24c396f299aa57bda4b1cc5a3df2a..aa97cd455cefa91400759968421ec71ed36b33af 100644 (file)
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -15,8 +15,12 @@
  #include <linux/kdev_t.h>
  #include <linux/module.h>
  #include <linux/err.h>
+#include <linux/blkdev.h>
  #include <linux/slab.h>
  #include "blk-cgroup.h"
+#include <linux/genhd.h>
+
+#define MAX_KEY_LEN 100
  
  static DEFINE_SPINLOCK(blkio_list_lock);
  static LIST_HEAD(blkio_list);
@@ -49,6 +53,32 @@ struct cgroup_subsys blkio_subsys = {
  };
  EXPORT_SYMBOL_GPL(blkio_subsys);
  
+/* Link a per-device policy node into the policy list of blkcg. */
+static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
+                                           struct blkio_policy_node *pn)
+{
+       struct list_head *policy_head = &blkcg->policy_list;
+
+       list_add(&pn->node, policy_head);
+}
+
+/*
+ * Unlink a policy node from the list it is on. The node itself is not
+ * freed here.
+ * Must be called with blkcg->lock held.
+ */
+static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
+{
+       struct list_head *entry = &pn->node;
+
+       list_del(entry);
+}
+
+/*
+ * Look up the per-device policy node that blkcg holds for dev.
+ * Returns the matching node, or NULL when the list has no entry for dev.
+ * Must be called with blkcg->lock held.
+ */
+static struct blkio_policy_node *
+blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev)
+{
+       struct blkio_policy_node *cur;
+
+       list_for_each_entry(cur, &blkcg->policy_list, node) {
+               if (cur->dev != dev)
+                       continue;
+               return cur;
+       }
+
+       return NULL;
+}
+
  struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
  {
         return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
@@ -56,13 +86,262 @@ struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
  }
  EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
  
-void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
-                       unsigned long time, unsigned long sectors)
+void blkio_group_init(struct blkio_group *blkg)
+{
+       spin_lock_init(&blkg->stats_lock); /* lock taken around updates of blkg->stats */
+}
+EXPORT_SYMBOL_GPL(blkio_group_init);
+
+/*
+ * Add 'add' to two slots of a per-type stat array: the read or write
+ * slot (chosen by direction) and the sync or async slot (chosen by sync).
+ * This should be called with the blkg->stats_lock held.
+ */
+static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
+                               bool sync)
+{
+       int dir_idx = direction ? BLKIO_STAT_WRITE : BLKIO_STAT_READ;
+       int sync_idx = sync ? BLKIO_STAT_SYNC : BLKIO_STAT_ASYNC;
+
+       stat[dir_idx] += add;
+       stat[sync_idx] += add;
+}
+
+/*
+ * Decrement the read-or-write slot and the sync-or-async slot of a
+ * per-type stat array. Either slot already being zero is treated as a
+ * bug (BUG_ON) rather than being allowed to wrap.
+ * This should be called with the blkg->stats_lock held.
+ */
+static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
+{
+       int dir_idx = direction ? BLKIO_STAT_WRITE : BLKIO_STAT_READ;
+       int sync_idx = sync ? BLKIO_STAT_SYNC : BLKIO_STAT_ASYNC;
+
+       BUG_ON(stat[dir_idx] == 0);
+       stat[dir_idx]--;
+
+       BUG_ON(stat[sync_idx] == 0);
+       stat[sync_idx]--;
+}
+
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+/*
+ * Record the start of a group wait period for blkg and mark it waiting.
+ * Nothing is done when blkg is already marked waiting or when blkg is
+ * curr_blkg itself.
+ * This should be called with the blkg->stats_lock held.
+ */
+static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
+                                               struct blkio_group *curr_blkg)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+
+       if (blkio_blkg_waiting(stats) || blkg == curr_blkg)
+               return;
+
+       stats->start_group_wait_time = sched_clock();
+       blkio_mark_blkg_waiting(stats);
+}
+
+/*
+ * Close a pending group wait period: add the time elapsed since
+ * start_group_wait_time to group_wait_time and clear the waiting flag.
+ * Does nothing when the stats are not marked waiting.
+ * This should be called with the blkg->stats_lock held.
+ */
+static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
+{
+       unsigned long long cur;
+
+       if (blkio_blkg_waiting(stats)) {
+               cur = sched_clock();
+               /* Accumulate only when the clock is past the recorded start */
+               if (time_after64(cur, stats->start_group_wait_time))
+                       stats->group_wait_time +=
+                               cur - stats->start_group_wait_time;
+               blkio_clear_blkg_waiting(stats);
+       }
+}
+
+/*
+ * Close a pending empty period: add the time elapsed since
+ * start_empty_time to empty_time and clear the empty flag.
+ * Does nothing when the stats are not marked empty.
+ * This should be called with the blkg->stats_lock held.
+ */
+static void blkio_end_empty_time(struct blkio_group_stats *stats)
+{
+       unsigned long long cur;
+
+       if (blkio_blkg_empty(stats)) {
+               cur = sched_clock();
+               /* Accumulate only when the clock is past the recorded start */
+               if (time_after64(cur, stats->start_empty_time))
+                       stats->empty_time += cur - stats->start_empty_time;
+               blkio_clear_blkg_empty(stats);
+       }
+}
+
+/*
+ * Record the start of an idle period for blkg and mark it idling.
+ * Being called while the group is already marked idling is a bug (BUG_ON).
+ */
+void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+       BUG_ON(blkio_blkg_idling(stats));
+       stats->start_idle_time = sched_clock();
+       blkio_mark_blkg_idling(stats);
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
+
+void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
  {
-       blkg->time += time;
-       blkg->sectors += sectors;
+       unsigned long flags;
+       unsigned long long now;
+       struct blkio_group_stats *stats;
+
+       spin_lock_irqsave(&blkg->stats_lock, flags);
+       stats = &blkg->stats;
+       if (blkio_blkg_idling(stats)) { /* only when an idle period was started */
+               now = sched_clock();
+               if (time_after64(now, stats->start_idle_time)) /* skip unless the clock is past the start */
+                       stats->idle_time += now - stats->start_idle_time;
+               blkio_clear_blkg_idling(stats); /* the idle period is closed either way */
+       }
+       spin_unlock_irqrestore(&blkg->stats_lock, flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
+
+void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
+{
+       unsigned long flags;
+       struct blkio_group_stats *stats;
+
+       spin_lock_irqsave(&blkg->stats_lock, flags);
+       stats = &blkg->stats;
+       stats->avg_queue_size_sum += /* one sample: queued reads plus queued writes */
+                       stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
+                       stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
+       stats->avg_queue_size_samples++; /* divisor used when the average is reported */
+       blkio_update_group_wait_time(stats); /* also closes any pending group wait period */
+       spin_unlock_irqrestore(&blkg->stats_lock, flags);
  }
-EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_stats);
+EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
+
+/*
+ * Add 'dequeue' to the dequeue counter of blkg.
+ * NOTE(review): unlike the other stat updaters in this file, this one
+ * does not take blkg->stats_lock - confirm that callers serialise it.
+ */
+void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
+                       unsigned long dequeue)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+
+       stats->dequeue += dequeue;
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
+#else /* !CONFIG_DEBUG_BLK_CGROUP: the helpers below compile to no-ops */
+static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
+                                       struct blkio_group *curr_blkg) {}
+static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
+#endif /* CONFIG_DEBUG_BLK_CGROUP */
+
+/*
+ * Account one request being queued for blkg: bump the queued counters
+ * for its direction and sync type, end any pending empty period, and
+ * start a group wait period (see blkio_set_start_group_wait_time()).
+ */
+void blkiocg_update_io_add_stats(struct blkio_group *blkg,
+                       struct blkio_group *curr_blkg, bool direction,
+                       bool sync)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+       blkio_add_stat(stats->stat_arr[BLKIO_STAT_QUEUED], 1, direction, sync);
+       blkio_end_empty_time(stats);
+       blkio_set_start_group_wait_time(blkg, curr_blkg);
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
+
+/*
+ * Account one request leaving the queue of blkg: decrement the queued
+ * counters for its direction and sync type.
+ */
+void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
+                                               bool direction, bool sync)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+       blkio_check_and_dec_stat(stats->stat_arr[BLKIO_STAT_QUEUED],
+                                direction, sync);
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
+
+/* Add 'time' to the accumulated time stat of blkg. */
+void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+       stats->time += time;
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
+
+/*
+ * Start an empty period for blkg if it has no queued reads or writes:
+ * stamp start_empty_time and mark the group empty. Nothing is done while
+ * requests are still queued.
+ */
+void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long irq_flags;
+       bool has_queued;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+
+       has_queued = stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
+                    stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
+       if (!has_queued) {
+               /*
+                * With ignore set, an already-set empty flag is tolerated
+                * instead of hitting the BUG_ON. Superfluous timeslice
+                * complete events (e.g. forced_dispatch in CFQ) can arrive
+                * when no IOs were served and would otherwise trip the
+                * check incorrectly.
+                */
+               BUG_ON(!ignore && blkio_blkg_empty(stats));
+               stats->start_empty_time = sched_clock();
+               blkio_mark_blkg_empty(stats);
+       }
+
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
+
+/*
+ * Account one dispatched IO of 'bytes' bytes for blkg: add bytes >> 9 to
+ * the sectors stat, count one serviced IO, and add the byte count to the
+ * service-bytes stat, each under the given direction and sync type.
+ */
+void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
+                               uint64_t bytes, bool direction, bool sync)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+       stats->sectors += bytes >> 9;
+       blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1,
+                      direction, sync);
+       blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
+                      direction, sync);
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
+
+/*
+ * Account a completed IO for blkg. Service time is the span from
+ * io_start_time to now; wait time is the span from start_time to
+ * io_start_time. A span is only added when its end is after its start.
+ */
+void blkiocg_update_completion_stats(struct blkio_group *blkg,
+       uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long long now = sched_clock();
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+
+       if (time_after64(now, io_start_time)) {
+               blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
+                              now - io_start_time, direction, sync);
+       }
+
+       if (time_after64(io_start_time, start_time)) {
+               blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
+                              io_start_time - start_time, direction, sync);
+       }
+
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
+
+/* Count one merge for blkg under the given direction and sync type. */
+void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
+                                       bool sync)
+{
+       struct blkio_group_stats *stats = &blkg->stats;
+       unsigned long irq_flags;
+
+       spin_lock_irqsave(&blkg->stats_lock, irq_flags);
+       blkio_add_stat(stats->stat_arr[BLKIO_STAT_MERGED], 1, direction, sync);
+       spin_unlock_irqrestore(&blkg->stats_lock, irq_flags);
+}
+EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
  
  void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
                         struct blkio_group *blkg, void *key, dev_t dev)
@@ -154,6 +433,7 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
         struct blkio_group *blkg;
         struct hlist_node *n;
         struct blkio_policy_type *blkiop;
+       struct blkio_policy_node *pn;
  
         if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
                 return -EINVAL;
@@ -162,7 +442,13 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
         spin_lock(&blkio_list_lock);
         spin_lock_irq(&blkcg->lock);
         blkcg->weight = (unsigned int)val;
+
         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+               pn = blkio_policy_search_node(blkcg, blkg->dev);
+
+               if (pn)
+                       continue;
+
                 list_for_each_entry(blkiop, &blkio_list, list)
                         blkiop->ops.blkio_update_group_weight_fn(blkg,
                                         blkcg->weight);
@@ -172,13 +458,154 @@ blkiocg_weight_write(struct cgroup *cgroup, struct cftype *cftype, u64 val)
         return 0;
  }
  
-#define SHOW_FUNCTION_PER_GROUP(__VAR)                                 \
+/*
+ * Write handler for blkio.reset_stats: zero the statistics of every
+ * blkio_group on the cgroup's blkg_list. The written value is not
+ * examined.
+ *
+ * The queued counters are carried across the reset. With
+ * CONFIG_DEBUG_BLK_CGROUP, any idling/waiting/empty state that was set is
+ * marked again and its start timestamp is set to the time sampled on
+ * entry to this function.
+ */
+static int
+blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
+{
+       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
+       struct blkio_group *blkg;
+       struct blkio_group_stats *stats;
+       struct hlist_node *pos;
+       uint64_t saved_queued[BLKIO_STAT_TOTAL];
+       int idx;
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+       bool was_idling, was_waiting, was_empty;
+       unsigned long long reset_time = sched_clock();
+#endif
+
+       spin_lock_irq(&blkcg->lock);
+       hlist_for_each_entry(blkg, pos, &blkcg->blkg_list, blkcg_node) {
+               spin_lock(&blkg->stats_lock);
+               stats = &blkg->stats;
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+               /* Sample the state flags before they are wiped */
+               was_idling = blkio_blkg_idling(stats);
+               was_waiting = blkio_blkg_waiting(stats);
+               was_empty = blkio_blkg_empty(stats);
+#endif
+               /* Save the queued counters, clear everything, restore them */
+               for (idx = 0; idx < BLKIO_STAT_TOTAL; idx++)
+                       saved_queued[idx] =
+                               stats->stat_arr[BLKIO_STAT_QUEUED][idx];
+               memset(stats, 0, sizeof(*stats));
+               for (idx = 0; idx < BLKIO_STAT_TOTAL; idx++)
+                       stats->stat_arr[BLKIO_STAT_QUEUED][idx] =
+                               saved_queued[idx];
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+               if (was_idling) {
+                       blkio_mark_blkg_idling(stats);
+                       stats->start_idle_time = reset_time;
+               }
+               if (was_waiting) {
+                       blkio_mark_blkg_waiting(stats);
+                       stats->start_group_wait_time = reset_time;
+               }
+               if (was_empty) {
+                       blkio_mark_blkg_empty(stats);
+                       stats->start_empty_time = reset_time;
+               }
+#endif
+               spin_unlock(&blkg->stats_lock);
+       }
+       spin_unlock_irq(&blkcg->lock);
+
+       return 0;
+}
+
+/*
+ * Build the key string for one stat line in str.
+ *
+ * str receives "major:minor" for dev and, unless diskname_only is set, a
+ * suffix naming the sub type: " Read", " Write", " Sync", " Async",
+ * " Total", or " Invalid" for any other value. chars_left is the total
+ * size of the str buffer in bytes. A warning is logged and the suffix is
+ * skipped when the device number alone does not fit.
+ */
+static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
+                               int chars_left, bool diskname_only)
+{
+       const char *suffix;
+       int len;
+
+       /*
+        * snprintf() returns the length the output would have had without
+        * truncation, so compare that with the buffer size. The strlen()
+        * of the written string is always below the size and cannot be
+        * used to detect truncation.
+        */
+       len = snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
+       if (len >= chars_left) {
+               printk(KERN_WARNING
+                       "Possibly incorrect cgroup stat display format\n");
+               return;
+       }
+       if (diskname_only)
+               return;
+
+       switch (type) {
+       case BLKIO_STAT_READ:
+               suffix = " Read";
+               break;
+       case BLKIO_STAT_WRITE:
+               suffix = " Write";
+               break;
+       case BLKIO_STAT_SYNC:
+               suffix = " Sync";
+               break;
+       case BLKIO_STAT_ASYNC:
+               suffix = " Async";
+               break;
+       case BLKIO_STAT_TOTAL:
+               suffix = " Total";
+               break;
+       default:
+               suffix = " Invalid";
+               break;
+       }
+
+       /* strlcat() takes the full size of str, not the space remaining */
+       strlcat(str, suffix, chars_left);
+}
+
+static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
+                               struct cgroup_map_cb *cb, dev_t dev)
+{
+       blkio_get_key_name(0, dev, str, chars_left, true); /* diskname_only: key is just "major:minor" */
+       cb->fill(cb, str, val); /* hand the key/value pair to the map callback */
+       return val; /* returned so the caller can add it to a running total */
+}
+
+/* This should be called with blkg->stats_lock held */
+static uint64_t blkio_get_stat(struct blkio_group *blkg,
+               struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
+{
+       uint64_t disk_total;
+       char key_str[MAX_KEY_LEN];
+       enum stat_sub_type sub_type;
+
+       if (type == BLKIO_STAT_TIME)
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                       blkg->stats.time, cb, dev);
+       if (type == BLKIO_STAT_SECTORS)
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                       blkg->stats.sectors, cb, dev);
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+       if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
+               uint64_t sum = blkg->stats.avg_queue_size_sum;
+               uint64_t samples = blkg->stats.avg_queue_size_samples;
+               if (samples)
+                       do_div(sum, samples);
+               else
+                       sum = 0;
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
+       }
+       if (type == BLKIO_STAT_GROUP_WAIT_TIME)
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                       blkg->stats.group_wait_time, cb, dev);
+       if (type == BLKIO_STAT_IDLE_TIME)
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                       blkg->stats.idle_time, cb, dev);
+       if (type == BLKIO_STAT_EMPTY_TIME)
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                       blkg->stats.empty_time, cb, dev);
+       if (type == BLKIO_STAT_DEQUEUE)
+               return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
+                                       blkg->stats.dequeue, cb, dev);
+#endif
+
+       for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
+                       sub_type++) {
+               blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
+               cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
+       }
+       disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
+                       blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
+       blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
+       cb->fill(cb, key_str, disk_total);
+       return disk_total;
+}
+
+#define SHOW_FUNCTION_PER_GROUP(__VAR, type, show_total)               \
  static int blkiocg_##__VAR##_read(struct cgroup *cgroup,               \
-                       struct cftype *cftype, struct seq_file *m)      \
+               struct cftype *cftype, struct cgroup_map_cb *cb)        \
  {                                                                      \
         struct blkio_cgroup *blkcg;                                     \
         struct blkio_group *blkg;                                       \
         struct hlist_node *n;                                           \
+       uint64_t cgroup_total = 0;                                      \
                                                                         \
         if (!cgroup_lock_live_group(cgroup))                            \
                 return -ENODEV;                                         \
@@ -186,32 +613,233 @@ static int blkiocg_##__VAR##_read(struct cgroup *cgroup,         \
         blkcg = cgroup_to_blkio_cgroup(cgroup);                         \
         rcu_read_lock();                                                \
         hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {\
-               if (blkg->dev)                                          \
-                       seq_printf(m, "%u:%u %lu\n", MAJOR(blkg->dev),  \
-                                MINOR(blkg->dev), blkg->__VAR);        \
+               if (blkg->dev) {                                        \
+                       spin_lock_irq(&blkg->stats_lock);               \
+                       cgroup_total += blkio_get_stat(blkg, cb,        \
+                                               blkg->dev, type);       \
+                       spin_unlock_irq(&blkg->stats_lock);             \
+               }                                                       \
         }                                                               \
+       if (show_total)                                                 \
+               cb->fill(cb, "Total", cgroup_total);                    \
         rcu_read_unlock();                                              \
         cgroup_unlock();                                                \
         return 0;                                                       \
  }
  
-SHOW_FUNCTION_PER_GROUP(time);
-SHOW_FUNCTION_PER_GROUP(sectors);
+SHOW_FUNCTION_PER_GROUP(time, BLKIO_STAT_TIME, 0);
+SHOW_FUNCTION_PER_GROUP(sectors, BLKIO_STAT_SECTORS, 0);
+SHOW_FUNCTION_PER_GROUP(io_service_bytes, BLKIO_STAT_SERVICE_BYTES, 1);
+SHOW_FUNCTION_PER_GROUP(io_serviced, BLKIO_STAT_SERVICED, 1);
+SHOW_FUNCTION_PER_GROUP(io_service_time, BLKIO_STAT_SERVICE_TIME, 1);
+SHOW_FUNCTION_PER_GROUP(io_wait_time, BLKIO_STAT_WAIT_TIME, 1);
+SHOW_FUNCTION_PER_GROUP(io_merged, BLKIO_STAT_MERGED, 1);
+SHOW_FUNCTION_PER_GROUP(io_queued, BLKIO_STAT_QUEUED, 1);
  #ifdef CONFIG_DEBUG_BLK_CGROUP
-SHOW_FUNCTION_PER_GROUP(dequeue);
+SHOW_FUNCTION_PER_GROUP(dequeue, BLKIO_STAT_DEQUEUE, 0);
+SHOW_FUNCTION_PER_GROUP(avg_queue_size, BLKIO_STAT_AVG_QUEUE_SIZE, 0);
+SHOW_FUNCTION_PER_GROUP(group_wait_time, BLKIO_STAT_GROUP_WAIT_TIME, 0);
+SHOW_FUNCTION_PER_GROUP(idle_time, BLKIO_STAT_IDLE_TIME, 0);
+SHOW_FUNCTION_PER_GROUP(empty_time, BLKIO_STAT_EMPTY_TIME, 0);
  #endif
  #undef SHOW_FUNCTION_PER_GROUP
  
-#ifdef CONFIG_DEBUG_BLK_CGROUP
-void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
-                       unsigned long dequeue)
+/*
+ * Check that dev names an existing whole disk.
+ * Returns 0 if so, or -ENODEV when no disk is found for dev or when dev
+ * refers to a partition (non-zero partition index).
+ */
+static int blkio_check_dev_num(dev_t dev)
  {
-       blkg->dequeue += dequeue;
+       int part = 0;
+       struct gendisk *disk;
+       struct module *owner;
+
+       disk = get_gendisk(dev, &part);
+       if (!disk)
+               return -ENODEV;
+
+       /*
+        * get_gendisk() hands back the disk with references held on both
+        * the disk and the module that owns it. Only the lookup result is
+        * needed here, so release both before returning. The owner is read
+        * first because the disk must not be touched after put_disk().
+        */
+       owner = disk->fops->owner;
+       put_disk(disk);
+       module_put(owner);
+
+       return part ? -ENODEV : 0;
+}
+
+/*
+ * Parse a "major:minor weight" rule from buf into newpn.
+ *
+ * buf is modified in place by strsep(). Exactly two space-separated
+ * fields are required, the device must pass blkio_check_dev_num(), and
+ * the weight must be 0 or lie in [BLKIO_WEIGHT_MIN, BLKIO_WEIGHT_MAX].
+ *
+ * Returns 0 on success, -EINVAL on malformed input, or the error from
+ * blkio_check_dev_num(). newpn->dev is already set by the time the weight
+ * is validated, so it may have been written even on a weight error.
+ */
+static int blkio_policy_parse_and_set(char *buf,
+                                     struct blkio_policy_node *newpn)
+{
+       char *fields[4] = { NULL, NULL, NULL, NULL };
+       char *tok, *major_s, *minor_s;
+       unsigned long major, minor, weight;
+       int nr_fields = 0;
+       int ret;
+       dev_t dev;
+
+       for (tok = strsep(&buf, " "); tok != NULL; tok = strsep(&buf, " ")) {
+               /* Runs of spaces produce empty tokens; skip them */
+               if (*tok == '\0')
+                       continue;
+
+               fields[nr_fields++] = tok;
+
+               /* A third field is already one too many; stop scanning */
+               if (nr_fields == 3)
+                       break;
+       }
+
+       if (nr_fields != 2)
+               return -EINVAL;
+
+       /* Split "major:minor"; strsep() leaves fields[0] at the minor part */
+       major_s = strsep(&fields[0], ":");
+       if (major_s == NULL)
+               return -EINVAL;
+
+       minor_s = fields[0];
+       if (minor_s == NULL)
+               return -EINVAL;
+
+       if (strict_strtoul(major_s, 10, &major))
+               return -EINVAL;
+
+       if (strict_strtoul(minor_s, 10, &minor))
+               return -EINVAL;
+
+       dev = MKDEV(major, minor);
+
+       ret = blkio_check_dev_num(dev);
+       if (ret)
+               return ret;
+
+       newpn->dev = dev;
+
+       if (fields[1] == NULL)
+               return -EINVAL;
+
+       if (strict_strtoul(fields[1], 10, &weight))
+               return -EINVAL;
+       /* 0 is accepted below the minimum: it requests rule removal */
+       if (weight != 0 && weight < BLKIO_WEIGHT_MIN)
+               return -EINVAL;
+       if (weight > BLKIO_WEIGHT_MAX)
+               return -EINVAL;
+
+       newpn->weight = weight;
+
+       return 0;
+}
+
+/*
+ * Return the weight of blkcg on device dev: the per-device rule's weight
+ * when one exists, otherwise the cgroup's default weight.
+ * The lookup uses blkio_policy_search_node(), which must be called with
+ * blkcg->lock held.
+ */
+unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+                             dev_t dev)
+{
+       struct blkio_policy_node *rule = blkio_policy_search_node(blkcg, dev);
+
+       return rule ? rule->weight : blkcg->weight;
+}
+EXPORT_SYMBOL_GPL(blkcg_get_weight);
+
+
+/*
+ * Write handler for blkio.weight_device.
+ *
+ * buffer holds "major:minor weight". A non-zero weight installs the
+ * per-device rule for that device or updates the existing one. A weight
+ * of 0 removes and frees the rule, so the device falls back to
+ * blkcg->weight. Every blkio_group of this cgroup on that device is then
+ * given the effective weight through each registered policy's
+ * blkio_update_group_weight_fn.
+ *
+ * Returns 0 on success, -ENOMEM on allocation failure, or the error from
+ * blkio_policy_parse_and_set() for rejected input.
+ */
+static int blkiocg_weight_device_write(struct cgroup *cgrp, struct cftype *cft,
+                                      const char *buffer)
+{
+       int ret = 0;
+       char *buf;
+       struct blkio_policy_node *newpn, *pn;
+       struct blkio_policy_node *stale_pn = NULL;
+       struct blkio_cgroup *blkcg;
+       struct blkio_group *blkg;
+       int keep_newpn = 0;
+       struct hlist_node *n;
+       struct blkio_policy_type *blkiop;
+       dev_t dev;
+       unsigned int weight;
+
+       buf = kstrdup(buffer, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
+       if (!newpn) {
+               ret = -ENOMEM;
+               goto free_buf;
+       }
+
+       ret = blkio_policy_parse_and_set(buf, newpn);
+       if (ret)
+               goto free_newpn;
+
+       /*
+        * Work from copies. Once newpn is linked into the policy list it
+        * belongs to the list and must not be dereferenced after
+        * blkcg->lock has been dropped.
+        */
+       dev = newpn->dev;
+       weight = newpn->weight;
+
+       blkcg = cgroup_to_blkio_cgroup(cgrp);
+
+       spin_lock_irq(&blkcg->lock);
+
+       pn = blkio_policy_search_node(blkcg, dev);
+       if (!pn) {
+               if (weight != 0) {
+                       blkio_policy_insert_node(blkcg, newpn);
+                       keep_newpn = 1;
+               }
+       } else if (weight == 0) {
+               /* weight == 0 means deleting a specific weight */
+               blkio_policy_delete_node(pn);
+               stale_pn = pn;
+       } else {
+               /* Update in place while the lock still protects pn */
+               pn->weight = weight;
+       }
+
+       spin_unlock_irq(&blkcg->lock);
+
+       /* An unlinked rule is no longer reachable from the list; free it */
+       kfree(stale_pn);
+
+       /* update weight for each cfqg */
+       spin_lock(&blkio_list_lock);
+       spin_lock_irq(&blkcg->lock);
+
+       hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
+               if (blkg->dev != dev)
+                       continue;
+
+               list_for_each_entry(blkiop, &blkio_list, list)
+                       blkiop->ops.blkio_update_group_weight_fn(blkg,
+                                       weight ? weight : blkcg->weight);
+       }
+
+       spin_unlock_irq(&blkcg->lock);
+       spin_unlock(&blkio_list_lock);
+
+free_newpn:
+       if (!keep_newpn)
+               kfree(newpn);
+free_buf:
+       kfree(buf);
+       return ret;
+}
+
+/*
+ * Read handler for blkio.weight_device: print a "dev\tweight" header,
+ * then one "major:minor\tweight" line for each per-device rule on the
+ * cgroup's policy list. Always returns 0.
+ */
+static int blkiocg_weight_device_read(struct cgroup *cgrp, struct cftype *cft,
+                                     struct seq_file *m)
+{
+       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
+       struct blkio_policy_node *rule;
+
+       seq_printf(m, "dev\tweight\n");
+
+       /* Checked without the lock: skip locking when there are no rules */
+       if (list_empty(&blkcg->policy_list))
+               return 0;
+
+       spin_lock_irq(&blkcg->lock);
+       list_for_each_entry(rule, &blkcg->policy_list, node)
+               seq_printf(m, "%u:%u\t%u\n", MAJOR(rule->dev),
+                          MINOR(rule->dev), rule->weight);
+       spin_unlock_irq(&blkcg->lock);
+
+       return 0;
  }
-EXPORT_SYMBOL_GPL(blkiocg_update_blkio_group_dequeue_stats);
-#endif
  
  struct cftype blkio_files[] = {
+       {
+               .name = "weight_device",
+               .read_seq_string = blkiocg_weight_device_read,
+               .write_string = blkiocg_weight_device_write,
+               .max_write_len = 256,
+       },
         {
                 .name = "weight",
                 .read_u64 = blkiocg_weight_read,
@@ -219,17 +847,61 @@ struct cftype blkio_files[] = {
         },
         {
                 .name = "time",
-               .read_seq_string = blkiocg_time_read,
+               .read_map = blkiocg_time_read,
         },
         {
                 .name = "sectors",
-               .read_seq_string = blkiocg_sectors_read,
+               .read_map = blkiocg_sectors_read,
+       },
+       {
+               .name = "io_service_bytes",
+               .read_map = blkiocg_io_service_bytes_read,
+       },
+       {
+               .name = "io_serviced",
+               .read_map = blkiocg_io_serviced_read,
+       },
+       {
+               .name = "io_service_time",
+               .read_map = blkiocg_io_service_time_read,
+       },
+       {
+               .name = "io_wait_time",
+               .read_map = blkiocg_io_wait_time_read,
+       },
+       {
+               .name = "io_merged",
+               .read_map = blkiocg_io_merged_read,
+       },
+       {
+               .name = "io_queued",
+               .read_map = blkiocg_io_queued_read,
+       },
+       {
+               .name = "reset_stats",
+               .write_u64 = blkiocg_reset_stats,
         },
  #ifdef CONFIG_DEBUG_BLK_CGROUP
-       {
+       {
+               .name = "avg_queue_size",
+               .read_map = blkiocg_avg_queue_size_read,
+       },
+       {
+               .name = "group_wait_time",
+               .read_map = blkiocg_group_wait_time_read,
+       },
+       {
+               .name = "idle_time",
+               .read_map = blkiocg_idle_time_read,
+       },
+       {
+               .name = "empty_time",
+               .read_map = blkiocg_empty_time_read,
+       },
+       {
                 .name = "dequeue",
-               .read_seq_string = blkiocg_dequeue_read,
-       },
+               .read_map = blkiocg_dequeue_read,
+       },
  #endif
  };
  
@@ -246,6 +918,7 @@ static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
         struct blkio_group *blkg;
         void *key;
         struct blkio_policy_type *blkiop;
+       struct blkio_policy_node *pn, *pntmp;
  
         rcu_read_lock();
  remove_entry:
@@ -276,7 +949,12 @@ remove_entry:
                 blkiop->ops.blkio_unlink_group_fn(key, blkg);
         spin_unlock(&blkio_list_lock);
         goto remove_entry;
+
  done:
+       list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
+               blkio_policy_delete_node(pn);
+               kfree(pn);
+       }
         free_css_id(&blkio_subsys, &blkcg->css);
         rcu_read_unlock();
         if (blkcg != &blkio_root_cgroup)
@@ -307,6 +985,7 @@ done:
         spin_lock_init(&blkcg->lock);
         INIT_HLIST_HEAD(&blkcg->blkg_list);
  
+       INIT_LIST_HEAD(&blkcg->policy_list);
         return &blkcg->css;
  }
  
diff --git a/block/blk-cgroup.h b/block/blk-cgroup.h

index 8ccc20464daeca7a92cd3c2c2865c45f30e713e2..1d409ad9c6e817fde04ffe1a85231d930e871a3b 100644 (file)
--- a/block/blk-cgroup.h
+++ b/block/blk-cgroup.h
@@ -23,11 +23,84 @@ extern struct cgroup_subsys blkio_subsys;
  #define blkio_subsys_id blkio_subsys.subsys_id
  #endif
  
+enum stat_type {
+       /* Total time spent (in ns) between request dispatch to the driver and
+        * request completion for IOs done by this cgroup. This may not be
+        * accurate when NCQ is turned on. */
+       BLKIO_STAT_SERVICE_TIME = 0,
+       /* Total bytes transferred */
+       BLKIO_STAT_SERVICE_BYTES,
+       /* Total IOs serviced, post merge */
+       BLKIO_STAT_SERVICED,
+       /* Total time spent waiting in scheduler queue in ns */
+       BLKIO_STAT_WAIT_TIME,
+       /* Number of IOs merged */
+       BLKIO_STAT_MERGED,
+       /* Number of IOs queued up */
+       BLKIO_STAT_QUEUED,
+       /* All the single valued stats go below this */
+       BLKIO_STAT_TIME,
+       BLKIO_STAT_SECTORS,
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+       BLKIO_STAT_AVG_QUEUE_SIZE,
+       BLKIO_STAT_IDLE_TIME,
+       BLKIO_STAT_EMPTY_TIME,
+       BLKIO_STAT_GROUP_WAIT_TIME,
+       BLKIO_STAT_DEQUEUE
+#endif
+};
+
+enum stat_sub_type {
+       BLKIO_STAT_READ = 0,
+       BLKIO_STAT_WRITE,
+       BLKIO_STAT_SYNC,
+       BLKIO_STAT_ASYNC,
+       BLKIO_STAT_TOTAL
+};
+
+/* blkg state flags */
+enum blkg_state_flags {
+       BLKG_waiting = 0,
+       BLKG_idling,
+       BLKG_empty,
+};
+
  struct blkio_cgroup {
         struct cgroup_subsys_state css;
         unsigned int weight;
         spinlock_t lock;
         struct hlist_head blkg_list;
+       struct list_head policy_list; /* list of blkio_policy_node */
+};
+
+struct blkio_group_stats {
+       /* total disk time and nr sectors dispatched by this group */
+       uint64_t time;
+       uint64_t sectors;
+       uint64_t stat_arr[BLKIO_STAT_QUEUED + 1][BLKIO_STAT_TOTAL];
+#ifdef CONFIG_DEBUG_BLK_CGROUP
+       /* Sum of number of IOs queued across all samples */
+       uint64_t avg_queue_size_sum;
+       /* Count of samples taken for average */
+       uint64_t avg_queue_size_samples;
+       /* How many times this group has been removed from service tree */
+       unsigned long dequeue;
+
+       /* Total time spent waiting for it to be assigned a timeslice. */
+       uint64_t group_wait_time;
+       uint64_t start_group_wait_time;
+
+       /* Time spent idling for this blkio_group */
+       uint64_t idle_time;
+       uint64_t start_idle_time;
+       /*
+        * Total time when we have requests queued and do not contain the
+        * current active queue.
+        */
+       uint64_t empty_time;
+       uint64_t start_empty_time;
+       uint16_t flags;
+#endif
  };
  
  struct blkio_group {
@@ -38,17 +111,24 @@ struct blkio_group {
  #ifdef CONFIG_DEBUG_BLK_CGROUP
         /* Store cgroup path */
         char path[128];
-       /* How many times this group has been removed from service tree */
-       unsigned long dequeue;
  #endif
         /* The device MKDEV(major, minor), this group has been created for */
-       dev_t   dev;
+       dev_t dev;
  
-       /* total disk time and nr sectors dispatched by this group */
-       unsigned long time;
-       unsigned long sectors;
+       /* Need to serialize the stats in the case of reset/update */
+       spinlock_t stats_lock;
+       struct blkio_group_stats stats;
+};
+
+struct blkio_policy_node {
+       struct list_head node;
+       dev_t dev;
+       unsigned int weight;
  };
  
+extern unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
+                                    dev_t dev);
+
  typedef void (blkio_unlink_group_fn) (void *key, struct blkio_group *blkg);
  typedef void (blkio_update_group_weight_fn) (struct blkio_group *blkg,
                                                 unsigned int weight);
@@ -89,12 +169,44 @@ static inline char *blkg_path(struct blkio_group *blkg)
  {
         return blkg->path;
  }
-void blkiocg_update_blkio_group_dequeue_stats(struct blkio_group *blkg,
+void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg);
+void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
                                 unsigned long dequeue);
+void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg);
+void blkiocg_update_idle_time_stats(struct blkio_group *blkg);
+void blkiocg_set_start_empty_time(struct blkio_group *blkg, bool ignore);
+
+#define BLKG_FLAG_FNS(name)                                            \
+static inline void blkio_mark_blkg_##name(                             \
+               struct blkio_group_stats *stats)                        \
+{                                                                      \
+       stats->flags |= (1 << BLKG_##name);                             \
+}                                                                      \
+static inline void blkio_clear_blkg_##name(                            \
+               struct blkio_group_stats *stats)                        \
+{                                                                      \
+       stats->flags &= ~(1 << BLKG_##name);                            \
+}                                                                      \
+static inline int blkio_blkg_##name(struct blkio_group_stats *stats)   \
+{                                                                      \
+       return (stats->flags & (1 << BLKG_##name)) != 0;                \
+}                                                                      \
+
+BLKG_FLAG_FNS(waiting)
+BLKG_FLAG_FNS(idling)
+BLKG_FLAG_FNS(empty)
+#undef BLKG_FLAG_FNS
  #else
  static inline char *blkg_path(struct blkio_group *blkg) { return NULL; }
-static inline void blkiocg_update_blkio_group_dequeue_stats(
-                       struct blkio_group *blkg, unsigned long dequeue) {}
+static inline void blkiocg_update_avg_queue_size_stats(
+                                               struct blkio_group *blkg) {}
+static inline void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
+                                               unsigned long dequeue) {}
+static inline void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
+{}
+static inline void blkiocg_update_idle_time_stats(struct blkio_group *blkg) {}
+static inline void blkiocg_set_start_empty_time(struct blkio_group *blkg,
+                                               bool ignore) {}
  #endif
  
  #if defined(CONFIG_BLK_CGROUP) || defined(CONFIG_BLK_CGROUP_MODULE)
@@ -105,26 +217,45 @@ extern void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
  extern int blkiocg_del_blkio_group(struct blkio_group *blkg);
  extern struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg,
                                                 void *key);
-void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
-                       unsigned long time, unsigned long sectors);
+void blkio_group_init(struct blkio_group *blkg);
+void blkiocg_update_timeslice_used(struct blkio_group *blkg,
+                                       unsigned long time);
+void blkiocg_update_dispatch_stats(struct blkio_group *blkg, uint64_t bytes,
+                                               bool direction, bool sync);
+void blkiocg_update_completion_stats(struct blkio_group *blkg,
+       uint64_t start_time, uint64_t io_start_time, bool direction, bool sync);
+void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
+                                       bool sync);
+void blkiocg_update_io_add_stats(struct blkio_group *blkg,
+               struct blkio_group *curr_blkg, bool direction, bool sync);
+void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
+                                       bool direction, bool sync);
  #else
  struct cgroup;
  static inline struct blkio_cgroup *
  cgroup_to_blkio_cgroup(struct cgroup *cgroup) { return NULL; }
  
+static inline void blkio_group_init(struct blkio_group *blkg) {}
  static inline void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
-                       struct blkio_group *blkg, void *key, dev_t dev)
-{
-}
+                       struct blkio_group *blkg, void *key, dev_t dev) {}
  
  static inline int
  blkiocg_del_blkio_group(struct blkio_group *blkg) { return 0; }
  
  static inline struct blkio_group *
  blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key) { return NULL; }
-static inline void blkiocg_update_blkio_group_stats(struct blkio_group *blkg,
-                       unsigned long time, unsigned long sectors)
-{
-}
+static inline void blkiocg_update_timeslice_used(struct blkio_group *blkg,
+                                               unsigned long time) {}
+static inline void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
+                               uint64_t bytes, bool direction, bool sync) {}
+static inline void blkiocg_update_completion_stats(struct blkio_group *blkg,
+               uint64_t start_time, uint64_t io_start_time, bool direction,
+               bool sync) {}
+static inline void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
+                                               bool direction, bool sync) {}
+static inline void blkiocg_update_io_add_stats(struct blkio_group *blkg,
+               struct blkio_group *curr_blkg, bool direction, bool sync) {}
+static inline void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
+                                               bool direction, bool sync) {}
  #endif
  #endif /* _BLK_CGROUP_H */
diff --git a/block/blk-core.c b/block/blk-core.c

index 9fe174dc74d1dfecca76bb19eb9dda2af53f93bd..e9a5ae25db8c2c1a10a35339fc8372613deed5e1 100644 (file)
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -127,6 +127,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
         rq->tag = -1;
         rq->ref_count = 1;
         rq->start_time = jiffies;
+       set_start_time_ns(rq);
  }
  EXPORT_SYMBOL(blk_rq_init);
  
@@ -450,6 +451,7 @@ void blk_cleanup_queue(struct request_queue *q)
          */
         blk_sync_queue(q);
  
+       del_timer_sync(&q->backing_dev_info.laptop_mode_wb_timer);
         mutex_lock(&q->sysfs_lock);
         queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q);
         mutex_unlock(&q->sysfs_lock);
@@ -510,6 +512,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
                 return NULL;
         }
  
+       setup_timer(&q->backing_dev_info.laptop_mode_wb_timer,
+                   laptop_mode_timer_fn, (unsigned long) q);
         init_timer(&q->unplug_timer);
         setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q);
         INIT_LIST_HEAD(&q->timeout_list);
@@ -1198,6 +1202,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                 if (!blk_rq_cpu_valid(req))
                         req->cpu = bio->bi_comp_cpu;
                 drive_stat_acct(req, 0);
+               elv_bio_merged(q, req, bio);
                 if (!attempt_back_merge(q, req))
                         elv_merged_request(q, req, el_ret);
                 goto out;
@@ -1231,6 +1236,7 @@ static int __make_request(struct request_queue *q, struct bio *bio)
                 if (!blk_rq_cpu_valid(req))
                         req->cpu = bio->bi_comp_cpu;
                 drive_stat_acct(req, 0);
+               elv_bio_merged(q, req, bio);
                 if (!attempt_front_merge(q, req))
                         elv_merged_request(q, req, el_ret);
                 goto out;
@@ -1855,8 +1861,10 @@ void blk_dequeue_request(struct request *rq)
          * and to it is freed is accounted as io that is in progress at
          * the driver side.
          */
-       if (blk_account_rq(rq))
+       if (blk_account_rq(rq)) {
                 q->in_flight[rq_is_sync(rq)]++;
+               set_io_start_time_ns(rq);
+       }
  }
  
  /**
@@ -2098,7 +2106,7 @@ static void blk_finish_request(struct request *req, int error)
         BUG_ON(blk_queued_rq(req));
  
         if (unlikely(laptop_mode) && blk_fs_request(req))
-               laptop_io_completion();
+               laptop_io_completion(&req->q->backing_dev_info);
  
         blk_delete_timer(req);
  
@@ -2517,4 +2525,3 @@ int __init blk_dev_init(void)
  
         return 0;
  }
-
diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c

index 838834be115b3f341f3bc6e0eba54a5c3fcc424c..9e0df2bdcf2124f1f16afe645e43788e10ec9d9c 100644 (file)
--- a/block/cfq-iosched.c
+++ b/block/cfq-iosched.c
@@ -143,8 +143,6 @@ struct cfq_queue {
         struct cfq_queue *new_cfqq;
         struct cfq_group *cfqg;
         struct cfq_group *orig_cfqg;
-       /* Sectors dispatched in current dispatch round */
-       unsigned long nr_sectors;
  };
  
  /*
@@ -858,7 +856,7 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
         if (!RB_EMPTY_NODE(&cfqg->rb_node))
                 cfq_rb_erase(&cfqg->rb_node, st);
         cfqg->saved_workload_slice = 0;
-       blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1);
+       blkiocg_update_dequeue_stats(&cfqg->blkg, 1);
  }
  
  static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
@@ -884,13 +882,12 @@ static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
                         slice_used = cfqq->allocated_slice;
         }
  
-       cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
-                               cfqq->nr_sectors);
+       cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u", slice_used);
         return slice_used;
  }
  
  static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
-                               struct cfq_queue *cfqq)
+                               struct cfq_queue *cfqq, bool forced)
  {
         struct cfq_rb_root *st = &cfqd->grp_service_tree;
         unsigned int used_sl, charge_sl;
@@ -919,8 +916,8 @@ static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
  
         cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
                                         st->min_vdisktime);
-       blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
-                                               cfqq->nr_sectors);
+       blkiocg_update_timeslice_used(&cfqg->blkg, used_sl);
+       blkiocg_set_start_empty_time(&cfqg->blkg, forced);
  }
  
  #ifdef CONFIG_CFQ_GROUP_IOSCHED
@@ -961,10 +958,10 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
         if (!cfqg)
                 goto done;
  
-       cfqg->weight = blkcg->weight;
         for_each_cfqg_st(cfqg, i, j, st)
                 *st = CFQ_RB_ROOT;
         RB_CLEAR_NODE(&cfqg->rb_node);
+       blkio_group_init(&cfqg->blkg);
  
         /*
          * Take the initial reference that will be released on destroy
@@ -978,6 +975,7 @@ cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
         sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
         blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
                                         MKDEV(major, minor));
+       cfqg->weight = blkcg_get_weight(blkcg, cfqg->blkg.dev);
  
         /* Add group on cfqd list */
         hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
@@ -1389,7 +1387,12 @@ static void cfq_reposition_rq_rb(struct cfq_queue *cfqq, struct request *rq)
  {
         elv_rb_del(&cfqq->sort_list, rq);
         cfqq->queued[rq_is_sync(rq)]--;
+       blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq),
+                                               rq_is_sync(rq));
         cfq_add_rq_rb(rq);
+       blkiocg_update_io_add_stats(
+                       &cfqq->cfqg->blkg, &cfqq->cfqd->serving_group->blkg,
+                       rq_data_dir(rq), rq_is_sync(rq));
  }
  
  static struct request *
@@ -1445,6 +1448,8 @@ static void cfq_remove_request(struct request *rq)
         cfq_del_rq_rb(rq);
  
         cfqq->cfqd->rq_queued--;
+       blkiocg_update_io_remove_stats(&cfqq->cfqg->blkg, rq_data_dir(rq),
+                                               rq_is_sync(rq));
         if (rq_is_meta(rq)) {
                 WARN_ON(!cfqq->meta_pending);
                 cfqq->meta_pending--;
@@ -1476,6 +1481,14 @@ static void cfq_merged_request(struct request_queue *q, struct request *req,
         }
  }
  
+static void cfq_bio_merged(struct request_queue *q, struct request *req,
+                               struct bio *bio)
+{
+       struct cfq_queue *cfqq = RQ_CFQQ(req);
+       blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, bio_data_dir(bio),
+                                       cfq_bio_sync(bio));
+}
+
  static void
  cfq_merged_requests(struct request_queue *q, struct request *rq,
                     struct request *next)
@@ -1493,6 +1506,8 @@ cfq_merged_requests(struct request_queue *q, struct request *rq,
         if (cfqq->next_rq == next)
                 cfqq->next_rq = rq;
         cfq_remove_request(next);
+       blkiocg_update_io_merged_stats(&cfqq->cfqg->blkg, rq_data_dir(next),
+                                       rq_is_sync(next));
  }
  
  static int cfq_allow_merge(struct request_queue *q, struct request *rq,
@@ -1520,18 +1535,24 @@ static int cfq_allow_merge(struct request_queue *q, struct request *rq,
         return cfqq == RQ_CFQQ(rq);
  }
  
+static inline void cfq_del_timer(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+{
+       del_timer(&cfqd->idle_slice_timer);
+       blkiocg_update_idle_time_stats(&cfqq->cfqg->blkg);
+}
+
  static void __cfq_set_active_queue(struct cfq_data *cfqd,
                                    struct cfq_queue *cfqq)
  {
         if (cfqq) {
                 cfq_log_cfqq(cfqd, cfqq, "set_active wl_prio:%d wl_type:%d",
                                 cfqd->serving_prio, cfqd->serving_type);
+               blkiocg_update_avg_queue_size_stats(&cfqq->cfqg->blkg);
                 cfqq->slice_start = 0;
                 cfqq->dispatch_start = jiffies;
                 cfqq->allocated_slice = 0;
                 cfqq->slice_end = 0;
                 cfqq->slice_dispatch = 0;
-               cfqq->nr_sectors = 0;
  
                 cfq_clear_cfqq_wait_request(cfqq);
                 cfq_clear_cfqq_must_dispatch(cfqq);
@@ -1539,7 +1560,7 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
                 cfq_clear_cfqq_fifo_expire(cfqq);
                 cfq_mark_cfqq_slice_new(cfqq);
  
-               del_timer(&cfqd->idle_slice_timer);
+               cfq_del_timer(cfqd, cfqq);
         }
  
         cfqd->active_queue = cfqq;
@@ -1550,12 +1571,12 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
   */
  static void
  __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
-                   bool timed_out)
+                   bool timed_out, bool forced)
  {
         cfq_log_cfqq(cfqd, cfqq, "slice expired t=%d", timed_out);
  
         if (cfq_cfqq_wait_request(cfqq))
-               del_timer(&cfqd->idle_slice_timer);
+               cfq_del_timer(cfqd, cfqq);
  
         cfq_clear_cfqq_wait_request(cfqq);
         cfq_clear_cfqq_wait_busy(cfqq);
@@ -1577,7 +1598,7 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                 cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
         }
  
-       cfq_group_served(cfqd, cfqq->cfqg, cfqq);
+       cfq_group_served(cfqd, cfqq->cfqg, cfqq, forced);
  
         if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
                 cfq_del_cfqq_rr(cfqd, cfqq);
@@ -1596,12 +1617,13 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
         }
  }
  
-static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out)
+static inline void cfq_slice_expired(struct cfq_data *cfqd, bool timed_out,
+                                       bool forced)
  {
         struct cfq_queue *cfqq = cfqd->active_queue;
  
         if (cfqq)
-               __cfq_slice_expired(cfqd, cfqq, timed_out);
+               __cfq_slice_expired(cfqd, cfqq, timed_out, forced);
  }
  
  /*
@@ -1857,6 +1879,7 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd)
         sl = cfqd->cfq_slice_idle;
  
         mod_timer(&cfqd->idle_slice_timer, jiffies + sl);
+       blkiocg_update_set_idle_time_stats(&cfqq->cfqg->blkg);
         cfq_log_cfqq(cfqd, cfqq, "arm_idle: %lu", sl);
  }
  
@@ -1876,7 +1899,8 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
         elv_dispatch_sort(q, rq);
  
         cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]++;
-       cfqq->nr_sectors += blk_rq_sectors(rq);
+       blkiocg_update_dispatch_stats(&cfqq->cfqg->blkg, blk_rq_bytes(rq),
+                                       rq_data_dir(rq), rq_is_sync(rq));
  }
  
  /*
@@ -2167,7 +2191,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd)
         }
  
  expire:
-       cfq_slice_expired(cfqd, 0);
+       cfq_slice_expired(cfqd, 0, false);
  new_queue:
         /*
          * Current queue expired. Check if we have to switch to a new
@@ -2193,7 +2217,7 @@ static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq)
         BUG_ON(!list_empty(&cfqq->fifo));
  
         /* By default cfqq is not expired if it is empty. Do it explicitly */
-       __cfq_slice_expired(cfqq->cfqd, cfqq, 0);
+       __cfq_slice_expired(cfqq->cfqd, cfqq, 0, true);
         return dispatched;
  }
  
@@ -2376,7 +2400,7 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
             cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) ||
             cfq_class_idle(cfqq))) {
                 cfqq->slice_end = jiffies + 1;
-               cfq_slice_expired(cfqd, 0);
+               cfq_slice_expired(cfqd, 0, false);
         }
  
         cfq_log_cfqq(cfqd, cfqq, "dispatched a request");
@@ -2407,7 +2431,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
         orig_cfqg = cfqq->orig_cfqg;
  
         if (unlikely(cfqd->active_queue == cfqq)) {
-               __cfq_slice_expired(cfqd, cfqq, 0);
+               __cfq_slice_expired(cfqd, cfqq, 0, false);
                 cfq_schedule_dispatch(cfqd);
         }
  
@@ -2508,7 +2532,7 @@ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
         struct cfq_queue *__cfqq, *next;
  
         if (unlikely(cfqq == cfqd->active_queue)) {
-               __cfq_slice_expired(cfqd, cfqq, 0);
+               __cfq_slice_expired(cfqd, cfqq, 0, false);
                 cfq_schedule_dispatch(cfqd);
         }
  
@@ -3137,7 +3161,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq,
  static void cfq_preempt_queue(struct cfq_data *cfqd, struct cfq_queue *cfqq)
  {
         cfq_log_cfqq(cfqd, cfqq, "preempt");
-       cfq_slice_expired(cfqd, 1);
+       cfq_slice_expired(cfqd, 1, false);
  
         /*
          * Put the new queue at the front of the of the current list,
@@ -3185,11 +3209,14 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                 if (cfq_cfqq_wait_request(cfqq)) {
                         if (blk_rq_bytes(rq) > PAGE_CACHE_SIZE ||
                             cfqd->busy_queues > 1) {
-                               del_timer(&cfqd->idle_slice_timer);
+                               cfq_del_timer(cfqd, cfqq);
                                 cfq_clear_cfqq_wait_request(cfqq);
                                 __blk_run_queue(cfqd->queue);
-                       } else
+                       } else {
+                               blkiocg_update_idle_time_stats(
+                                               &cfqq->cfqg->blkg);
                                 cfq_mark_cfqq_must_dispatch(cfqq);
+                       }
                 }
         } else if (cfq_should_preempt(cfqd, cfqq, rq)) {
                 /*
@@ -3215,6 +3242,9 @@ static void cfq_insert_request(struct request_queue *q, struct request *rq)
         list_add_tail(&rq->queuelist, &cfqq->fifo);
         cfq_add_rq_rb(rq);
  
+       blkiocg_update_io_add_stats(&cfqq->cfqg->blkg,
+                       &cfqd->serving_group->blkg, rq_data_dir(rq),
+                       rq_is_sync(rq));
         cfq_rq_enqueued(cfqd, cfqq, rq);
  }
  
@@ -3300,6 +3330,9 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
         WARN_ON(!cfqq->dispatched);
         cfqd->rq_in_driver--;
         cfqq->dispatched--;
+       blkiocg_update_completion_stats(&cfqq->cfqg->blkg, rq_start_time_ns(rq),
+                       rq_io_start_time_ns(rq), rq_data_dir(rq),
+                       rq_is_sync(rq));
  
         cfqd->rq_in_flight[cfq_cfqq_sync(cfqq)]--;
  
@@ -3340,7 +3373,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
                  * - when there is a close cooperator
                  */
                 if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq))
-                       cfq_slice_expired(cfqd, 1);
+                       cfq_slice_expired(cfqd, 1, false);
                 else if (sync && cfqq_empty &&
                          !cfq_close_cooperator(cfqd, cfqq)) {
                         cfqd->noidle_tree_requires_idle |= !rq_noidle(rq);
@@ -3600,7 +3633,7 @@ static void cfq_idle_slice_timer(unsigned long data)
                 cfq_clear_cfqq_deep(cfqq);
         }
  expire:
-       cfq_slice_expired(cfqd, timed_out);
+       cfq_slice_expired(cfqd, timed_out, false);
  out_kick:
         cfq_schedule_dispatch(cfqd);
  out_cont:
@@ -3643,7 +3676,7 @@ static void cfq_exit_queue(struct elevator_queue *e)
         spin_lock_irq(q->queue_lock);
  
         if (cfqd->active_queue)
-               __cfq_slice_expired(cfqd, cfqd->active_queue, 0);
+               __cfq_slice_expired(cfqd, cfqd->active_queue, 0, false);
  
         while (!list_empty(&cfqd->cic_list)) {
                 struct cfq_io_context *cic = list_entry(cfqd->cic_list.next,
@@ -3870,6 +3903,7 @@ static struct elevator_type iosched_cfq = {
                 .elevator_merged_fn =           cfq_merged_request,
                 .elevator_merge_req_fn =        cfq_merged_requests,
                 .elevator_allow_merge_fn =      cfq_allow_merge,
+               .elevator_bio_merged_fn =       cfq_bio_merged,
                 .elevator_dispatch_fn =         cfq_dispatch_requests,
                 .elevator_add_req_fn =          cfq_insert_request,
                 .elevator_activate_req_fn =     cfq_activate_request,
diff --git a/block/elevator.c b/block/elevator.c

index 76e3702d53817e4aee08d092a59f07ca840145cc..5e734592bb4037f48f05bda6e11a6a73d2ad1d7f 100644 (file)
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -539,6 +539,15 @@ void elv_merge_requests(struct request_queue *q, struct request *rq,
         q->last_merge = rq;
  }
  
+void elv_bio_merged(struct request_queue *q, struct request *rq,
+                       struct bio *bio)
+{
+       struct elevator_queue *e = q->elevator;
+
+       if (e->ops->elevator_bio_merged_fn)
+               e->ops->elevator_bio_merged_fn(q, rq, bio);
+}
+
  void elv_requeue_request(struct request_queue *q, struct request *rq)
  {
         /*
diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h

index fcbc26af00e479675db47d8e79d1e31b3e4368bc..2742e1adfc30f173fa71d65f684c1a6315249f87 100644 (file)
--- a/include/linux/backing-dev.h
+++ b/include/linux/backing-dev.h
@@ -14,6 +14,7 @@
  #include <linux/kernel.h>
  #include <linux/fs.h>
  #include <linux/sched.h>
+#include <linux/timer.h>
  #include <linux/writeback.h>
  #include <asm/atomic.h>
  
@@ -88,6 +89,8 @@ struct backing_dev_info {
  
         struct device *dev;
  
+       struct timer_list laptop_mode_wb_timer;
+
  #ifdef CONFIG_DEBUG_FS
         struct dentry *debug_dir;
         struct dentry *debug_stats;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 6690e8bae7bb5946396577fcc12a9c85836cb7b5..d483c494672af279f0a2cb46312117a6d9b27188 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -194,7 +194,10 @@ struct request {
  
         struct gendisk *rq_disk;
         unsigned long start_time;
-
+#ifdef CONFIG_BLK_CGROUP
+       unsigned long long start_time_ns;
+       unsigned long long io_start_time_ns;    /* when passed to hardware */
+#endif
         /* Number of scatter-gather DMA addr+len pairs after
          * physical address coalescing is performed.
          */
@@ -1196,6 +1199,39 @@ static inline void put_dev_sector(Sector p)
  struct work_struct;
  int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
  
+#ifdef CONFIG_BLK_CGROUP
+static inline void set_start_time_ns(struct request *req)
+{
+       req->start_time_ns = sched_clock();
+}
+
+static inline void set_io_start_time_ns(struct request *req)
+{
+       req->io_start_time_ns = sched_clock();
+}
+
+static inline uint64_t rq_start_time_ns(struct request *req)
+{
+        return req->start_time_ns;
+}
+
+static inline uint64_t rq_io_start_time_ns(struct request *req)
+{
+        return req->io_start_time_ns;
+}
+#else
+static inline void set_start_time_ns(struct request *req) {}
+static inline void set_io_start_time_ns(struct request *req) {}
+static inline uint64_t rq_start_time_ns(struct request *req)
+{
+       return 0;
+}
+static inline uint64_t rq_io_start_time_ns(struct request *req)
+{
+       return 0;
+}
+#endif
+
  #define MODULE_ALIAS_BLOCKDEV(major,minor) \
         MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
  #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
diff --git a/include/linux/elevator.h b/include/linux/elevator.h

index 1cb3372e65d89521886a8deb0f08f46c31dfe813..2c958f4fce1ed6f1d6f4a8c3fc865155cd1eac81 100644 (file)
--- a/include/linux/elevator.h
+++ b/include/linux/elevator.h
@@ -14,6 +14,9 @@ typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int
  
  typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *);
  
+typedef void (elevator_bio_merged_fn) (struct request_queue *,
+                                               struct request *, struct bio *);
+
  typedef int (elevator_dispatch_fn) (struct request_queue *, int);
  
  typedef void (elevator_add_req_fn) (struct request_queue *, struct request *);
@@ -36,6 +39,7 @@ struct elevator_ops
         elevator_merged_fn *elevator_merged_fn;
         elevator_merge_req_fn *elevator_merge_req_fn;
         elevator_allow_merge_fn *elevator_allow_merge_fn;
+       elevator_bio_merged_fn *elevator_bio_merged_fn;
  
         elevator_dispatch_fn *elevator_dispatch_fn;
         elevator_add_req_fn *elevator_add_req_fn;
@@ -103,6 +107,8 @@ extern int elv_merge(struct request_queue *, struct request **, struct bio *);
  extern void elv_merge_requests(struct request_queue *, struct request *,
                                struct request *);
  extern void elv_merged_request(struct request_queue *, struct request *, int);
+extern void elv_bio_merged(struct request_queue *q, struct request *,
+                               struct bio *);
  extern void elv_requeue_request(struct request_queue *, struct request *);
  extern int elv_queue_empty(struct request_queue *);
  extern struct request *elv_former_request(struct request_queue *, struct request *);
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index 36520ded3e062701866a5a4941e1cc0a85d19b37..eb38a2c645f61212098de7e749d540dd14ba8683 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -96,8 +96,10 @@ static inline void inode_sync_wait(struct inode *inode)
  /*
   * mm/page-writeback.c
   */
-void laptop_io_completion(void);
+void laptop_io_completion(struct backing_dev_info *info);
  void laptop_sync_completion(void);
+void laptop_mode_sync(struct work_struct *work);
+void laptop_mode_timer_fn(unsigned long data);
  void throttle_vm_writeout(gfp_t gfp_mask);
  
  /* These are exported to sysctl. */
diff --git a/mm/page-writeback.c b/mm/page-writeback.c

index 0b19943ecf8bb7fc75e9c9842a0018bdbac3571c..d0f2b3765f8d9d3255305106331a6e2b6d85455a 100644 (file)
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -683,10 +683,6 @@ void throttle_vm_writeout(gfp_t gfp_mask)
          }
  }
  
-static void laptop_timer_fn(unsigned long unused);
-
-static DEFINE_TIMER(laptop_mode_wb_timer, laptop_timer_fn, 0, 0);
-
  /*
   * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
   */
@@ -697,21 +693,19 @@ int dirty_writeback_centisecs_handler(ctl_table *table, int write,
         return 0;
  }
  
-static void do_laptop_sync(struct work_struct *work)
+void laptop_mode_timer_fn(unsigned long data)
  {
-       wakeup_flusher_threads(0);
-       kfree(work);
-}
+       struct request_queue *q = (struct request_queue *)data;
+       int nr_pages = global_page_state(NR_FILE_DIRTY) +
+               global_page_state(NR_UNSTABLE_NFS);
  
-static void laptop_timer_fn(unsigned long unused)
-{
-       struct work_struct *work;
+       /*
+        * We want to write everything out, not just down to the dirty
+        * threshold
+        */
  
-       work = kmalloc(sizeof(*work), GFP_ATOMIC);
-       if (work) {
-               INIT_WORK(work, do_laptop_sync);
-               schedule_work(work);
-       }
+       if (bdi_has_dirty_io(&q->backing_dev_info))
+               bdi_start_writeback(&q->backing_dev_info, NULL, nr_pages);
  }
  
  /*
@@ -719,9 +713,9 @@ static void laptop_timer_fn(unsigned long unused)
   * of all dirty data a few seconds from now.  If the flush is already scheduled
   * then push it back - the user is still using the disk.
   */
-void laptop_io_completion(void)
+void laptop_io_completion(struct backing_dev_info *info)
  {
-       mod_timer(&laptop_mode_wb_timer, jiffies + laptop_mode);
+       mod_timer(&info->laptop_mode_wb_timer, jiffies + laptop_mode);
  }
  
  /*
@@ -731,7 +725,14 @@ void laptop_io_completion(void)
   */
  void laptop_sync_completion(void)
  {
-       del_timer(&laptop_mode_wb_timer);
+       struct backing_dev_info *bdi;
+
+       rcu_read_lock();
+
+       list_for_each_entry_rcu(bdi, &bdi_list, bdi_list)
+               del_timer(&bdi->laptop_mode_wb_timer);
+
+       rcu_read_unlock();
  }
  
  /*
author	Jens Axboe <jens.axboe@oracle.com>
	Tue, 13 Apr 2010 18:03:21 +0000 (20:03 +0200)
committer	Jens Axboe <jens.axboe@oracle.com>
	Tue, 13 Apr 2010 18:03:21 +0000 (20:03 +0200)
Documentation/cgroups/blkio-controller.txt		patch \| blob \| blame \| history
block/blk-cgroup.c		patch \| blob \| blame \| history
block/blk-cgroup.h		patch \| blob \| blame \| history
block/blk-core.c		patch \| blob \| blame \| history
block/cfq-iosched.c		patch \| blob \| blame \| history
block/elevator.c		patch \| blob \| blame \| history
include/linux/backing-dev.h		patch \| blob \| blame \| history
include/linux/blkdev.h		patch \| blob \| blame \| history
include/linux/elevator.h		patch \| blob \| blame \| history
include/linux/writeback.h		patch \| blob \| blame \| history
mm/page-writeback.c		patch \| blob \| blame \| history