blkio: Recalculate the throttled bio dispatch time upon throttle limit change
1 /*
2  * Common Block IO controller cgroup interface
3  *
4  * Based on ideas and code from CFQ, CFS and BFQ:
5  * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
6  *
7  * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
8  *                    Paolo Valente <paolo.valente@unimore.it>
9  *
10  * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
11  *                    Nauman Rafique <nauman@google.com>
12  */
13 #include <linux/ioprio.h>
14 #include <linux/seq_file.h>
15 #include <linux/kdev_t.h>
16 #include <linux/module.h>
17 #include <linux/err.h>
18 #include <linux/blkdev.h>
19 #include <linux/slab.h>
20 #include "blk-cgroup.h"
21 #include <linux/genhd.h>
22
23 #define MAX_KEY_LEN 100
24
25 static DEFINE_SPINLOCK(blkio_list_lock);
26 static LIST_HEAD(blkio_list);
27
28 struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
29 EXPORT_SYMBOL_GPL(blkio_root_cgroup);
30
31 static struct cgroup_subsys_state *blkiocg_create(struct cgroup_subsys *,
32                                                   struct cgroup *);
33 static int blkiocg_can_attach(struct cgroup_subsys *, struct cgroup *,
34                               struct task_struct *, bool);
35 static void blkiocg_attach(struct cgroup_subsys *, struct cgroup *,
36                            struct cgroup *, struct task_struct *, bool);
37 static void blkiocg_destroy(struct cgroup_subsys *, struct cgroup *);
38 static int blkiocg_populate(struct cgroup_subsys *, struct cgroup *);
39
40 /* for encoding cft->private value on file */
41 #define BLKIOFILE_PRIVATE(x, val)       (((x) << 16) | (val))
42 /* Which policy owns the file: proportional weight or throttle */
43 #define BLKIOFILE_POLICY(val)           (((val) >> 16) & 0xffff)
44 #define BLKIOFILE_ATTR(val)             ((val) & 0xffff)
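/*
 * For example, the throttle.read_bps_device file in blkio_files[] below is
 * created with .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
 * BLKIO_THROTL_read_bps_device); BLKIOFILE_POLICY() then recovers the policy
 * id from the upper 16 bits and BLKIOFILE_ATTR() the file id from the lower 16.
 */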
45
46 struct cgroup_subsys blkio_subsys = {
47         .name = "blkio",
48         .create = blkiocg_create,
49         .can_attach = blkiocg_can_attach,
50         .attach = blkiocg_attach,
51         .destroy = blkiocg_destroy,
52         .populate = blkiocg_populate,
53 #ifdef CONFIG_BLK_CGROUP
54         /* note: blkio_subsys_id is otherwise defined in blk-cgroup.h */
55         .subsys_id = blkio_subsys_id,
56 #endif
57         .use_id = 1,
58         .module = THIS_MODULE,
59 };
60 EXPORT_SYMBOL_GPL(blkio_subsys);
61
62 static inline void blkio_policy_insert_node(struct blkio_cgroup *blkcg,
63                                             struct blkio_policy_node *pn)
64 {
65         list_add(&pn->node, &blkcg->policy_list);
66 }
67
68 static inline bool cftype_blkg_same_policy(struct cftype *cft,
69                         struct blkio_group *blkg)
70 {
71         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
72
73         if (blkg->plid == plid)
74                 return 1;
75
76         return 0;
77 }
78
79 /* Determines if policy node matches cgroup file being accessed */
80 static inline bool pn_matches_cftype(struct cftype *cft,
81                         struct blkio_policy_node *pn)
82 {
83         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
84         int fileid = BLKIOFILE_ATTR(cft->private);
85
86         return (plid == pn->plid && fileid == pn->fileid);
87 }
88
89 /* Must be called with blkcg->lock held */
90 static inline void blkio_policy_delete_node(struct blkio_policy_node *pn)
91 {
92         list_del(&pn->node);
93 }
94
95 /* Must be called with blkcg->lock held */
96 static struct blkio_policy_node *
97 blkio_policy_search_node(const struct blkio_cgroup *blkcg, dev_t dev,
98                 enum blkio_policy_id plid, int fileid)
99 {
100         struct blkio_policy_node *pn;
101
102         list_for_each_entry(pn, &blkcg->policy_list, node) {
103                 if (pn->dev == dev && pn->plid == plid && pn->fileid == fileid)
104                         return pn;
105         }
106
107         return NULL;
108 }
109
110 struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
111 {
112         return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
113                             struct blkio_cgroup, css);
114 }
115 EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);
116
117 static inline void
118 blkio_update_group_weight(struct blkio_group *blkg, unsigned int weight)
119 {
120         struct blkio_policy_type *blkiop;
121
122         list_for_each_entry(blkiop, &blkio_list, list) {
123                 /* If this policy does not own the blkg, do not send updates */
124                 if (blkiop->plid != blkg->plid)
125                         continue;
126                 if (blkiop->ops.blkio_update_group_weight_fn)
127                         blkiop->ops.blkio_update_group_weight_fn(blkg->key,
128                                                         blkg, weight);
129         }
130 }
131
132 static inline void blkio_update_group_bps(struct blkio_group *blkg, u64 bps,
133                                 int fileid)
134 {
135         struct blkio_policy_type *blkiop;
136
137         list_for_each_entry(blkiop, &blkio_list, list) {
138
139                 /* If this policy does not own the blkg, do not send updates */
140                 if (blkiop->plid != blkg->plid)
141                         continue;
142
143                 if (fileid == BLKIO_THROTL_read_bps_device
144                     && blkiop->ops.blkio_update_group_read_bps_fn)
145                         blkiop->ops.blkio_update_group_read_bps_fn(blkg->key,
146                                                                 blkg, bps);
147
148                 if (fileid == BLKIO_THROTL_write_bps_device
149                     && blkiop->ops.blkio_update_group_write_bps_fn)
150                         blkiop->ops.blkio_update_group_write_bps_fn(blkg->key,
151                                                                 blkg, bps);
152         }
153 }
154
155 static inline void blkio_update_group_iops(struct blkio_group *blkg,
156                         unsigned int iops, int fileid)
157 {
158         struct blkio_policy_type *blkiop;
159
160         list_for_each_entry(blkiop, &blkio_list, list) {
161
162                 /* If this policy does not own the blkg, do not send updates */
163                 if (blkiop->plid != blkg->plid)
164                         continue;
165
166                 if (fileid == BLKIO_THROTL_read_iops_device
167                     && blkiop->ops.blkio_update_group_read_iops_fn)
168                         blkiop->ops.blkio_update_group_read_iops_fn(blkg->key,
169                                                                 blkg, iops);
170
171                 if (fileid == BLKIO_THROTL_write_iops_device
172                     && blkiop->ops.blkio_update_group_write_iops_fn)
173                         blkiop->ops.blkio_update_group_write_iops_fn(blkg->key,
174                                                                 blkg, iops);
175         }
176 }
177
178 /*
179  * Add to the appropriate stat variable depending on the request type.
180  * This should be called with the blkg->stats_lock held.
181  */
182 static void blkio_add_stat(uint64_t *stat, uint64_t add, bool direction,
183                                 bool sync)
184 {
185         if (direction)
186                 stat[BLKIO_STAT_WRITE] += add;
187         else
188                 stat[BLKIO_STAT_READ] += add;
189         if (sync)
190                 stat[BLKIO_STAT_SYNC] += add;
191         else
192                 stat[BLKIO_STAT_ASYNC] += add;
193 }
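/*
 * Illustration: for a single synchronous write, blkio_add_stat(stat, 1, true,
 * true) bumps both stat[BLKIO_STAT_WRITE] and stat[BLKIO_STAT_SYNC], so the
 * read/write split and the sync/async split each sum to the same total.
 */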
194
195 /*
196  * Decrements the appropriate stat variable if non-zero depending on the
197  * request type. Panics on value being zero.
198  * This should be called with the blkg->stats_lock held.
199  */
200 static void blkio_check_and_dec_stat(uint64_t *stat, bool direction, bool sync)
201 {
202         if (direction) {
203                 BUG_ON(stat[BLKIO_STAT_WRITE] == 0);
204                 stat[BLKIO_STAT_WRITE]--;
205         } else {
206                 BUG_ON(stat[BLKIO_STAT_READ] == 0);
207                 stat[BLKIO_STAT_READ]--;
208         }
209         if (sync) {
210                 BUG_ON(stat[BLKIO_STAT_SYNC] == 0);
211                 stat[BLKIO_STAT_SYNC]--;
212         } else {
213                 BUG_ON(stat[BLKIO_STAT_ASYNC] == 0);
214                 stat[BLKIO_STAT_ASYNC]--;
215         }
216 }
217
218 #ifdef CONFIG_DEBUG_BLK_CGROUP
219 /* This should be called with the blkg->stats_lock held. */
220 static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
221                                                 struct blkio_group *curr_blkg)
222 {
223         if (blkio_blkg_waiting(&blkg->stats))
224                 return;
225         if (blkg == curr_blkg)
226                 return;
227         blkg->stats.start_group_wait_time = sched_clock();
228         blkio_mark_blkg_waiting(&blkg->stats);
229 }
230
231 /* This should be called with the blkg->stats_lock held. */
232 static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
233 {
234         unsigned long long now;
235
236         if (!blkio_blkg_waiting(stats))
237                 return;
238
239         now = sched_clock();
240         if (time_after64(now, stats->start_group_wait_time))
241                 stats->group_wait_time += now - stats->start_group_wait_time;
242         blkio_clear_blkg_waiting(stats);
243 }
244
245 /* This should be called with the blkg->stats_lock held. */
246 static void blkio_end_empty_time(struct blkio_group_stats *stats)
247 {
248         unsigned long long now;
249
250         if (!blkio_blkg_empty(stats))
251                 return;
252
253         now = sched_clock();
254         if (time_after64(now, stats->start_empty_time))
255                 stats->empty_time += now - stats->start_empty_time;
256         blkio_clear_blkg_empty(stats);
257 }
258
259 void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg)
260 {
261         unsigned long flags;
262
263         spin_lock_irqsave(&blkg->stats_lock, flags);
264         BUG_ON(blkio_blkg_idling(&blkg->stats));
265         blkg->stats.start_idle_time = sched_clock();
266         blkio_mark_blkg_idling(&blkg->stats);
267         spin_unlock_irqrestore(&blkg->stats_lock, flags);
268 }
269 EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);
270
271 void blkiocg_update_idle_time_stats(struct blkio_group *blkg)
272 {
273         unsigned long flags;
274         unsigned long long now;
275         struct blkio_group_stats *stats;
276
277         spin_lock_irqsave(&blkg->stats_lock, flags);
278         stats = &blkg->stats;
279         if (blkio_blkg_idling(stats)) {
280                 now = sched_clock();
281                 if (time_after64(now, stats->start_idle_time))
282                         stats->idle_time += now - stats->start_idle_time;
283                 blkio_clear_blkg_idling(stats);
284         }
285         spin_unlock_irqrestore(&blkg->stats_lock, flags);
286 }
287 EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);
288
289 void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg)
290 {
291         unsigned long flags;
292         struct blkio_group_stats *stats;
293
294         spin_lock_irqsave(&blkg->stats_lock, flags);
295         stats = &blkg->stats;
296         stats->avg_queue_size_sum +=
297                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] +
298                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE];
299         stats->avg_queue_size_samples++;
300         blkio_update_group_wait_time(stats);
301         spin_unlock_irqrestore(&blkg->stats_lock, flags);
302 }
303 EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
304
305 void blkiocg_set_start_empty_time(struct blkio_group *blkg)
306 {
307         unsigned long flags;
308         struct blkio_group_stats *stats;
309
310         spin_lock_irqsave(&blkg->stats_lock, flags);
311         stats = &blkg->stats;
312
313         if (stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_READ] ||
314                         stats->stat_arr[BLKIO_STAT_QUEUED][BLKIO_STAT_WRITE]) {
315                 spin_unlock_irqrestore(&blkg->stats_lock, flags);
316                 return;
317         }
318
319         /*
320          * group is already marked empty. This can happen if cfqq got new
321          * request in parent group and moved to this group while being added
322          * to service tree. Just ignore the event and move on.
323          */
324         if (blkio_blkg_empty(stats)) {
325                 spin_unlock_irqrestore(&blkg->stats_lock, flags);
326                 return;
327         }
328
329         stats->start_empty_time = sched_clock();
330         blkio_mark_blkg_empty(stats);
331         spin_unlock_irqrestore(&blkg->stats_lock, flags);
332 }
333 EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);
334
335 void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
336                         unsigned long dequeue)
337 {
338         blkg->stats.dequeue += dequeue;
339 }
340 EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
341 #else
342 static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
343                                         struct blkio_group *curr_blkg) {}
344 static inline void blkio_end_empty_time(struct blkio_group_stats *stats) {}
345 #endif
346
347 void blkiocg_update_io_add_stats(struct blkio_group *blkg,
348                         struct blkio_group *curr_blkg, bool direction,
349                         bool sync)
350 {
351         unsigned long flags;
352
353         spin_lock_irqsave(&blkg->stats_lock, flags);
354         blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED], 1, direction,
355                         sync);
356         blkio_end_empty_time(&blkg->stats);
357         blkio_set_start_group_wait_time(blkg, curr_blkg);
358         spin_unlock_irqrestore(&blkg->stats_lock, flags);
359 }
360 EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);
361
362 void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
363                                                 bool direction, bool sync)
364 {
365         unsigned long flags;
366
367         spin_lock_irqsave(&blkg->stats_lock, flags);
368         blkio_check_and_dec_stat(blkg->stats.stat_arr[BLKIO_STAT_QUEUED],
369                                         direction, sync);
370         spin_unlock_irqrestore(&blkg->stats_lock, flags);
371 }
372 EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
373
374 void blkiocg_update_timeslice_used(struct blkio_group *blkg, unsigned long time)
375 {
376         unsigned long flags;
377
378         spin_lock_irqsave(&blkg->stats_lock, flags);
379         blkg->stats.time += time;
380         spin_unlock_irqrestore(&blkg->stats_lock, flags);
381 }
382 EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
383
384 void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
385                                 uint64_t bytes, bool direction, bool sync)
386 {
387         struct blkio_group_stats *stats;
388         unsigned long flags;
389
390         spin_lock_irqsave(&blkg->stats_lock, flags);
391         stats = &blkg->stats;
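        /* bytes >> 9: convert the byte count to 512-byte sectors */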
392         stats->sectors += bytes >> 9;
393         blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICED], 1, direction,
394                         sync);
395         blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_BYTES], bytes,
396                         direction, sync);
397         spin_unlock_irqrestore(&blkg->stats_lock, flags);
398 }
399 EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
400
401 void blkiocg_update_completion_stats(struct blkio_group *blkg,
402         uint64_t start_time, uint64_t io_start_time, bool direction, bool sync)
403 {
404         struct blkio_group_stats *stats;
405         unsigned long flags;
406         unsigned long long now = sched_clock();
407
408         spin_lock_irqsave(&blkg->stats_lock, flags);
409         stats = &blkg->stats;
410         if (time_after64(now, io_start_time))
411                 blkio_add_stat(stats->stat_arr[BLKIO_STAT_SERVICE_TIME],
412                                 now - io_start_time, direction, sync);
413         if (time_after64(io_start_time, start_time))
414                 blkio_add_stat(stats->stat_arr[BLKIO_STAT_WAIT_TIME],
415                                 io_start_time - start_time, direction, sync);
416         spin_unlock_irqrestore(&blkg->stats_lock, flags);
417 }
418 EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);
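/*
 * Per request, the wait time covers start_time -> io_start_time (time spent
 * queued before dispatch) and the service time covers io_start_time -> now
 * (time on the device); the time_after64() checks guard against the clock
 * appearing to go backwards.
 */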
419
420 void blkiocg_update_io_merged_stats(struct blkio_group *blkg, bool direction,
421                                         bool sync)
422 {
423         unsigned long flags;
424
425         spin_lock_irqsave(&blkg->stats_lock, flags);
426         blkio_add_stat(blkg->stats.stat_arr[BLKIO_STAT_MERGED], 1, direction,
427                         sync);
428         spin_unlock_irqrestore(&blkg->stats_lock, flags);
429 }
430 EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
431
432 void blkiocg_add_blkio_group(struct blkio_cgroup *blkcg,
433                 struct blkio_group *blkg, void *key, dev_t dev,
434                 enum blkio_policy_id plid)
435 {
436         unsigned long flags;
437
438         spin_lock_irqsave(&blkcg->lock, flags);
439         spin_lock_init(&blkg->stats_lock);
440         rcu_assign_pointer(blkg->key, key);
441         blkg->blkcg_id = css_id(&blkcg->css);
442         hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
443         blkg->plid = plid;
444         spin_unlock_irqrestore(&blkcg->lock, flags);
445         /* Need to take css reference ? */
446         cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));
447         blkg->dev = dev;
448 }
449 EXPORT_SYMBOL_GPL(blkiocg_add_blkio_group);
450
451 static void __blkiocg_del_blkio_group(struct blkio_group *blkg)
452 {
453         hlist_del_init_rcu(&blkg->blkcg_node);
454         blkg->blkcg_id = 0;
455 }
456
457 /*
458  * returns 0 if blkio_group was still on cgroup list. Otherwise returns 1
459  * indicating that blk_group was unhashed by the time we got to it.
460  */
461 int blkiocg_del_blkio_group(struct blkio_group *blkg)
462 {
463         struct blkio_cgroup *blkcg;
464         unsigned long flags;
465         struct cgroup_subsys_state *css;
466         int ret = 1;
467
468         rcu_read_lock();
469         css = css_lookup(&blkio_subsys, blkg->blkcg_id);
470         if (css) {
471                 blkcg = container_of(css, struct blkio_cgroup, css);
472                 spin_lock_irqsave(&blkcg->lock, flags);
473                 if (!hlist_unhashed(&blkg->blkcg_node)) {
474                         __blkiocg_del_blkio_group(blkg);
475                         ret = 0;
476                 }
477                 spin_unlock_irqrestore(&blkcg->lock, flags);
478         }
479
480         rcu_read_unlock();
481         return ret;
482 }
483 EXPORT_SYMBOL_GPL(blkiocg_del_blkio_group);
484
485 /* called under rcu_read_lock(). */
486 struct blkio_group *blkiocg_lookup_group(struct blkio_cgroup *blkcg, void *key)
487 {
488         struct blkio_group *blkg;
489         struct hlist_node *n;
490         void *__key;
491
492         hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
493                 __key = blkg->key;
494                 if (__key == key)
495                         return blkg;
496         }
497
498         return NULL;
499 }
500 EXPORT_SYMBOL_GPL(blkiocg_lookup_group);
501
502 static int
503 blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
504 {
505         struct blkio_cgroup *blkcg;
506         struct blkio_group *blkg;
507         struct blkio_group_stats *stats;
508         struct hlist_node *n;
509         uint64_t queued[BLKIO_STAT_TOTAL];
510         int i;
511 #ifdef CONFIG_DEBUG_BLK_CGROUP
512         bool idling, waiting, empty;
513         unsigned long long now = sched_clock();
514 #endif
515
516         blkcg = cgroup_to_blkio_cgroup(cgroup);
517         spin_lock_irq(&blkcg->lock);
518         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
519                 spin_lock(&blkg->stats_lock);
520                 stats = &blkg->stats;
521 #ifdef CONFIG_DEBUG_BLK_CGROUP
522                 idling = blkio_blkg_idling(stats);
523                 waiting = blkio_blkg_waiting(stats);
524                 empty = blkio_blkg_empty(stats);
525 #endif
526                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
527                         queued[i] = stats->stat_arr[BLKIO_STAT_QUEUED][i];
528                 memset(stats, 0, sizeof(struct blkio_group_stats));
529                 for (i = 0; i < BLKIO_STAT_TOTAL; i++)
530                         stats->stat_arr[BLKIO_STAT_QUEUED][i] = queued[i];
531 #ifdef CONFIG_DEBUG_BLK_CGROUP
532                 if (idling) {
533                         blkio_mark_blkg_idling(stats);
534                         stats->start_idle_time = now;
535                 }
536                 if (waiting) {
537                         blkio_mark_blkg_waiting(stats);
538                         stats->start_group_wait_time = now;
539                 }
540                 if (empty) {
541                         blkio_mark_blkg_empty(stats);
542                         stats->start_empty_time = now;
543                 }
544 #endif
545                 spin_unlock(&blkg->stats_lock);
546         }
547         spin_unlock_irq(&blkcg->lock);
548         return 0;
549 }
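/*
 * Note: the reset above deliberately preserves the currently-queued request
 * counts (and, with CONFIG_DEBUG_BLK_CGROUP, any in-progress idling/waiting/
 * empty state, restarted at "now"), so requests already in flight remain
 * accounted for after the stats are cleared.
 */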
550
551 static void blkio_get_key_name(enum stat_sub_type type, dev_t dev, char *str,
552                                 int chars_left, bool diskname_only)
553 {
554         snprintf(str, chars_left, "%d:%d", MAJOR(dev), MINOR(dev));
555         chars_left -= strlen(str);
556         if (chars_left <= 0) {
557                 printk(KERN_WARNING
558                         "Possibly incorrect cgroup stat display format\n");
559                 return;
560         }
561         if (diskname_only)
562                 return;
563         switch (type) {
564         case BLKIO_STAT_READ:
565                 strlcat(str, " Read", chars_left);
566                 break;
567         case BLKIO_STAT_WRITE:
568                 strlcat(str, " Write", chars_left);
569                 break;
570         case BLKIO_STAT_SYNC:
571                 strlcat(str, " Sync", chars_left);
572                 break;
573         case BLKIO_STAT_ASYNC:
574                 strlcat(str, " Async", chars_left);
575                 break;
576         case BLKIO_STAT_TOTAL:
577                 strlcat(str, " Total", chars_left);
578                 break;
579         default:
580                 strlcat(str, " Invalid", chars_left);
581         }
582 }
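/*
 * Example keys produced above (device numbers illustrative): for dev 8:16,
 * diskname_only yields "8:16", while the per-type variants yield "8:16 Read",
 * "8:16 Write", "8:16 Sync", "8:16 Async" and "8:16 Total".
 */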
583
584 static uint64_t blkio_fill_stat(char *str, int chars_left, uint64_t val,
585                                 struct cgroup_map_cb *cb, dev_t dev)
586 {
587         blkio_get_key_name(0, dev, str, chars_left, true);
588         cb->fill(cb, str, val);
589         return val;
590 }
591
592 /* This should be called with blkg->stats_lock held */
593 static uint64_t blkio_get_stat(struct blkio_group *blkg,
594                 struct cgroup_map_cb *cb, dev_t dev, enum stat_type type)
595 {
596         uint64_t disk_total;
597         char key_str[MAX_KEY_LEN];
598         enum stat_sub_type sub_type;
599
600         if (type == BLKIO_STAT_TIME)
601                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
602                                         blkg->stats.time, cb, dev);
603         if (type == BLKIO_STAT_SECTORS)
604                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
605                                         blkg->stats.sectors, cb, dev);
606 #ifdef CONFIG_DEBUG_BLK_CGROUP
607         if (type == BLKIO_STAT_AVG_QUEUE_SIZE) {
608                 uint64_t sum = blkg->stats.avg_queue_size_sum;
609                 uint64_t samples = blkg->stats.avg_queue_size_samples;
610                 if (samples)
611                         do_div(sum, samples);
612                 else
613                         sum = 0;
614                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1, sum, cb, dev);
615         }
616         if (type == BLKIO_STAT_GROUP_WAIT_TIME)
617                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
618                                         blkg->stats.group_wait_time, cb, dev);
619         if (type == BLKIO_STAT_IDLE_TIME)
620                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
621                                         blkg->stats.idle_time, cb, dev);
622         if (type == BLKIO_STAT_EMPTY_TIME)
623                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
624                                         blkg->stats.empty_time, cb, dev);
625         if (type == BLKIO_STAT_DEQUEUE)
626                 return blkio_fill_stat(key_str, MAX_KEY_LEN - 1,
627                                         blkg->stats.dequeue, cb, dev);
628 #endif
629
630         for (sub_type = BLKIO_STAT_READ; sub_type < BLKIO_STAT_TOTAL;
631                         sub_type++) {
632                 blkio_get_key_name(sub_type, dev, key_str, MAX_KEY_LEN, false);
633                 cb->fill(cb, key_str, blkg->stats.stat_arr[type][sub_type]);
634         }
635         disk_total = blkg->stats.stat_arr[type][BLKIO_STAT_READ] +
636                         blkg->stats.stat_arr[type][BLKIO_STAT_WRITE];
637         blkio_get_key_name(BLKIO_STAT_TOTAL, dev, key_str, MAX_KEY_LEN, false);
638         cb->fill(cb, key_str, disk_total);
639         return disk_total;
640 }
641
642 static int blkio_check_dev_num(dev_t dev)
643 {
644         int part = 0;
645         struct gendisk *disk;
646
647         disk = get_gendisk(dev, &part);
648         if (!disk || part)
649                 return -ENODEV;
650
651         return 0;
652 }
653
654 static int blkio_policy_parse_and_set(char *buf,
655         struct blkio_policy_node *newpn, enum blkio_policy_id plid, int fileid)
656 {
657         char *s[4], *p, *major_s = NULL, *minor_s = NULL;
658         int ret;
659         unsigned long major, minor, temp, iops;
660         int i = 0;
661         dev_t dev;
662         u64 bps;
663
664         memset(s, 0, sizeof(s));
665
666         while ((p = strsep(&buf, " ")) != NULL) {
667                 if (!*p)
668                         continue;
669
670                 s[i++] = p;
671
672                 /* Prevent the user from inputting too many fields */
673                 if (i == 3)
674                         break;
675         }
676
677         if (i != 2)
678                 return -EINVAL;
679
680         p = strsep(&s[0], ":");
681         if (p != NULL)
682                 major_s = p;
683         else
684                 return -EINVAL;
685
686         minor_s = s[0];
687         if (!minor_s)
688                 return -EINVAL;
689
690         ret = strict_strtoul(major_s, 10, &major);
691         if (ret)
692                 return -EINVAL;
693
694         ret = strict_strtoul(minor_s, 10, &minor);
695         if (ret)
696                 return -EINVAL;
697
698         dev = MKDEV(major, minor);
699
700         ret = blkio_check_dev_num(dev);
701         if (ret)
702                 return ret;
703
704         newpn->dev = dev;
705
706         if (s[1] == NULL)
707                 return -EINVAL;
708
709         switch (plid) {
710         case BLKIO_POLICY_PROP:
711                 ret = strict_strtoul(s[1], 10, &temp);
712                 if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) ||
713                         temp > BLKIO_WEIGHT_MAX)
714                         return -EINVAL;
715
716                 newpn->plid = plid;
717                 newpn->fileid = fileid;
718                 newpn->val.weight = temp;
719                 break;
720         case BLKIO_POLICY_THROTL:
721                 switch(fileid) {
722                 case BLKIO_THROTL_read_bps_device:
723                 case BLKIO_THROTL_write_bps_device:
724                         ret = strict_strtoull(s[1], 10, &bps);
725                         if (ret)
726                                 return -EINVAL;
727
728                         newpn->plid = plid;
729                         newpn->fileid = fileid;
730                         newpn->val.bps = bps;
731                         break;
732                 case BLKIO_THROTL_read_iops_device:
733                 case BLKIO_THROTL_write_iops_device:
734                         ret = strict_strtoul(s[1], 10, &iops);
735                         if (ret)
736                                 return -EINVAL;
737
738                         newpn->plid = plid;
739                         newpn->fileid = fileid;
740                         newpn->val.iops = iops;
741                         break;
742                 }
743                 break;
744         default:
745                 BUG();
746         }
747
748         return 0;
749 }
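/*
 * The accepted input format is "<major>:<minor> <value>", e.g. (illustrative
 * device number) "8:16 1048576" for a bps rule or "8:16 500" for a weight or
 * iops rule. The device must resolve to a whole gendisk; partitions are
 * rejected by blkio_check_dev_num().
 */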
750
751 unsigned int blkcg_get_weight(struct blkio_cgroup *blkcg,
752                               dev_t dev)
753 {
754         struct blkio_policy_node *pn;
755
756         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_PROP,
757                                 BLKIO_PROP_weight_device);
758         if (pn)
759                 return pn->val.weight;
760         else
761                 return blkcg->weight;
762 }
763 EXPORT_SYMBOL_GPL(blkcg_get_weight);
764
765 uint64_t blkcg_get_read_bps(struct blkio_cgroup *blkcg, dev_t dev)
766 {
767         struct blkio_policy_node *pn;
768
769         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
770                                 BLKIO_THROTL_read_bps_device);
771         if (pn)
772                 return pn->val.bps;
773         else
774                 return -1;
775 }
776
777 uint64_t blkcg_get_write_bps(struct blkio_cgroup *blkcg, dev_t dev)
778 {
779         struct blkio_policy_node *pn;
780         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
781                                 BLKIO_THROTL_write_bps_device);
782         if (pn)
783                 return pn->val.bps;
784         else
785                 return -1;
786 }
787
788 unsigned int blkcg_get_read_iops(struct blkio_cgroup *blkcg, dev_t dev)
789 {
790         struct blkio_policy_node *pn;
791
792         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
793                                 BLKIO_THROTL_read_iops_device);
794         if (pn)
795                 return pn->val.iops;
796         else
797                 return -1;
798 }
799
800 unsigned int blkcg_get_write_iops(struct blkio_cgroup *blkcg, dev_t dev)
801 {
802         struct blkio_policy_node *pn;
803         pn = blkio_policy_search_node(blkcg, dev, BLKIO_POLICY_THROTL,
804                                 BLKIO_THROTL_write_iops_device);
805         if (pn)
806                 return pn->val.iops;
807         else
808                 return -1;
809 }
810
811 /* Checks whether the user asked to delete a policy rule */
812 static bool blkio_delete_rule_command(struct blkio_policy_node *pn)
813 {
814         switch(pn->plid) {
815         case BLKIO_POLICY_PROP:
816                 if (pn->val.weight == 0)
817                         return 1;
818                 break;
819         case BLKIO_POLICY_THROTL:
820                 switch(pn->fileid) {
821                 case BLKIO_THROTL_read_bps_device:
822                 case BLKIO_THROTL_write_bps_device:
823                         if (pn->val.bps == 0)
824                                 return 1;
825                         break;
826                 case BLKIO_THROTL_read_iops_device:
827                 case BLKIO_THROTL_write_iops_device:
828                         if (pn->val.iops == 0)
829                                 return 1;
830                 }
831                 break;
832         default:
833                 BUG();
834         }
835
836         return 0;
837 }
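/*
 * Hence writing a value of 0 for an existing device (e.g. "8:16 0") is treated
 * as a request to delete that rule rather than to install a zero limit/weight.
 */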
838
839 static void blkio_update_policy_rule(struct blkio_policy_node *oldpn,
840                                         struct blkio_policy_node *newpn)
841 {
842         switch(oldpn->plid) {
843         case BLKIO_POLICY_PROP:
844                 oldpn->val.weight = newpn->val.weight;
845                 break;
846         case BLKIO_POLICY_THROTL:
847                 switch(newpn->fileid) {
848                 case BLKIO_THROTL_read_bps_device:
849                 case BLKIO_THROTL_write_bps_device:
850                         oldpn->val.bps = newpn->val.bps;
851                         break;
852                 case BLKIO_THROTL_read_iops_device:
853                 case BLKIO_THROTL_write_iops_device:
854                         oldpn->val.iops = newpn->val.iops;
855                 }
856                 break;
857         default:
858                 BUG();
859         }
860 }
861
862 /*
863  * Some rules/values in blkg have changed. Propagate those to the respective
864  * policies.
865  */
866 static void blkio_update_blkg_policy(struct blkio_cgroup *blkcg,
867                 struct blkio_group *blkg, struct blkio_policy_node *pn)
868 {
869         unsigned int weight, iops;
870         u64 bps;
871
872         switch(pn->plid) {
873         case BLKIO_POLICY_PROP:
874                 weight = pn->val.weight ? pn->val.weight :
875                                 blkcg->weight;
876                 blkio_update_group_weight(blkg, weight);
877                 break;
878         case BLKIO_POLICY_THROTL:
879                 switch(pn->fileid) {
880                 case BLKIO_THROTL_read_bps_device:
881                 case BLKIO_THROTL_write_bps_device:
882                         bps = pn->val.bps ? pn->val.bps : (-1);
883                         blkio_update_group_bps(blkg, bps, pn->fileid);
884                         break;
885                 case BLKIO_THROTL_read_iops_device:
886                 case BLKIO_THROTL_write_iops_device:
887                         iops = pn->val.iops ? pn->val.iops : (-1);
888                         blkio_update_group_iops(blkg, iops, pn->fileid);
889                         break;
890                 }
891                 break;
892         default:
893                 BUG();
894         }
895 }
896
897 /*
898  * A policy node rule has been updated. Propagate this update to all the
899  * block groups which might be affected by it.
900  */
901 static void blkio_update_policy_node_blkg(struct blkio_cgroup *blkcg,
902                                 struct blkio_policy_node *pn)
903 {
904         struct blkio_group *blkg;
905         struct hlist_node *n;
906
907         spin_lock(&blkio_list_lock);
908         spin_lock_irq(&blkcg->lock);
909
910         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
911                 if (pn->dev != blkg->dev || pn->plid != blkg->plid)
912                         continue;
913                 blkio_update_blkg_policy(blkcg, blkg, pn);
914         }
915
916         spin_unlock_irq(&blkcg->lock);
917         spin_unlock(&blkio_list_lock);
918 }
919
920 static int blkiocg_file_write(struct cgroup *cgrp, struct cftype *cft,
921                                        const char *buffer)
922 {
923         int ret = 0;
924         char *buf;
925         struct blkio_policy_node *newpn, *pn;
926         struct blkio_cgroup *blkcg;
927         int keep_newpn = 0;
928         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
929         int fileid = BLKIOFILE_ATTR(cft->private);
930
931         buf = kstrdup(buffer, GFP_KERNEL);
932         if (!buf)
933                 return -ENOMEM;
934
935         newpn = kzalloc(sizeof(*newpn), GFP_KERNEL);
936         if (!newpn) {
937                 ret = -ENOMEM;
938                 goto free_buf;
939         }
940
941         ret = blkio_policy_parse_and_set(buf, newpn, plid, fileid);
942         if (ret)
943                 goto free_newpn;
944
945         blkcg = cgroup_to_blkio_cgroup(cgrp);
946
947         spin_lock_irq(&blkcg->lock);
948
949         pn = blkio_policy_search_node(blkcg, newpn->dev, plid, fileid);
950         if (!pn) {
951                 if (!blkio_delete_rule_command(newpn)) {
952                         blkio_policy_insert_node(blkcg, newpn);
953                         keep_newpn = 1;
954                 }
955                 spin_unlock_irq(&blkcg->lock);
956                 goto update_io_group;
957         }
958
959         if (blkio_delete_rule_command(newpn)) {
960                 blkio_policy_delete_node(pn);
961                 spin_unlock_irq(&blkcg->lock);
962                 goto update_io_group;
963         }
964         spin_unlock_irq(&blkcg->lock);
965
966         blkio_update_policy_rule(pn, newpn);
967
968 update_io_group:
969         blkio_update_policy_node_blkg(blkcg, newpn);
970
971 free_newpn:
972         if (!keep_newpn)
973                 kfree(newpn);
974 free_buf:
975         kfree(buf);
976         return ret;
977 }
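/*
 * Userspace usage sketch (paths and device numbers are illustrative and assume
 * the blkio controller is mounted at /cgroup/blkio):
 *
 *	# echo "8:16 1048576" > /cgroup/blkio/grp1/blkio.throttle.read_bps_device
 *	# echo "8:16 0"       > /cgroup/blkio/grp1/blkio.throttle.read_bps_device
 *
 * The first write installs (or updates) a 1MB/s read limit for device 8:16 in
 * that cgroup; the second deletes the rule again, per the delete-rule handling
 * above. Either way the affected blkio_groups are notified via
 * blkio_update_policy_node_blkg().
 */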
978
979 static void
980 blkio_print_policy_node(struct seq_file *m, struct blkio_policy_node *pn)
981 {
982         switch(pn->plid) {
983                 case BLKIO_POLICY_PROP:
984                         if (pn->fileid == BLKIO_PROP_weight_device)
985                                 seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
986                                         MINOR(pn->dev), pn->val.weight);
987                         break;
988                 case BLKIO_POLICY_THROTL:
989                         switch(pn->fileid) {
990                         case BLKIO_THROTL_read_bps_device:
991                         case BLKIO_THROTL_write_bps_device:
992                                 seq_printf(m, "%u:%u\t%llu\n", MAJOR(pn->dev),
993                                         MINOR(pn->dev), pn->val.bps);
994                                 break;
995                         case BLKIO_THROTL_read_iops_device:
996                         case BLKIO_THROTL_write_iops_device:
997                                 seq_printf(m, "%u:%u\t%u\n", MAJOR(pn->dev),
998                                         MINOR(pn->dev), pn->val.iops);
999                                 break;
1000                         }
1001                         break;
1002                 default:
1003                         BUG();
1004         }
1005 }
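/*
 * Reads of these files therefore list one rule per line as
 * "<major>:<minor>\t<value>", e.g. "8:16\t1048576".
 */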
1006
1007 /* cgroup files which read their data from policy nodes end up here */
1008 static void blkio_read_policy_node_files(struct cftype *cft,
1009                         struct blkio_cgroup *blkcg, struct seq_file *m)
1010 {
1011         struct blkio_policy_node *pn;
1012
1013         if (!list_empty(&blkcg->policy_list)) {
1014                 spin_lock_irq(&blkcg->lock);
1015                 list_for_each_entry(pn, &blkcg->policy_list, node) {
1016                         if (!pn_matches_cftype(cft, pn))
1017                                 continue;
1018                         blkio_print_policy_node(m, pn);
1019                 }
1020                 spin_unlock_irq(&blkcg->lock);
1021         }
1022 }
1023
1024 static int blkiocg_file_read(struct cgroup *cgrp, struct cftype *cft,
1025                                 struct seq_file *m)
1026 {
1027         struct blkio_cgroup *blkcg;
1028         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1029         int name = BLKIOFILE_ATTR(cft->private);
1030
1031         blkcg = cgroup_to_blkio_cgroup(cgrp);
1032
1033         switch(plid) {
1034         case BLKIO_POLICY_PROP:
1035                 switch(name) {
1036                 case BLKIO_PROP_weight_device:
1037                         blkio_read_policy_node_files(cft, blkcg, m);
1038                         return 0;
1039                 default:
1040                         BUG();
1041                 }
1042                 break;
1043         case BLKIO_POLICY_THROTL:
1044                 switch(name){
1045                 case BLKIO_THROTL_read_bps_device:
1046                 case BLKIO_THROTL_write_bps_device:
1047                 case BLKIO_THROTL_read_iops_device:
1048                 case BLKIO_THROTL_write_iops_device:
1049                         blkio_read_policy_node_files(cft, blkcg, m);
1050                         return 0;
1051                 default:
1052                         BUG();
1053                 }
1054                 break;
1055         default:
1056                 BUG();
1057         }
1058
1059         return 0;
1060 }
1061
1062 static int blkio_read_blkg_stats(struct blkio_cgroup *blkcg,
1063                 struct cftype *cft, struct cgroup_map_cb *cb, enum stat_type type,
1064                 bool show_total)
1065 {
1066         struct blkio_group *blkg;
1067         struct hlist_node *n;
1068         uint64_t cgroup_total = 0;
1069
1070         rcu_read_lock();
1071         hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node) {
1072                 if (blkg->dev) {
1073                         if (!cftype_blkg_same_policy(cft, blkg))
1074                                 continue;
1075                         spin_lock_irq(&blkg->stats_lock);
1076                         cgroup_total += blkio_get_stat(blkg, cb, blkg->dev,
1077                                                 type);
1078                         spin_unlock_irq(&blkg->stats_lock);
1079                 }
1080         }
1081         if (show_total)
1082                 cb->fill(cb, "Total", cgroup_total);
1083         rcu_read_unlock();
1084         return 0;
1085 }
1086
1087 /* All map-type cgroup files are serviced by this function */
1088 static int blkiocg_file_read_map(struct cgroup *cgrp, struct cftype *cft,
1089                                 struct cgroup_map_cb *cb)
1090 {
1091         struct blkio_cgroup *blkcg;
1092         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1093         int name = BLKIOFILE_ATTR(cft->private);
1094
1095         blkcg = cgroup_to_blkio_cgroup(cgrp);
1096
1097         switch(plid) {
1098         case BLKIO_POLICY_PROP:
1099                 switch(name) {
1100                 case BLKIO_PROP_time:
1101                         return blkio_read_blkg_stats(blkcg, cft, cb,
1102                                                 BLKIO_STAT_TIME, 0);
1103                 case BLKIO_PROP_sectors:
1104                         return blkio_read_blkg_stats(blkcg, cft, cb,
1105                                                 BLKIO_STAT_SECTORS, 0);
1106                 case BLKIO_PROP_io_service_bytes:
1107                         return blkio_read_blkg_stats(blkcg, cft, cb,
1108                                                 BLKIO_STAT_SERVICE_BYTES, 1);
1109                 case BLKIO_PROP_io_serviced:
1110                         return blkio_read_blkg_stats(blkcg, cft, cb,
1111                                                 BLKIO_STAT_SERVICED, 1);
1112                 case BLKIO_PROP_io_service_time:
1113                         return blkio_read_blkg_stats(blkcg, cft, cb,
1114                                                 BLKIO_STAT_SERVICE_TIME, 1);
1115                 case BLKIO_PROP_io_wait_time:
1116                         return blkio_read_blkg_stats(blkcg, cft, cb,
1117                                                 BLKIO_STAT_WAIT_TIME, 1);
1118                 case BLKIO_PROP_io_merged:
1119                         return blkio_read_blkg_stats(blkcg, cft, cb,
1120                                                 BLKIO_STAT_MERGED, 1);
1121                 case BLKIO_PROP_io_queued:
1122                         return blkio_read_blkg_stats(blkcg, cft, cb,
1123                                                 BLKIO_STAT_QUEUED, 1);
1124 #ifdef CONFIG_DEBUG_BLK_CGROUP
1125                 case BLKIO_PROP_dequeue:
1126                         return blkio_read_blkg_stats(blkcg, cft, cb,
1127                                                 BLKIO_STAT_DEQUEUE, 0);
1128                 case BLKIO_PROP_avg_queue_size:
1129                         return blkio_read_blkg_stats(blkcg, cft, cb,
1130                                                 BLKIO_STAT_AVG_QUEUE_SIZE, 0);
1131                 case BLKIO_PROP_group_wait_time:
1132                         return blkio_read_blkg_stats(blkcg, cft, cb,
1133                                                 BLKIO_STAT_GROUP_WAIT_TIME, 0);
1134                 case BLKIO_PROP_idle_time:
1135                         return blkio_read_blkg_stats(blkcg, cft, cb,
1136                                                 BLKIO_STAT_IDLE_TIME, 0);
1137                 case BLKIO_PROP_empty_time:
1138                         return blkio_read_blkg_stats(blkcg, cft, cb,
1139                                                 BLKIO_STAT_EMPTY_TIME, 0);
1140 #endif
1141                 default:
1142                         BUG();
1143                 }
1144                 break;
1145         case BLKIO_POLICY_THROTL:
1146                 switch(name){
1147                 case BLKIO_THROTL_io_service_bytes:
1148                         return blkio_read_blkg_stats(blkcg, cft, cb,
1149                                                 BLKIO_STAT_SERVICE_BYTES, 1);
1150                 case BLKIO_THROTL_io_serviced:
1151                         return blkio_read_blkg_stats(blkcg, cft, cb,
1152                                                 BLKIO_STAT_SERVICED, 1);
1153                 default:
1154                         BUG();
1155                 }
1156                 break;
1157         default:
1158                 BUG();
1159         }
1160
1161         return 0;
1162 }
1163
1164 static int blkio_weight_write(struct blkio_cgroup *blkcg, u64 val)
1165 {
1166         struct blkio_group *blkg;
1167         struct hlist_node *n;
1168         struct blkio_policy_node *pn;
1169
1170         if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
1171                 return -EINVAL;
1172
1173         spin_lock(&blkio_list_lock);
1174         spin_lock_irq(&blkcg->lock);
1175         blkcg->weight = (unsigned int)val;
1176
1177         hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
1178                 pn = blkio_policy_search_node(blkcg, blkg->dev,
1179                                 BLKIO_POLICY_PROP, BLKIO_PROP_weight_device);
1180                 if (pn)
1181                         continue;
1182
1183                 blkio_update_group_weight(blkg, blkcg->weight);
1184         }
1185         spin_unlock_irq(&blkcg->lock);
1186         spin_unlock(&blkio_list_lock);
1187         return 0;
1188 }
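/*
 * A write to blkio.weight (e.g. "echo 500 > blkio.weight") thus updates the
 * cgroup-wide default and is pushed to every blkio_group of this cgroup,
 * except those covered by a per-device weight_device override.
 */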
1189
1190 static u64 blkiocg_file_read_u64(struct cgroup *cgrp, struct cftype *cft) {
1191         struct blkio_cgroup *blkcg;
1192         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1193         int name = BLKIOFILE_ATTR(cft->private);
1194
1195         blkcg = cgroup_to_blkio_cgroup(cgrp);
1196
1197         switch(plid) {
1198         case BLKIO_POLICY_PROP:
1199                 switch(name) {
1200                 case BLKIO_PROP_weight:
1201                         return (u64)blkcg->weight;
1202                 }
1203                 break;
1204         default:
1205                 BUG();
1206         }
1207         return 0;
1208 }
1209
1210 static int
1211 blkiocg_file_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
1212 {
1213         struct blkio_cgroup *blkcg;
1214         enum blkio_policy_id plid = BLKIOFILE_POLICY(cft->private);
1215         int name = BLKIOFILE_ATTR(cft->private);
1216
1217         blkcg = cgroup_to_blkio_cgroup(cgrp);
1218
1219         switch(plid) {
1220         case BLKIO_POLICY_PROP:
1221                 switch(name) {
1222                 case BLKIO_PROP_weight:
1223                         return blkio_weight_write(blkcg, val);
1224                 }
1225                 break;
1226         default:
1227                 BUG();
1228         }
1229
1230         return 0;
1231 }
1232
1233 struct cftype blkio_files[] = {
1234         {
1235                 .name = "weight_device",
1236                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1237                                 BLKIO_PROP_weight_device),
1238                 .read_seq_string = blkiocg_file_read,
1239                 .write_string = blkiocg_file_write,
1240                 .max_write_len = 256,
1241         },
1242         {
1243                 .name = "weight",
1244                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1245                                 BLKIO_PROP_weight),
1246                 .read_u64 = blkiocg_file_read_u64,
1247                 .write_u64 = blkiocg_file_write_u64,
1248         },
1249         {
1250                 .name = "time",
1251                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1252                                 BLKIO_PROP_time),
1253                 .read_map = blkiocg_file_read_map,
1254         },
1255         {
1256                 .name = "sectors",
1257                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1258                                 BLKIO_PROP_sectors),
1259                 .read_map = blkiocg_file_read_map,
1260         },
1261         {
1262                 .name = "io_service_bytes",
1263                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1264                                 BLKIO_PROP_io_service_bytes),
1265                 .read_map = blkiocg_file_read_map,
1266         },
1267         {
1268                 .name = "io_serviced",
1269                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1270                                 BLKIO_PROP_io_serviced),
1271                 .read_map = blkiocg_file_read_map,
1272         },
1273         {
1274                 .name = "io_service_time",
1275                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1276                                 BLKIO_PROP_io_service_time),
1277                 .read_map = blkiocg_file_read_map,
1278         },
1279         {
1280                 .name = "io_wait_time",
1281                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1282                                 BLKIO_PROP_io_wait_time),
1283                 .read_map = blkiocg_file_read_map,
1284         },
1285         {
1286                 .name = "io_merged",
1287                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1288                                 BLKIO_PROP_io_merged),
1289                 .read_map = blkiocg_file_read_map,
1290         },
1291         {
1292                 .name = "io_queued",
1293                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1294                                 BLKIO_PROP_io_queued),
1295                 .read_map = blkiocg_file_read_map,
1296         },
1297         {
1298                 .name = "reset_stats",
1299                 .write_u64 = blkiocg_reset_stats,
1300         },
1301 #ifdef CONFIG_BLK_DEV_THROTTLING
1302         {
1303                 .name = "throttle.read_bps_device",
1304                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1305                                 BLKIO_THROTL_read_bps_device),
1306                 .read_seq_string = blkiocg_file_read,
1307                 .write_string = blkiocg_file_write,
1308                 .max_write_len = 256,
1309         },
1310
1311         {
1312                 .name = "throttle.write_bps_device",
1313                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1314                                 BLKIO_THROTL_write_bps_device),
1315                 .read_seq_string = blkiocg_file_read,
1316                 .write_string = blkiocg_file_write,
1317                 .max_write_len = 256,
1318         },
1319
1320         {
1321                 .name = "throttle.read_iops_device",
1322                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1323                                 BLKIO_THROTL_read_iops_device),
1324                 .read_seq_string = blkiocg_file_read,
1325                 .write_string = blkiocg_file_write,
1326                 .max_write_len = 256,
1327         },
1328
1329         {
1330                 .name = "throttle.write_iops_device",
1331                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1332                                 BLKIO_THROTL_write_iops_device),
1333                 .read_seq_string = blkiocg_file_read,
1334                 .write_string = blkiocg_file_write,
1335                 .max_write_len = 256,
1336         },
1337         {
1338                 .name = "throttle.io_service_bytes",
1339                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1340                                 BLKIO_THROTL_io_service_bytes),
1341                 .read_map = blkiocg_file_read_map,
1342         },
1343         {
1344                 .name = "throttle.io_serviced",
1345                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_THROTL,
1346                                 BLKIO_THROTL_io_serviced),
1347                 .read_map = blkiocg_file_read_map,
1348         },
1349 #endif /* CONFIG_BLK_DEV_THROTTLING */
1350
1351 #ifdef CONFIG_DEBUG_BLK_CGROUP
1352         {
1353                 .name = "avg_queue_size",
1354                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1355                                 BLKIO_PROP_avg_queue_size),
1356                 .read_map = blkiocg_file_read_map,
1357         },
1358         {
1359                 .name = "group_wait_time",
1360                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1361                                 BLKIO_PROP_group_wait_time),
1362                 .read_map = blkiocg_file_read_map,
1363         },
1364         {
1365                 .name = "idle_time",
1366                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1367                                 BLKIO_PROP_idle_time),
1368                 .read_map = blkiocg_file_read_map,
1369         },
1370         {
1371                 .name = "empty_time",
1372                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1373                                 BLKIO_PROP_empty_time),
1374                 .read_map = blkiocg_file_read_map,
1375         },
1376         {
1377                 .name = "dequeue",
1378                 .private = BLKIOFILE_PRIVATE(BLKIO_POLICY_PROP,
1379                                 BLKIO_PROP_dequeue),
1380                 .read_map = blkiocg_file_read_map,
1381         },
1382 #endif
1383 };
1384
1385 static int blkiocg_populate(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1386 {
1387         return cgroup_add_files(cgroup, subsys, blkio_files,
1388                                 ARRAY_SIZE(blkio_files));
1389 }
1390
1391 static void blkiocg_destroy(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1392 {
1393         struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
1394         unsigned long flags;
1395         struct blkio_group *blkg;
1396         void *key;
1397         struct blkio_policy_type *blkiop;
1398         struct blkio_policy_node *pn, *pntmp;
1399
1400         rcu_read_lock();
1401         do {
1402                 spin_lock_irqsave(&blkcg->lock, flags);
1403
1404                 if (hlist_empty(&blkcg->blkg_list)) {
1405                         spin_unlock_irqrestore(&blkcg->lock, flags);
1406                         break;
1407                 }
1408
1409                 blkg = hlist_entry(blkcg->blkg_list.first, struct blkio_group,
1410                                         blkcg_node);
1411                 key = rcu_dereference(blkg->key);
1412                 __blkiocg_del_blkio_group(blkg);
1413
1414                 spin_unlock_irqrestore(&blkcg->lock, flags);
1415
1416                 /*
1417                  * This blkio_group is being unlinked as associated cgroup is
1418                  * going away. Let all the IO controlling policies know about
1419                  * this event.
1420                  */
1421                 spin_lock(&blkio_list_lock);
1422                 list_for_each_entry(blkiop, &blkio_list, list) {
1423                         if (blkiop->plid != blkg->plid)
1424                                 continue;
1425                         blkiop->ops.blkio_unlink_group_fn(key, blkg);
1426                 }
1427                 spin_unlock(&blkio_list_lock);
1428         } while (1);
1429
1430         list_for_each_entry_safe(pn, pntmp, &blkcg->policy_list, node) {
1431                 blkio_policy_delete_node(pn);
1432                 kfree(pn);
1433         }
1434
1435         free_css_id(&blkio_subsys, &blkcg->css);
1436         rcu_read_unlock();
1437         if (blkcg != &blkio_root_cgroup)
1438                 kfree(blkcg);
1439 }
1440
1441 static struct cgroup_subsys_state *
1442 blkiocg_create(struct cgroup_subsys *subsys, struct cgroup *cgroup)
1443 {
1444         struct blkio_cgroup *blkcg;
1445         struct cgroup *parent = cgroup->parent;
1446
1447         if (!parent) {
1448                 blkcg = &blkio_root_cgroup;
1449                 goto done;
1450         }
1451
1452         /* Currently we do not support a hierarchy deeper than two levels (0,1) */
1453         if (parent != cgroup->top_cgroup)
1454                 return ERR_PTR(-EINVAL);
1455
1456         blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
1457         if (!blkcg)
1458                 return ERR_PTR(-ENOMEM);
1459
1460         blkcg->weight = BLKIO_WEIGHT_DEFAULT;
1461 done:
1462         spin_lock_init(&blkcg->lock);
1463         INIT_HLIST_HEAD(&blkcg->blkg_list);
1464
1465         INIT_LIST_HEAD(&blkcg->policy_list);
1466         return &blkcg->css;
1467 }
1468
1469 /*
1470  * We cannot support shared io contexts, as we have no means to support
1471  * two tasks with the same ioc in two different groups without major rework
1472  * of the main cic data structures.  For now we allow a task to change
1473  * its cgroup only if it's the only owner of its ioc.
1474  */
1475 static int blkiocg_can_attach(struct cgroup_subsys *subsys,
1476                                 struct cgroup *cgroup, struct task_struct *tsk,
1477                                 bool threadgroup)
1478 {
1479         struct io_context *ioc;
1480         int ret = 0;
1481
1482         /* task_lock() is needed to avoid races with exit_io_context() */
1483         task_lock(tsk);
1484         ioc = tsk->io_context;
1485         if (ioc && atomic_read(&ioc->nr_tasks) > 1)
1486                 ret = -EINVAL;
1487         task_unlock(tsk);
1488
1489         return ret;
1490 }
1491
1492 static void blkiocg_attach(struct cgroup_subsys *subsys, struct cgroup *cgroup,
1493                                 struct cgroup *prev, struct task_struct *tsk,
1494                                 bool threadgroup)
1495 {
1496         struct io_context *ioc;
1497
1498         task_lock(tsk);
1499         ioc = tsk->io_context;
1500         if (ioc)
1501                 ioc->cgroup_changed = 1;
1502         task_unlock(tsk);
1503 }
1504
1505 void blkio_policy_register(struct blkio_policy_type *blkiop)
1506 {
1507         spin_lock(&blkio_list_lock);
1508         list_add_tail(&blkiop->list, &blkio_list);
1509         spin_unlock(&blkio_list_lock);
1510 }
1511 EXPORT_SYMBOL_GPL(blkio_policy_register);
1512
1513 void blkio_policy_unregister(struct blkio_policy_type *blkiop)
1514 {
1515         spin_lock(&blkio_list_lock);
1516         list_del_init(&blkiop->list);
1517         spin_unlock(&blkio_list_lock);
1518 }
1519 EXPORT_SYMBOL_GPL(blkio_policy_unregister);
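/*
 * Registration sketch (illustrative; only the fields actually referenced in
 * this file are shown, and the names my_policy/my_unlink_group/my_update_weight
 * are hypothetical; the real definitions live in blk-cgroup.h and in the
 * individual policies). A proportional-weight policy would do roughly:
 *
 *	static struct blkio_policy_type my_policy = {
 *		.ops = {
 *			.blkio_unlink_group_fn		= my_unlink_group,
 *			.blkio_update_group_weight_fn	= my_update_weight,
 *		},
 *		.plid	= BLKIO_POLICY_PROP,
 *	};
 *
 *	blkio_policy_register(&my_policy);	(on init)
 *	blkio_policy_unregister(&my_policy);	(on exit)
 *
 * after which weight updates and group-unlink events from this file are fanned
 * out to the policy via the blkio_list walks in the helpers above.
 */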
1520
1521 static int __init init_cgroup_blkio(void)
1522 {
1523         return cgroup_load_subsys(&blkio_subsys);
1524 }
1525
1526 static void __exit exit_cgroup_blkio(void)
1527 {
1528         cgroup_unload_subsys(&blkio_subsys);
1529 }
1530
1531 module_init(init_cgroup_blkio);
1532 module_exit(exit_cgroup_blkio);
1533 MODULE_LICENSE("GPL");