/*
 * Common Block IO controller cgroup interface
 *
 * Based on ideas and code from CFQ, CFS and BFQ:
 * Copyright (C) 2003 Jens Axboe <axboe@kernel.dk>
 *
 * Copyright (C) 2008 Fabio Checconi <fabio@gandalf.sssup.it>
 *		      Paolo Valente <paolo.valente@unimore.it>
 *
 * Copyright (C) 2009 Vivek Goyal <vgoyal@redhat.com>
 *		      Nauman Rafique <nauman@google.com>
 */
#include <linux/ioprio.h>
#include <linux/kdev_t.h>
#include <linux/module.h>
#include <linux/err.h>
#include <linux/blkdev.h>
#include <linux/slab.h>
#include <linux/genhd.h>
#include <linux/delay.h>
#include <linux/atomic.h>
#include "blk-cgroup.h"

#define MAX_KEY_LEN 100

static DEFINE_SPINLOCK(blkio_list_lock);
static LIST_HEAD(blkio_list);

static DEFINE_MUTEX(all_q_mutex);
static LIST_HEAD(all_q_list);

/* List of groups pending per cpu stats allocation */
static DEFINE_SPINLOCK(alloc_list_lock);
static LIST_HEAD(alloc_list);

static void blkio_stat_alloc_fn(struct work_struct *);
static DECLARE_DELAYED_WORK(blkio_stat_alloc_work, blkio_stat_alloc_fn);

struct blkio_cgroup blkio_root_cgroup = { .weight = 2*BLKIO_WEIGHT_DEFAULT };
EXPORT_SYMBOL_GPL(blkio_root_cgroup);

static struct blkio_policy_type *blkio_policy[BLKIO_NR_POLICIES];
struct blkio_cgroup *cgroup_to_blkio_cgroup(struct cgroup *cgroup)
{
	return container_of(cgroup_subsys_state(cgroup, blkio_subsys_id),
			    struct blkio_cgroup, css);
}
EXPORT_SYMBOL_GPL(cgroup_to_blkio_cgroup);

static struct blkio_cgroup *task_blkio_cgroup(struct task_struct *tsk)
{
	return container_of(task_subsys_state(tsk, blkio_subsys_id),
			    struct blkio_cgroup, css);
}

struct blkio_cgroup *bio_blkio_cgroup(struct bio *bio)
{
	if (bio && bio->bi_css)
		return container_of(bio->bi_css, struct blkio_cgroup, css);
	return task_blkio_cgroup(current);
}
EXPORT_SYMBOL_GPL(bio_blkio_cgroup);
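
/*
 * Usage note (a minimal sketch, not code from this file): callers on the
 * submission path typically resolve the owning blkcg from the bio when it
 * already carries a css association and fall back to the submitting task
 * otherwise:
 *
 *	rcu_read_lock();
 *	blkcg = bio_blkio_cgroup(bio);      - bio->bi_css if set, else current
 *	blkg = blkg_lookup_create(blkcg, q, false);
 *	rcu_read_unlock();
 *
 * The rcu_read_lock() pairing is an assumption drawn from
 * blkg_lookup_create() below, which warns unless the RCU read lock is held;
 * the caller must also hold q->queue_lock around the lookup.
 */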
static inline void blkio_update_group_weight(struct blkio_group *blkg,
					     int plid, unsigned int weight)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;
		if (blkiop->ops.blkio_update_group_weight_fn)
			blkiop->ops.blkio_update_group_weight_fn(blkg->q,
							blkg, weight);
	}
}

static inline void blkio_update_group_bps(struct blkio_group *blkg, int plid,
					  u64 bps, int rw)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;

		if (rw == READ && blkiop->ops.blkio_update_group_read_bps_fn)
			blkiop->ops.blkio_update_group_read_bps_fn(blkg->q,
								   blkg, bps);

		if (rw == WRITE && blkiop->ops.blkio_update_group_write_bps_fn)
			blkiop->ops.blkio_update_group_write_bps_fn(blkg->q,
								    blkg, bps);
	}
}

static inline void blkio_update_group_iops(struct blkio_group *blkg, int plid,
					   u64 iops, int rw)
{
	struct blkio_policy_type *blkiop;

	list_for_each_entry(blkiop, &blkio_list, list) {
		/* If this policy does not own the blkg, do not send updates */
		if (blkiop->plid != plid)
			continue;

		if (rw == READ && blkiop->ops.blkio_update_group_read_iops_fn)
			blkiop->ops.blkio_update_group_read_iops_fn(blkg->q,
								    blkg, iops);

		if (rw == WRITE && blkiop->ops.blkio_update_group_write_iops_fn)
			blkiop->ops.blkio_update_group_write_iops_fn(blkg->q,
								     blkg, iops);
	}
}
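
/*
 * Design note: the three helpers above fan a configuration change out to
 * whichever registered policy owns the blkg (matched by plid), invoking the
 * policy's optional callback with the request_queue, the group and the new
 * value.  A hedged sketch of the call chain, inferred from the cftype
 * handlers later in this file:
 *
 *	blkcg_set_weight_device()          - write to "weight_device"
 *	  -> blkio_update_group_weight(blkg, BLKIO_POLICY_PROP, v)
 *	       -> blkiop->ops.blkio_update_group_weight_fn(q, blkg, v)
 *
 * Policies that do not implement a given callback are simply skipped.
 */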
#ifdef CONFIG_DEBUG_BLK_CGROUP
/* This should be called with the queue_lock held. */
static void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					    struct blkio_policy_type *pol,
					    struct blkio_group *curr_blkg)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];

	if (blkio_blkg_waiting(&pd->stats))
		return;
	if (blkg == curr_blkg)
		return;
	pd->stats.start_group_wait_time = sched_clock();
	blkio_mark_blkg_waiting(&pd->stats);
}

/* This should be called with the queue_lock held. */
static void blkio_update_group_wait_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_waiting(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_group_wait_time))
		blkg_stat_add(&stats->group_wait_time,
			      now - stats->start_group_wait_time);
	blkio_clear_blkg_waiting(stats);
}

/* This should be called with the queue_lock held. */
static void blkio_end_empty_time(struct blkio_group_stats *stats)
{
	unsigned long long now;

	if (!blkio_blkg_empty(stats))
		return;

	now = sched_clock();
	if (time_after64(now, stats->start_empty_time))
		blkg_stat_add(&stats->empty_time,
			      now - stats->start_empty_time);
	blkio_clear_blkg_empty(stats);
}
void blkiocg_update_set_idle_time_stats(struct blkio_group *blkg,
					struct blkio_policy_type *pol)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;

	lockdep_assert_held(blkg->q->queue_lock);
	BUG_ON(blkio_blkg_idling(stats));

	stats->start_idle_time = sched_clock();
	blkio_mark_blkg_idling(stats);
}
EXPORT_SYMBOL_GPL(blkiocg_update_set_idle_time_stats);

void blkiocg_update_idle_time_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;

	lockdep_assert_held(blkg->q->queue_lock);

	if (blkio_blkg_idling(stats)) {
		unsigned long long now = sched_clock();

		if (time_after64(now, stats->start_idle_time))
			blkg_stat_add(&stats->idle_time,
				      now - stats->start_idle_time);
		blkio_clear_blkg_idling(stats);
	}
}
EXPORT_SYMBOL_GPL(blkiocg_update_idle_time_stats);

void blkiocg_update_avg_queue_size_stats(struct blkio_group *blkg,
					 struct blkio_policy_type *pol)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;

	lockdep_assert_held(blkg->q->queue_lock);

	blkg_stat_add(&stats->avg_queue_size_sum,
		      blkg_rwstat_sum(&stats->queued));
	blkg_stat_add(&stats->avg_queue_size_samples, 1);
	blkio_update_group_wait_time(stats);
}
EXPORT_SYMBOL_GPL(blkiocg_update_avg_queue_size_stats);
void blkiocg_set_start_empty_time(struct blkio_group *blkg,
				  struct blkio_policy_type *pol)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;

	lockdep_assert_held(blkg->q->queue_lock);

	if (blkg_rwstat_sum(&stats->queued))
		return;

	/*
	 * group is already marked empty. This can happen if cfqq got new
	 * request in parent group and moved to this group while being added
	 * to service tree. Just ignore the event and move on.
	 */
	if (blkio_blkg_empty(stats))
		return;

	stats->start_empty_time = sched_clock();
	blkio_mark_blkg_empty(stats);
}
EXPORT_SYMBOL_GPL(blkiocg_set_start_empty_time);

void blkiocg_update_dequeue_stats(struct blkio_group *blkg,
				  struct blkio_policy_type *pol,
				  unsigned long dequeue)
{
	struct blkg_policy_data *pd = blkg->pd[pol->plid];

	lockdep_assert_held(blkg->q->queue_lock);

	blkg_stat_add(&pd->stats.dequeue, dequeue);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dequeue_stats);
#else	/* CONFIG_DEBUG_BLK_CGROUP */
static inline void blkio_set_start_group_wait_time(struct blkio_group *blkg,
					struct blkio_policy_type *pol,
					struct blkio_group *curr_blkg) { }
static inline void blkio_end_empty_time(struct blkio_group_stats *stats) { }
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
void blkiocg_update_io_add_stats(struct blkio_group *blkg,
				 struct blkio_policy_type *pol,
				 struct blkio_group *curr_blkg, bool direction,
				 bool sync)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);

	lockdep_assert_held(blkg->q->queue_lock);

	blkg_rwstat_add(&stats->queued, rw, 1);
	blkio_end_empty_time(stats);
	blkio_set_start_group_wait_time(blkg, pol, curr_blkg);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_add_stats);

void blkiocg_update_io_remove_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);

	lockdep_assert_held(blkg->q->queue_lock);

	blkg_rwstat_add(&stats->queued, rw, -1);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_remove_stats);
void blkiocg_update_timeslice_used(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   unsigned long time,
				   unsigned long unaccounted_time)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;

	lockdep_assert_held(blkg->q->queue_lock);

	blkg_stat_add(&stats->time, time);
#ifdef CONFIG_DEBUG_BLK_CGROUP
	blkg_stat_add(&stats->unaccounted_time, unaccounted_time);
#endif
}
EXPORT_SYMBOL_GPL(blkiocg_update_timeslice_used);
/*
 * should be called under rcu read lock or queue lock to make sure blkg pointer
 * is valid.
 */
void blkiocg_update_dispatch_stats(struct blkio_group *blkg,
				   struct blkio_policy_type *pol,
				   uint64_t bytes, bool direction, bool sync)
{
	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);
	struct blkg_policy_data *pd = blkg->pd[pol->plid];
	struct blkio_group_stats_cpu *stats_cpu;
	unsigned long flags;

	/* If per cpu stats are not allocated yet, don't do any accounting. */
	if (pd->stats_cpu == NULL)
		return;

	/*
	 * Disabling interrupts to provide mutual exclusion between two
	 * writes on same cpu. It probably is not needed for 64bit. Not
	 * optimizing that case yet.
	 */
	local_irq_save(flags);

	stats_cpu = this_cpu_ptr(pd->stats_cpu);

	blkg_stat_add(&stats_cpu->sectors, bytes >> 9);
	blkg_rwstat_add(&stats_cpu->serviced, rw, 1);
	blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes);

	local_irq_restore(flags);
}
EXPORT_SYMBOL_GPL(blkiocg_update_dispatch_stats);
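
/*
 * Accounting note (a hedged sketch, not code from this file): a policy such
 * as blk-throttle or CFQ is expected to call the dispatch helper once per
 * dispatched request, e.g.:
 *
 *	blkiocg_update_dispatch_stats(blkg, pol, blk_rq_bytes(rq),
 *				      rq_data_dir(rq), rq_is_sync(rq));
 *
 * The byte count is converted to sectors with "bytes >> 9" above, and the
 * serviced/service_bytes counters feed the io_serviced and io_service_bytes
 * cgroup files declared later in blkio_files[].
 */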
void blkiocg_update_completion_stats(struct blkio_group *blkg,
				     struct blkio_policy_type *pol,
				     uint64_t start_time,
				     uint64_t io_start_time, bool direction,
				     bool sync)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
	unsigned long long now = sched_clock();
	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);

	lockdep_assert_held(blkg->q->queue_lock);

	if (time_after64(now, io_start_time))
		blkg_rwstat_add(&stats->service_time, rw, now - io_start_time);
	if (time_after64(io_start_time, start_time))
		blkg_rwstat_add(&stats->wait_time, rw,
				io_start_time - start_time);
}
EXPORT_SYMBOL_GPL(blkiocg_update_completion_stats);

/* Merged stats are per cpu. */
void blkiocg_update_io_merged_stats(struct blkio_group *blkg,
				    struct blkio_policy_type *pol,
				    bool direction, bool sync)
{
	struct blkio_group_stats *stats = &blkg->pd[pol->plid]->stats;
	int rw = (direction ? REQ_WRITE : 0) | (sync ? REQ_SYNC : 0);

	lockdep_assert_held(blkg->q->queue_lock);

	blkg_rwstat_add(&stats->merged, rw, 1);
}
EXPORT_SYMBOL_GPL(blkiocg_update_io_merged_stats);
/*
 * Worker for allocating per cpu stat for blk groups. This is scheduled on
 * the system_nrt_wq once there are some groups on the alloc_list waiting
 * for allocation.
 */
static void blkio_stat_alloc_fn(struct work_struct *work)
{
	static void *pcpu_stats[BLKIO_NR_POLICIES];
	struct delayed_work *dwork = to_delayed_work(work);
	struct blkio_group *blkg;
	int i;
	bool empty = false;

alloc_stats:
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		if (pcpu_stats[i] != NULL)
			continue;

		pcpu_stats[i] = alloc_percpu(struct blkio_group_stats_cpu);

		/* Allocation failed. Try again after some time. */
		if (pcpu_stats[i] == NULL) {
			queue_delayed_work(system_nrt_wq, dwork,
						msecs_to_jiffies(10));
			return;
		}
	}

	spin_lock_irq(&blkio_list_lock);
	spin_lock(&alloc_list_lock);

	/* cgroup got deleted or queue exited. */
	if (!list_empty(&alloc_list)) {
		blkg = list_first_entry(&alloc_list, struct blkio_group,
						alloc_node);
		for (i = 0; i < BLKIO_NR_POLICIES; i++) {
			struct blkg_policy_data *pd = blkg->pd[i];

			if (blkio_policy[i] && pd && !pd->stats_cpu)
				swap(pd->stats_cpu, pcpu_stats[i]);
		}
		list_del_init(&blkg->alloc_node);
	}

	empty = list_empty(&alloc_list);

	spin_unlock(&alloc_list_lock);
	spin_unlock_irq(&blkio_list_lock);

	if (!empty)
		goto alloc_stats;
}
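
/*
 * Design note: blkgs are created under the queue_lock with GFP_ATOMIC (see
 * blkg_alloc() below), but alloc_percpu() may sleep, so the per cpu stats
 * are allocated lazily by this worker instead.  Until the worker swaps a
 * per cpu area into pd->stats_cpu, blkiocg_update_dispatch_stats() above
 * silently skips accounting for the group.
 */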
/**
 * blkg_free - free a blkg
 * @blkg: blkg to free
 *
 * Free @blkg which may be partially allocated.
 */
static void blkg_free(struct blkio_group *blkg)
{
	int i;

	if (!blkg)
		return;

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkg_policy_data *pd = blkg->pd[i];

		if (pd) {
			free_percpu(pd->stats_cpu);
			kfree(pd);
		}
	}

	kfree(blkg);
}

/**
 * blkg_alloc - allocate a blkg
 * @blkcg: block cgroup the new blkg is associated with
 * @q: request_queue the new blkg is associated with
 *
 * Allocate a new blkg associating @blkcg and @q.
 */
static struct blkio_group *blkg_alloc(struct blkio_cgroup *blkcg,
				      struct request_queue *q)
{
	struct blkio_group *blkg;
	int i;

	/* alloc and init base part */
	blkg = kzalloc_node(sizeof(*blkg), GFP_ATOMIC, q->node);
	if (!blkg)
		return NULL;

	blkg->q = q;
	INIT_LIST_HEAD(&blkg->q_node);
	INIT_LIST_HEAD(&blkg->alloc_node);
	blkg->blkcg = blkcg;
	blkg->refcnt = 1;
	cgroup_path(blkcg->css.cgroup, blkg->path, sizeof(blkg->path));

	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];
		struct blkg_policy_data *pd;

		if (!pol)
			continue;

		/* alloc per-policy data and attach it to blkg */
		pd = kzalloc_node(sizeof(*pd) + pol->pdata_size, GFP_ATOMIC,
				  q->node);
		if (!pd) {
			blkg_free(blkg);
			return NULL;
		}

		blkg->pd[i] = pd;
		pd->blkg = blkg;
	}

	/* invoke per-policy init */
	for (i = 0; i < BLKIO_NR_POLICIES; i++) {
		struct blkio_policy_type *pol = blkio_policy[i];

		if (pol)
			pol->ops.blkio_init_group_fn(blkg);
	}

	return blkg;
}
struct blkio_group *blkg_lookup_create(struct blkio_cgroup *blkcg,
				       struct request_queue *q,
				       bool for_root)
	__releases(q->queue_lock) __acquires(q->queue_lock)
{
	struct blkio_group *blkg;

	WARN_ON_ONCE(!rcu_read_lock_held());
	lockdep_assert_held(q->queue_lock);

	/*
	 * This could be the first entry point of blkcg implementation and
	 * we shouldn't allow anything to go through for a bypassing queue.
	 * The following can be removed if blkg lookup is guaranteed to
	 * fail on a bypassing queue.
	 */
	if (unlikely(blk_queue_bypass(q)) && !for_root)
		return ERR_PTR(blk_queue_dead(q) ? -EINVAL : -EBUSY);

	blkg = blkg_lookup(blkcg, q);
	if (blkg)
		return blkg;

	/* blkg holds a reference to blkcg */
	if (!css_tryget(&blkcg->css))
		return ERR_PTR(-EINVAL);

	/*
	 * Allocate and initialize.
	 */
	blkg = blkg_alloc(blkcg, q);

	/* did alloc fail? */
	if (unlikely(!blkg)) {
		blkg = ERR_PTR(-ENOMEM);
		goto out;
	}

	/* insert */
	spin_lock(&blkcg->lock);
	hlist_add_head_rcu(&blkg->blkcg_node, &blkcg->blkg_list);
	list_add(&blkg->q_node, &q->blkg_list);
	spin_unlock(&blkcg->lock);

	spin_lock(&alloc_list_lock);
	list_add(&blkg->alloc_node, &alloc_list);
	/* Queue per cpu stat allocation from worker thread. */
	queue_delayed_work(system_nrt_wq, &blkio_stat_alloc_work, 0);
	spin_unlock(&alloc_list_lock);
out:
	return blkg;
}
EXPORT_SYMBOL_GPL(blkg_lookup_create);
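
/*
 * Usage note (a hedged sketch): the lookup-or-create pattern is always run
 * with the RCU read lock and the queue_lock held, mirroring the asserts at
 * the top of blkg_lookup_create():
 *
 *	rcu_read_lock();
 *	spin_lock_irq(q->queue_lock);
 *	blkg = blkg_lookup_create(blkcg, q, false);
 *	spin_unlock_irq(q->queue_lock);
 *	rcu_read_unlock();
 *
 *	if (IS_ERR(blkg))
 *		...bypassing or dead queue, or allocation failure...
 *
 * blkg_conf_prep() below follows exactly this pattern when a config value is
 * written from userspace.
 */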
/* called under rcu_read_lock(). */
struct blkio_group *blkg_lookup(struct blkio_cgroup *blkcg,
				struct request_queue *q)
{
	struct blkio_group *blkg;
	struct hlist_node *n;

	hlist_for_each_entry_rcu(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->q == q)
			return blkg;
	return NULL;
}
EXPORT_SYMBOL_GPL(blkg_lookup);

static void blkg_destroy(struct blkio_group *blkg)
{
	struct request_queue *q = blkg->q;
	struct blkio_cgroup *blkcg = blkg->blkcg;

	lockdep_assert_held(q->queue_lock);
	lockdep_assert_held(&blkcg->lock);

	/* Something wrong if we are trying to remove same group twice */
	WARN_ON_ONCE(list_empty(&blkg->q_node));
	WARN_ON_ONCE(hlist_unhashed(&blkg->blkcg_node));
	list_del_init(&blkg->q_node);
	hlist_del_init_rcu(&blkg->blkcg_node);

	spin_lock(&alloc_list_lock);
	list_del_init(&blkg->alloc_node);
	spin_unlock(&alloc_list_lock);

	/*
	 * Put the reference taken at the time of creation so that when all
	 * queues are gone, group can be destroyed.
	 */
	blkg_put(blkg);
}
/*
 * XXX: This updates blkg policy data in-place for root blkg, which is
 * necessary across elevator switch and policy registration as root blkgs
 * aren't shot down.  This broken and racy implementation is temporary.
 * Eventually, blkg shoot down will be replaced by proper in-place update.
 */
void update_root_blkg_pd(struct request_queue *q, enum blkio_policy_id plid)
{
	struct blkio_policy_type *pol = blkio_policy[plid];
	struct blkio_group *blkg = blkg_lookup(&blkio_root_cgroup, q);
	struct blkg_policy_data *pd;

	if (!blkg)
		return;

	kfree(blkg->pd[plid]);
	blkg->pd[plid] = NULL;

	if (!pol)
		return;

	pd = kzalloc(sizeof(*pd) + pol->pdata_size, GFP_KERNEL);
	WARN_ON_ONCE(!pd);

	pd->stats_cpu = alloc_percpu(struct blkio_group_stats_cpu);
	WARN_ON_ONCE(!pd->stats_cpu);

	blkg->pd[plid] = pd;
	pd->blkg = blkg;
	pol->ops.blkio_init_group_fn(blkg);
}
EXPORT_SYMBOL_GPL(update_root_blkg_pd);
/**
 * blkg_destroy_all - destroy all blkgs associated with a request_queue
 * @q: request_queue of interest
 * @destroy_root: whether to destroy root blkg or not
 *
 * Destroy blkgs associated with @q.  If @destroy_root is %true, all are
 * destroyed; otherwise, root blkg is left alone.
 */
void blkg_destroy_all(struct request_queue *q, bool destroy_root)
{
	struct blkio_group *blkg, *n;

	spin_lock_irq(q->queue_lock);

	list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) {
		struct blkio_cgroup *blkcg = blkg->blkcg;

		/* skip root? */
		if (!destroy_root && blkg->blkcg == &blkio_root_cgroup)
			continue;

		spin_lock(&blkcg->lock);
		blkg_destroy(blkg);
		spin_unlock(&blkcg->lock);
	}

	spin_unlock_irq(q->queue_lock);
}
EXPORT_SYMBOL_GPL(blkg_destroy_all);
static void blkg_rcu_free(struct rcu_head *rcu_head)
{
	blkg_free(container_of(rcu_head, struct blkio_group, rcu_head));
}

void __blkg_release(struct blkio_group *blkg)
{
	/* release the extra blkcg reference this blkg has been holding */
	css_put(&blkg->blkcg->css);

	/*
	 * A group is freed in rcu manner. But having an rcu lock does not
	 * mean that one can access all the fields of blkg and assume these
	 * are valid. For example, don't try to follow throtl_data and
	 * request queue links.
	 *
	 * Having a reference to blkg under an rcu allows access to only
	 * values local to groups like group stats and group rate limits.
	 */
	call_rcu(&blkg->rcu_head, blkg_rcu_free);
}
EXPORT_SYMBOL_GPL(__blkg_release);
static void blkio_reset_stats_cpu(struct blkio_group *blkg, int plid)
{
	struct blkg_policy_data *pd = blkg->pd[plid];
	int cpu;

	if (pd->stats_cpu == NULL)
		return;

	for_each_possible_cpu(cpu) {
		struct blkio_group_stats_cpu *sc =
			per_cpu_ptr(pd->stats_cpu, cpu);

		blkg_rwstat_reset(&sc->service_bytes);
		blkg_rwstat_reset(&sc->serviced);
		blkg_stat_reset(&sc->sectors);
	}
}

static int
blkiocg_reset_stats(struct cgroup *cgroup, struct cftype *cftype, u64 val)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
	struct blkio_group *blkg;
	struct hlist_node *n;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);

	/*
	 * Note that stat reset is racy - it doesn't synchronize against
	 * stat updates.  This is a debug feature which shouldn't exist
	 * anyway.  If you get hit by a race, retry.
	 */
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkio_policy_type *pol;

		list_for_each_entry(pol, &blkio_list, list) {
			struct blkg_policy_data *pd = blkg->pd[pol->plid];
			struct blkio_group_stats *stats = &pd->stats;

			/* queued stats shouldn't be cleared */
			blkg_rwstat_reset(&stats->merged);
			blkg_rwstat_reset(&stats->service_time);
			blkg_rwstat_reset(&stats->wait_time);
			blkg_stat_reset(&stats->time);
#ifdef CONFIG_DEBUG_BLK_CGROUP
			blkg_stat_reset(&stats->unaccounted_time);
			blkg_stat_reset(&stats->avg_queue_size_sum);
			blkg_stat_reset(&stats->avg_queue_size_samples);
			blkg_stat_reset(&stats->dequeue);
			blkg_stat_reset(&stats->group_wait_time);
			blkg_stat_reset(&stats->idle_time);
			blkg_stat_reset(&stats->empty_time);
#endif
			blkio_reset_stats_cpu(blkg, pol->plid);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}
static const char *blkg_dev_name(struct blkio_group *blkg)
{
	/* some drivers (floppy) instantiate a queue w/o disk registered */
	if (blkg->q->backing_dev_info.dev)
		return dev_name(blkg->q->backing_dev_info.dev);
	return NULL;
}

/**
 * blkcg_print_blkgs - helper for printing per-blkg data
 * @sf: seq_file to print to
 * @blkcg: blkcg of interest
 * @prfill: fill function to print out a blkg
 * @pol: policy in question
 * @data: data to be passed to @prfill
 * @show_total: to print out sum of prfill return values or not
 *
 * This function invokes @prfill on each blkg of @blkcg if pd for the
 * policy specified by @pol exists.  @prfill is invoked with @sf, the
 * policy data and @data.  If @show_total is %true, the sum of the return
 * values from @prfill is printed with "Total" label at the end.
 *
 * This is to be used to construct print functions for
 * cftype->read_seq_string method.
 */
void blkcg_print_blkgs(struct seq_file *sf, struct blkio_cgroup *blkcg,
		       u64 (*prfill)(struct seq_file *, struct blkg_policy_data *, int),
		       int pol, int data, bool show_total)
{
	struct blkio_group *blkg;
	struct hlist_node *n;
	u64 total = 0;

	spin_lock_irq(&blkcg->lock);
	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node)
		if (blkg->pd[pol])
			total += prfill(sf, blkg->pd[pol], data);
	spin_unlock_irq(&blkcg->lock);

	if (show_total)
		seq_printf(sf, "Total %llu\n", (unsigned long long)total);
}
EXPORT_SYMBOL_GPL(blkcg_print_blkgs);
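
/*
 * Implementation note (a minimal sketch of a @prfill callback, using only
 * helpers defined in this file): each callback prints one line per device
 * for the blkg it is handed and returns the value that should contribute to
 * the optional "Total" line, e.g.:
 *
 *	static u64 my_prfill(struct seq_file *sf,
 *			     struct blkg_policy_data *pd, int off)
 *	{
 *		return __blkg_prfill_u64(sf, pd,
 *				blkg_stat_read((void *)&pd->stats + off));
 *	}
 *
 * This is essentially what blkg_prfill_stat() below does; "my_prfill" is a
 * hypothetical name used only for illustration.
 */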
/**
 * __blkg_prfill_u64 - prfill helper for a single u64 value
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @v: value to print
 *
 * Print @v to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v)
{
	const char *dname = blkg_dev_name(pd->blkg);

	if (!dname)
		return 0;

	seq_printf(sf, "%s %llu\n", dname, (unsigned long long)v);
	return v;
}
EXPORT_SYMBOL_GPL(__blkg_prfill_u64);

/**
 * __blkg_prfill_rwstat - prfill helper for a blkg_rwstat
 * @sf: seq_file to print to
 * @pd: policy data of interest
 * @rwstat: rwstat to print
 *
 * Print @rwstat to @sf for the device associated with @pd.
 */
u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			 const struct blkg_rwstat *rwstat)
{
	static const char *rwstr[] = {
		[BLKG_RWSTAT_READ]	= "Read",
		[BLKG_RWSTAT_WRITE]	= "Write",
		[BLKG_RWSTAT_SYNC]	= "Sync",
		[BLKG_RWSTAT_ASYNC]	= "Async",
	};
	const char *dname = blkg_dev_name(pd->blkg);
	u64 v;
	int i;

	if (!dname)
		return 0;

	for (i = 0; i < BLKG_RWSTAT_NR; i++)
		seq_printf(sf, "%s %s %llu\n", dname, rwstr[i],
			   (unsigned long long)rwstat->cnt[i]);

	v = rwstat->cnt[BLKG_RWSTAT_READ] + rwstat->cnt[BLKG_RWSTAT_WRITE];
	seq_printf(sf, "%s Total %llu\n", dname, (unsigned long long)v);
	return v;
}
static u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd,
			    int off)
{
	return __blkg_prfill_u64(sf, pd,
				 blkg_stat_read((void *)&pd->stats + off));
}

static u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd,
			      int off)
{
	struct blkg_rwstat rwstat = blkg_rwstat_read((void *)&pd->stats + off);

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}

/* print blkg_stat specified by BLKCG_STAT_PRIV() */
int blkcg_print_stat(struct cgroup *cgrp, struct cftype *cft,
		     struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_stat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), false);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_stat);

/* print blkg_rwstat specified by BLKCG_STAT_PRIV() */
int blkcg_print_rwstat(struct cgroup *cgrp, struct cftype *cft,
		       struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_rwstat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_rwstat);
static u64 blkg_prfill_cpu_stat(struct seq_file *sf,
				struct blkg_policy_data *pd, int off)
{
	u64 v = 0;
	int cpu;

	for_each_possible_cpu(cpu) {
		struct blkio_group_stats_cpu *sc =
			per_cpu_ptr(pd->stats_cpu, cpu);

		v += blkg_stat_read((void *)sc + off);
	}

	return __blkg_prfill_u64(sf, pd, v);
}

static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf,
				  struct blkg_policy_data *pd, int off)
{
	struct blkg_rwstat rwstat = { }, tmp;
	int i, cpu;

	for_each_possible_cpu(cpu) {
		struct blkio_group_stats_cpu *sc =
			per_cpu_ptr(pd->stats_cpu, cpu);

		tmp = blkg_rwstat_read((void *)sc + off);
		for (i = 0; i < BLKG_RWSTAT_NR; i++)
			rwstat.cnt[i] += tmp.cnt[i];
	}

	return __blkg_prfill_rwstat(sf, pd, &rwstat);
}

/* print per-cpu blkg_stat specified by BLKCG_STAT_PRIV() */
int blkcg_print_cpu_stat(struct cgroup *cgrp, struct cftype *cft,
			 struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_stat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), false);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_cpu_stat);

/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */
int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft,
			   struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat,
			  BLKCG_STAT_POL(cft->private),
			  BLKCG_STAT_OFF(cft->private), true);
	return 0;
}
EXPORT_SYMBOL_GPL(blkcg_print_cpu_rwstat);
#ifdef CONFIG_DEBUG_BLK_CGROUP
static u64 blkg_prfill_avg_queue_size(struct seq_file *sf,
				      struct blkg_policy_data *pd, int off)
{
	u64 samples = blkg_stat_read(&pd->stats.avg_queue_size_samples);
	u64 v = 0;

	if (samples) {
		v = blkg_stat_read(&pd->stats.avg_queue_size_sum);
		do_div(v, samples);
	}
	__blkg_prfill_u64(sf, pd, v);
	return 0;
}

/* print avg_queue_size */
static int blkcg_print_avg_queue_size(struct cgroup *cgrp, struct cftype *cft,
				      struct seq_file *sf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);

	blkcg_print_blkgs(sf, blkcg, blkg_prfill_avg_queue_size,
			  BLKIO_POLICY_PROP, 0, false);
	return 0;
}
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
/**
 * blkg_conf_prep - parse and prepare for per-blkg config update
 * @blkcg: target block cgroup
 * @input: input string
 * @ctx: blkg_conf_ctx to be filled
 *
 * Parse per-blkg config update from @input and initialize @ctx with the
 * result.  @ctx->blkg points to the blkg to be updated and @ctx->v the new
 * value.  This function returns with RCU read locked and must be paired
 * with blkg_conf_finish().
 */
int blkg_conf_prep(struct blkio_cgroup *blkcg, const char *input,
		   struct blkg_conf_ctx *ctx)
	__acquires(rcu)
{
	struct gendisk *disk;
	struct blkio_group *blkg;
	unsigned int major, minor;
	unsigned long long v;
	int part, ret;

	if (sscanf(input, "%u:%u %llu", &major, &minor, &v) != 3)
		return -EINVAL;

	disk = get_gendisk(MKDEV(major, minor), &part);
	if (!disk || part)
		return -EINVAL;

	rcu_read_lock();

	spin_lock_irq(disk->queue->queue_lock);
	blkg = blkg_lookup_create(blkcg, disk->queue, false);
	spin_unlock_irq(disk->queue->queue_lock);

	if (IS_ERR(blkg)) {
		ret = PTR_ERR(blkg);
		rcu_read_unlock();
		put_disk(disk);
		/*
		 * If queue was bypassing, we should retry.  Do so after a
		 * short msleep().  It isn't strictly necessary but queue
		 * can be bypassing for some time and it's always nice to
		 * avoid busy looping.
		 */
		if (ret == -EBUSY) {
			msleep(10);
			ret = restart_syscall();
		}
		return ret;
	}

	ctx->disk = disk;
	ctx->blkg = blkg;
	ctx->v = v;
	return 0;
}
EXPORT_SYMBOL_GPL(blkg_conf_prep);

/**
 * blkg_conf_finish - finish up per-blkg config update
 * @ctx: blkg_conf_ctx initialized by blkg_conf_prep()
 *
 * Finish up after per-blkg config update.  This function must be paired
 * with blkg_conf_prep().
 */
void blkg_conf_finish(struct blkg_conf_ctx *ctx)
	__releases(rcu)
{
	rcu_read_unlock();
	put_disk(ctx->disk);
}
EXPORT_SYMBOL_GPL(blkg_conf_finish);
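
/*
 * Interface note: the string parsed by blkg_conf_prep() is the format used
 * by the per-device cgroup files declared below, i.e. a device number pair
 * followed by the value:
 *
 *	<major>:<minor> <value>
 *
 * A hedged example from userspace (assuming 8:0 is the target disk): writing
 * "8:0 1048576" to blkio.throttle.read_bps_device sets the per-blkg read
 * bandwidth limit, and writing "8:0 0" clears the per-device setting so the
 * value falls back to the policy default (see the "ctx.v ?: ..." uses in the
 * setters below).
 */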
/* for propio conf */
static u64 blkg_prfill_weight_device(struct seq_file *sf,
				     struct blkg_policy_data *pd, int off)
{
	if (!pd->conf.weight)
		return 0;
	return __blkg_prfill_u64(sf, pd, pd->conf.weight);
}

static int blkcg_print_weight_device(struct cgroup *cgrp, struct cftype *cft,
				     struct seq_file *sf)
{
	blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
			  blkg_prfill_weight_device, BLKIO_POLICY_PROP, 0,
			  false);
	return 0;
}

static int blkcg_print_weight(struct cgroup *cgrp, struct cftype *cft,
			      struct seq_file *sf)
{
	seq_printf(sf, "%u\n", cgroup_to_blkio_cgroup(cgrp)->weight);
	return 0;
}

static int blkcg_set_weight_device(struct cgroup *cgrp, struct cftype *cft,
				   const char *buf)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
	struct blkg_policy_data *pd;
	struct blkg_conf_ctx ctx;
	int ret;

	ret = blkg_conf_prep(blkcg, buf, &ctx);
	if (ret)
		return ret;

	ret = -EINVAL;
	pd = ctx.blkg->pd[BLKIO_POLICY_PROP];
	if (pd && (!ctx.v || (ctx.v >= BLKIO_WEIGHT_MIN &&
			      ctx.v <= BLKIO_WEIGHT_MAX))) {
		pd->conf.weight = ctx.v;
		blkio_update_group_weight(ctx.blkg, BLKIO_POLICY_PROP,
					  ctx.v ?: blkcg->weight);
		ret = 0;
	}

	blkg_conf_finish(&ctx);
	return ret;
}
static int blkcg_set_weight(struct cgroup *cgrp, struct cftype *cft, u64 val)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
	struct blkio_group *blkg;
	struct hlist_node *n;

	if (val < BLKIO_WEIGHT_MIN || val > BLKIO_WEIGHT_MAX)
		return -EINVAL;

	spin_lock(&blkio_list_lock);
	spin_lock_irq(&blkcg->lock);
	blkcg->weight = (unsigned int)val;

	hlist_for_each_entry(blkg, n, &blkcg->blkg_list, blkcg_node) {
		struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_PROP];

		if (pd && !pd->conf.weight)
			blkio_update_group_weight(blkg, BLKIO_POLICY_PROP,
						  blkcg->weight);
	}

	spin_unlock_irq(&blkcg->lock);
	spin_unlock(&blkio_list_lock);
	return 0;
}
/* for blk-throttle conf */
#ifdef CONFIG_BLK_DEV_THROTTLING
static u64 blkg_prfill_conf_u64(struct seq_file *sf,
				struct blkg_policy_data *pd, int off)
{
	u64 v = *(u64 *)((void *)&pd->conf + off);

	if (!v)
		return 0;
	return __blkg_prfill_u64(sf, pd, v);
}

static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft,
				struct seq_file *sf)
{
	blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp),
			  blkg_prfill_conf_u64, BLKIO_POLICY_THROTL,
			  cft->private, false);
	return 0;
}

static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft,
			      const char *buf, int rw,
			      void (*update)(struct blkio_group *, int, u64, int))
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp);
	struct blkg_policy_data *pd;
	struct blkg_conf_ctx ctx;
	int ret;

	ret = blkg_conf_prep(blkcg, buf, &ctx);
	if (ret)
		return ret;

	ret = -EINVAL;
	pd = ctx.blkg->pd[BLKIO_POLICY_THROTL];
	if (pd) {
		*(u64 *)((void *)&pd->conf + cft->private) = ctx.v;
		update(ctx.blkg, BLKIO_POLICY_THROTL, ctx.v ?: -1, rw);
		ret = 0;
	}

	blkg_conf_finish(&ctx);
	return ret;
}

static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft,
				const char *buf)
{
	return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_bps);
}

static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft,
				const char *buf)
{
	return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_bps);
}

static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft,
				 const char *buf)
{
	return blkcg_set_conf_u64(cgrp, cft, buf, READ, blkio_update_group_iops);
}

static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft,
				 const char *buf)
{
	return blkcg_set_conf_u64(cgrp, cft, buf, WRITE, blkio_update_group_iops);
}
#endif	/* CONFIG_BLK_DEV_THROTTLING */
struct cftype blkio_files[] = {
	{
		.name = "weight_device",
		.read_seq_string = blkcg_print_weight_device,
		.write_string = blkcg_set_weight_device,
		.max_write_len = 256,
	},
	{
		.name = "weight",
		.read_seq_string = blkcg_print_weight,
		.write_u64 = blkcg_set_weight,
	},
	{
		.name = "time",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, time)),
		.read_seq_string = blkcg_print_stat,
	},
	{
		.name = "sectors",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats_cpu, sectors)),
		.read_seq_string = blkcg_print_cpu_stat,
	},
	{
		.name = "io_service_bytes",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats_cpu, service_bytes)),
		.read_seq_string = blkcg_print_cpu_rwstat,
	},
	{
		.name = "io_serviced",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats_cpu, serviced)),
		.read_seq_string = blkcg_print_cpu_rwstat,
	},
	{
		.name = "io_service_time",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, service_time)),
		.read_seq_string = blkcg_print_rwstat,
	},
	{
		.name = "io_wait_time",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, wait_time)),
		.read_seq_string = blkcg_print_rwstat,
	},
	{
		.name = "io_merged",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, merged)),
		.read_seq_string = blkcg_print_rwstat,
	},
	{
		.name = "io_queued",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, queued)),
		.read_seq_string = blkcg_print_rwstat,
	},
	{
		.name = "reset_stats",
		.write_u64 = blkiocg_reset_stats,
	},
#ifdef CONFIG_BLK_DEV_THROTTLING
	{
		.name = "throttle.read_bps_device",
		.private = offsetof(struct blkio_group_conf, bps[READ]),
		.read_seq_string = blkcg_print_conf_u64,
		.write_string = blkcg_set_conf_bps_r,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_bps_device",
		.private = offsetof(struct blkio_group_conf, bps[WRITE]),
		.read_seq_string = blkcg_print_conf_u64,
		.write_string = blkcg_set_conf_bps_w,
		.max_write_len = 256,
	},
	{
		.name = "throttle.read_iops_device",
		.private = offsetof(struct blkio_group_conf, iops[READ]),
		.read_seq_string = blkcg_print_conf_u64,
		.write_string = blkcg_set_conf_iops_r,
		.max_write_len = 256,
	},
	{
		.name = "throttle.write_iops_device",
		.private = offsetof(struct blkio_group_conf, iops[WRITE]),
		.read_seq_string = blkcg_print_conf_u64,
		.write_string = blkcg_set_conf_iops_w,
		.max_write_len = 256,
	},
	{
		.name = "throttle.io_service_bytes",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL,
				offsetof(struct blkio_group_stats_cpu, service_bytes)),
		.read_seq_string = blkcg_print_cpu_rwstat,
	},
	{
		.name = "throttle.io_serviced",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL,
				offsetof(struct blkio_group_stats_cpu, serviced)),
		.read_seq_string = blkcg_print_cpu_rwstat,
	},
#endif /* CONFIG_BLK_DEV_THROTTLING */
#ifdef CONFIG_DEBUG_BLK_CGROUP
	{
		.name = "avg_queue_size",
		.read_seq_string = blkcg_print_avg_queue_size,
	},
	{
		.name = "group_wait_time",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, group_wait_time)),
		.read_seq_string = blkcg_print_stat,
	},
	{
		.name = "idle_time",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, idle_time)),
		.read_seq_string = blkcg_print_stat,
	},
	{
		.name = "empty_time",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, empty_time)),
		.read_seq_string = blkcg_print_stat,
	},
	{
		.name = "dequeue",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, dequeue)),
		.read_seq_string = blkcg_print_stat,
	},
	{
		.name = "unaccounted_time",
		.private = BLKCG_STAT_PRIV(BLKIO_POLICY_PROP,
				offsetof(struct blkio_group_stats, unaccounted_time)),
		.read_seq_string = blkcg_print_stat,
	},
#endif	/* CONFIG_DEBUG_BLK_CGROUP */
	{ }	/* terminate */
};
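
/*
 * Interface note: every entry above becomes a file in the blkio cgroup
 * directory, e.g. blkio.weight, blkio.time or blkio.throttle.read_bps_device
 * (the "blkio." prefix comes from the subsystem name).  Statistics files
 * encode which policy and which struct offset to read via BLKCG_STAT_PRIV()
 * in .private, which blkcg_print_stat() and friends decode with
 * BLKCG_STAT_POL() and BLKCG_STAT_OFF().
 */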
/**
 * blkiocg_pre_destroy - cgroup pre_destroy callback
 * @cgroup: cgroup of interest
 *
 * This function is called when @cgroup is about to go away and responsible
 * for shooting down all blkgs associated with @cgroup.  blkgs should be
 * removed while holding both q and blkcg locks.  As blkcg lock is nested
 * inside q lock, this function performs reverse double lock dancing.
 *
 * This is the blkcg counterpart of ioc_release_fn().
 */
static int blkiocg_pre_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	spin_lock_irq(&blkcg->lock);

	while (!hlist_empty(&blkcg->blkg_list)) {
		struct blkio_group *blkg = hlist_entry(blkcg->blkg_list.first,
						struct blkio_group, blkcg_node);
		struct request_queue *q = blkg->q;

		if (spin_trylock(q->queue_lock)) {
			blkg_destroy(blkg);
			spin_unlock(q->queue_lock);
		} else {
			spin_unlock_irq(&blkcg->lock);
			cpu_relax();
			spin_lock_irq(&blkcg->lock);
		}
	}

	spin_unlock_irq(&blkcg->lock);
	return 0;
}

static void blkiocg_destroy(struct cgroup *cgroup)
{
	struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);

	if (blkcg != &blkio_root_cgroup)
		kfree(blkcg);
}

static struct cgroup_subsys_state *blkiocg_create(struct cgroup *cgroup)
{
	static atomic64_t id_seq = ATOMIC64_INIT(0);
	struct blkio_cgroup *blkcg;
	struct cgroup *parent = cgroup->parent;

	if (!parent) {
		blkcg = &blkio_root_cgroup;
		goto done;
	}

	blkcg = kzalloc(sizeof(*blkcg), GFP_KERNEL);
	if (!blkcg)
		return ERR_PTR(-ENOMEM);

	blkcg->weight = BLKIO_WEIGHT_DEFAULT;
	blkcg->id = atomic64_inc_return(&id_seq); /* root is 0, start from 1 */
done:
	spin_lock_init(&blkcg->lock);
	INIT_HLIST_HEAD(&blkcg->blkg_list);

	return &blkcg->css;
}
/**
 * blkcg_init_queue - initialize blkcg part of request queue
 * @q: request_queue to initialize
 *
 * Called from blk_alloc_queue_node(). Responsible for initializing blkcg
 * part of new request_queue @q.
 *
 * RETURNS:
 * 0 on success, -errno on failure.
 */
int blkcg_init_queue(struct request_queue *q)
{
	int ret;

	ret = blk_throtl_init(q);
	if (ret)
		return ret;

	mutex_lock(&all_q_mutex);
	INIT_LIST_HEAD(&q->all_q_node);
	list_add_tail(&q->all_q_node, &all_q_list);
	mutex_unlock(&all_q_mutex);

	return 0;
}

/**
 * blkcg_drain_queue - drain blkcg part of request_queue
 * @q: request_queue to drain
 *
 * Called from blk_drain_queue().  Responsible for draining blkcg part.
 */
void blkcg_drain_queue(struct request_queue *q)
{
	lockdep_assert_held(q->queue_lock);

	blk_throtl_drain(q);
}

/**
 * blkcg_exit_queue - exit and release blkcg part of request_queue
 * @q: request_queue being released
 *
 * Called from blk_release_queue().  Responsible for exiting blkcg part.
 */
void blkcg_exit_queue(struct request_queue *q)
{
	mutex_lock(&all_q_mutex);
	list_del_init(&q->all_q_node);
	mutex_unlock(&all_q_mutex);

	blkg_destroy_all(q, true);

	blk_throtl_exit(q);
}
/*
 * We cannot support shared io contexts, as we have no mean to support
 * two tasks with the same ioc in two different groups without major rework
 * of the main cic data structures.  For now we allow a task to change
 * its cgroup only if it's the only owner of its ioc.
 */
static int blkiocg_can_attach(struct cgroup *cgrp, struct cgroup_taskset *tset)
{
	struct task_struct *task;
	struct io_context *ioc;
	int ret = 0;

	/* task_lock() is needed to avoid races with exit_io_context() */
	cgroup_taskset_for_each(task, cgrp, tset) {
		task_lock(task);
		ioc = task->io_context;
		if (ioc && atomic_read(&ioc->nr_tasks) > 1)
			ret = -EINVAL;
		task_unlock(task);
		if (ret)
			break;
	}
	return ret;
}

static void blkcg_bypass_start(void)
	__acquires(&all_q_mutex)
{
	struct request_queue *q;

	mutex_lock(&all_q_mutex);

	list_for_each_entry(q, &all_q_list, all_q_node) {
		blk_queue_bypass_start(q);
		blkg_destroy_all(q, false);
	}
}

static void blkcg_bypass_end(void)
	__releases(&all_q_mutex)
{
	struct request_queue *q;

	list_for_each_entry(q, &all_q_list, all_q_node)
		blk_queue_bypass_end(q);

	mutex_unlock(&all_q_mutex);
}
struct cgroup_subsys blkio_subsys = {
	.name = "blkio",
	.create = blkiocg_create,
	.can_attach = blkiocg_can_attach,
	.pre_destroy = blkiocg_pre_destroy,
	.destroy = blkiocg_destroy,
	.subsys_id = blkio_subsys_id,
	.base_cftypes = blkio_files,
	.module = THIS_MODULE,
};
EXPORT_SYMBOL_GPL(blkio_subsys);

void blkio_policy_register(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid]);
	blkio_policy[blkiop->plid] = blkiop;
	list_add_tail(&blkiop->list, &blkio_list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_register);

void blkio_policy_unregister(struct blkio_policy_type *blkiop)
{
	struct request_queue *q;

	blkcg_bypass_start();
	spin_lock(&blkio_list_lock);

	BUG_ON(blkio_policy[blkiop->plid] != blkiop);
	blkio_policy[blkiop->plid] = NULL;
	list_del_init(&blkiop->list);

	spin_unlock(&blkio_list_lock);
	list_for_each_entry(q, &all_q_list, all_q_node)
		update_root_blkg_pd(q, blkiop->plid);
	blkcg_bypass_end();
}
EXPORT_SYMBOL_GPL(blkio_policy_unregister);
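
/*
 * Registration note (a hedged sketch, not code from this file): an I/O
 * policy such as CFQ's proportional-weight scheduler or blk-throttle
 * registers itself by filling in a struct blkio_policy_type with its
 * BLKIO_POLICY_* id, the size of its per-group data and the callbacks used
 * above, then calling blkio_policy_register(), e.g.:
 *
 *	static struct blkio_policy_type blkio_policy_example = {
 *		.ops = {
 *			.blkio_init_group_fn = example_init_blkio_group,
 *			.blkio_update_group_weight_fn = example_update_weight,
 *		},
 *		.plid = BLKIO_POLICY_PROP,
 *		.pdata_size = sizeof(struct example_group),
 *	};
 *
 *	blkio_policy_register(&blkio_policy_example);
 *
 * The "example_*" names are hypothetical.  Registration bypasses all queues
 * (blkcg_bypass_start/end) so that existing blkgs can be torn down and the
 * root blkg's policy data rebuilt via update_root_blkg_pd().
 */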