blkio: Export disk time and sectors used by a group to user space
[deliverable/linux.git] / block / cfq-iosched.c
index 019f28eea9df0998ddff3b7e06b6afcdc5745485..7d345e772d88120b5f4065f1f0a4d717a777e9e9 100644 (file)
@@ -82,6 +82,7 @@ struct cfq_rb_root {
        unsigned count;
        u64 min_vdisktime;
        struct rb_node *active;
+       unsigned total_weight;
 };
 #define CFQ_RB_ROOT    (struct cfq_rb_root) { RB_ROOT, NULL, 0, 0, }
 
@@ -114,6 +115,10 @@ struct cfq_queue {
        /* fifo list of requests in sort_list */
        struct list_head fifo;
 
+       /* time when queue got scheduled in to dispatch first request. */
+       unsigned long dispatch_start;
+       /* time when first request from queue completed and slice started. */
+       unsigned long slice_start;
        unsigned long slice_end;
        long slice_resid;
        unsigned int slice_dispatch;
@@ -138,6 +143,8 @@ struct cfq_queue {
        struct cfq_rb_root *service_tree;
        struct cfq_queue *new_cfqq;
        struct cfq_group *cfqg;
+       /* Sectors dispatched in current dispatch round */
+       unsigned long nr_sectors;
 };
 
 /*
@@ -172,12 +179,23 @@ struct cfq_group {
        /* number of cfqq currently on this group */
        int nr_cfqq;
 
+       /* Per group busy queues average. Useful for workload slice calc. */
+       unsigned int busy_queues_avg[2];
        /*
         * rr lists of queues with requests, onle rr for each priority class.
         * Counts are embedded in the cfq_rb_root
         */
        struct cfq_rb_root service_trees[2][3];
        struct cfq_rb_root service_tree_idle;
+
+       unsigned long saved_workload_slice;
+       enum wl_type_t saved_workload;
+       enum wl_prio_t saved_serving_prio;
+       struct blkio_group blkg;
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+       struct hlist_node cfqd_node;
+       atomic_t ref;
+#endif
 };
 
 /*
@@ -188,6 +206,8 @@ struct cfq_data {
        /* Root service tree for cfq_groups */
        struct cfq_rb_root grp_service_tree;
        struct cfq_group root_group;
+       /* Number of active cfq groups on group service tree */
+       int nr_groups;
 
        /*
         * The priority currently being served
@@ -206,7 +226,6 @@ struct cfq_data {
        struct rb_root prio_trees[CFQ_PRIO_LISTS];
 
        unsigned int busy_queues;
-       unsigned int busy_queues_avg[2];
 
        int rq_in_driver[2];
        int sync_flight;
@@ -262,8 +281,13 @@ struct cfq_data {
        struct cfq_queue oom_cfqq;
 
        unsigned long last_end_sync_rq;
+
+       /* List of cfq groups being managed on this device */
+       struct hlist_head cfqg_list;
 };
 
+static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd);
+
 static struct cfq_rb_root *service_tree_for(struct cfq_group *cfqg,
                                            enum wl_prio_t prio,
                                            enum wl_type_t type,
@@ -319,8 +343,21 @@ CFQ_CFQQ_FNS(coop);
 CFQ_CFQQ_FNS(deep);
 #undef CFQ_CFQQ_FNS
 
+#ifdef CONFIG_DEBUG_CFQ_IOSCHED
+#define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
+       blk_add_trace_msg((cfqd)->queue, "cfq%d%c %s " fmt, (cfqq)->pid, \
+                       cfq_cfqq_sync((cfqq)) ? 'S' : 'A', \
+                       blkg_path(&(cfqq)->cfqg->blkg), ##args);
+
+#define cfq_log_cfqg(cfqd, cfqg, fmt, args...)                         \
+       blk_add_trace_msg((cfqd)->queue, "%s " fmt,                     \
+                               blkg_path(&(cfqg)->blkg), ##args);      \
+
+#else
 #define cfq_log_cfqq(cfqd, cfqq, fmt, args...) \
        blk_add_trace_msg((cfqd)->queue, "cfq%d " fmt, (cfqq)->pid, ##args)
+#define cfq_log_cfqg(cfqd, cfqg, fmt, args...)         do {} while (0);
+#endif
 #define cfq_log(cfqd, fmt, args...)    \
        blk_add_trace_msg((cfqd)->queue, "cfq " fmt, ##args)
 
@@ -354,10 +391,10 @@ static enum wl_type_t cfqq_type(struct cfq_queue *cfqq)
        return SYNC_WORKLOAD;
 }
 
-static inline int cfq_busy_queues_wl(enum wl_prio_t wl, struct cfq_data *cfqd)
+static inline int cfq_group_busy_queues_wl(enum wl_prio_t wl,
+                                       struct cfq_data *cfqd,
+                                       struct cfq_group *cfqg)
 {
-       struct cfq_group *cfqg = &cfqd->root_group;
-
        if (wl == IDLE_WORKLOAD)
                return cfqg->service_tree_idle.count;
 
@@ -489,18 +526,27 @@ static void update_min_vdisktime(struct cfq_rb_root *st)
  * to quickly follows sudden increases and decrease slowly
  */
 
-static inline unsigned cfq_get_avg_queues(struct cfq_data *cfqd, bool rt)
+static inline unsigned cfq_group_get_avg_queues(struct cfq_data *cfqd,
+                                       struct cfq_group *cfqg, bool rt)
 {
        unsigned min_q, max_q;
        unsigned mult  = cfq_hist_divisor - 1;
        unsigned round = cfq_hist_divisor / 2;
-       unsigned busy = cfq_busy_queues_wl(rt, cfqd);
+       unsigned busy = cfq_group_busy_queues_wl(rt, cfqd, cfqg);
 
-       min_q = min(cfqd->busy_queues_avg[rt], busy);
-       max_q = max(cfqd->busy_queues_avg[rt], busy);
-       cfqd->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
+       min_q = min(cfqg->busy_queues_avg[rt], busy);
+       max_q = max(cfqg->busy_queues_avg[rt], busy);
+       cfqg->busy_queues_avg[rt] = (mult * max_q + min_q + round) /
                cfq_hist_divisor;
-       return cfqd->busy_queues_avg[rt];
+       return cfqg->busy_queues_avg[rt];
+}
+
+static inline unsigned
+cfq_group_slice(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+       struct cfq_rb_root *st = &cfqd->grp_service_tree;
+
+       return cfq_target_latency * cfqg->weight / st->total_weight;
 }
 
 static inline void
@@ -508,12 +554,17 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
 {
        unsigned slice = cfq_prio_to_slice(cfqd, cfqq);
        if (cfqd->cfq_latency) {
-               /* interested queues (we consider only the ones with the same
-                * priority class) */
-               unsigned iq = cfq_get_avg_queues(cfqd, cfq_class_rt(cfqq));
+               /*
+                * interested queues (we consider only the ones with the same
+                * priority class in the cfq group)
+                */
+               unsigned iq = cfq_group_get_avg_queues(cfqd, cfqq->cfqg,
+                                               cfq_class_rt(cfqq));
                unsigned sync_slice = cfqd->cfq_slice[1];
                unsigned expect_latency = sync_slice * iq;
-               if (expect_latency > cfq_target_latency) {
+               unsigned group_slice = cfq_group_slice(cfqd, cfqq->cfqg);
+
+               if (expect_latency > group_slice) {
                        unsigned base_low_slice = 2 * cfqd->cfq_slice_idle;
                        /* scale low_slice according to IO priority
                         * and sync vs async */
@@ -521,10 +572,11 @@ cfq_set_prio_slice(struct cfq_data *cfqd, struct cfq_queue *cfqq)
                                min(slice, base_low_slice * slice / sync_slice);
                        /* the adapted slice value is scaled to fit all iqs
                         * into the target latency */
-                       slice = max(slice * cfq_target_latency / expect_latency,
+                       slice = max(slice * group_slice / expect_latency,
                                    low_slice);
                }
        }
+       cfqq->slice_start = jiffies;
        cfqq->slice_end = jiffies + slice;
        cfq_log_cfqq(cfqd, cfqq, "set_slice=%lu", cfqq->slice_end - jiffies);
 }
@@ -776,6 +828,8 @@ cfq_group_service_tree_add(struct cfq_data *cfqd, struct cfq_group *cfqg)
 
        __cfq_group_service_tree_add(st, cfqg);
        cfqg->on_st = true;
+       cfqd->nr_groups++;
+       st->total_weight += cfqg->weight;
 }
 
 static void
@@ -793,11 +847,241 @@ cfq_group_service_tree_del(struct cfq_data *cfqd, struct cfq_group *cfqg)
        if (cfqg->nr_cfqq)
                return;
 
+       cfq_log_cfqg(cfqd, cfqg, "del_from_rr group");
        cfqg->on_st = false;
+       cfqd->nr_groups--;
+       st->total_weight -= cfqg->weight;
        if (!RB_EMPTY_NODE(&cfqg->rb_node))
                cfq_rb_erase(&cfqg->rb_node, st);
+       cfqg->saved_workload_slice = 0;
+       blkiocg_update_blkio_group_dequeue_stats(&cfqg->blkg, 1);
+}
+
+static inline unsigned int cfq_cfqq_slice_usage(struct cfq_queue *cfqq)
+{
+       unsigned int slice_used, allocated_slice;
+
+       /*
+        * Queue expired before a single request completed (slice_start is
+        * still 0) or within the same jiffy in which its slice started.
+        */
+       if (!cfqq->slice_start || cfqq->slice_start == jiffies) {
+               /*
+                * Charge the time since dispatch_start (at least one jiffy) so
+                * seek time is billed to the group. Otherwise, with multiple
+                * queues in a group, each could dispatch a single request on
+                * seeky media and the group would never be charged for it.
+                */
+               slice_used = max_t(unsigned, (jiffies - cfqq->dispatch_start),
+                                       1);
+       } else {
+               slice_used = jiffies - cfqq->slice_start;
+               allocated_slice = cfqq->slice_end - cfqq->slice_start;
+               if (slice_used > allocated_slice)
+                       slice_used = allocated_slice;
+       }
+
+       cfq_log_cfqq(cfqq->cfqd, cfqq, "sl_used=%u sect=%lu", slice_used,
+                               cfqq->nr_sectors);
+       return slice_used;
+}
+
+static void cfq_group_served(struct cfq_data *cfqd, struct cfq_group *cfqg,
+                               struct cfq_queue *cfqq)
+{
+       struct cfq_rb_root *st = &cfqd->grp_service_tree;
+       unsigned int used_sl;
+
+       used_sl = cfq_cfqq_slice_usage(cfqq);
+
+       /* Can't update vdisktime in place: erase, charge, then re-add */
+       cfq_rb_erase(&cfqg->rb_node, st);
+       cfqg->vdisktime += cfq_scale_slice(used_sl, cfqg);
+       __cfq_group_service_tree_add(st, cfqg);
+
+       /* Group is being expired: save its workload context for restore */
+       if (time_after(cfqd->workload_expires, jiffies)) {
+               cfqg->saved_workload_slice = cfqd->workload_expires
+                                               - jiffies;
+               cfqg->saved_workload = cfqd->serving_type;
+               cfqg->saved_serving_prio = cfqd->serving_prio;
+       } else
+               cfqg->saved_workload_slice = 0;
+
+       cfq_log_cfqg(cfqd, cfqg, "served: vt=%llu min_vt=%llu", cfqg->vdisktime,
+                                       st->min_vdisktime);
+       blkiocg_update_blkio_group_stats(&cfqg->blkg, used_sl,
+                                               cfqq->nr_sectors);
+}
+
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg)
+{
+       if (blkg)
+               return container_of(blkg, struct cfq_group, blkg);
+       return NULL;
+}
+
+static struct cfq_group *
+cfq_find_alloc_cfqg(struct cfq_data *cfqd, struct cgroup *cgroup, int create)
+{
+       struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgroup);
+       struct cfq_group *cfqg = NULL;
+       void *key = cfqd;
+       int i, j;
+       struct cfq_rb_root *st;
+       struct backing_dev_info *bdi = &cfqd->queue->backing_dev_info;
+       unsigned int major = 0, minor = 0;
+
+       /* Do we need to take this reference */
+       if (!css_tryget(&blkcg->css))
+               return NULL;
+
+       cfqg = cfqg_of_blkg(blkiocg_lookup_group(blkcg, key));
+       if (cfqg || !create)
+               goto done;
+
+       cfqg = kzalloc_node(sizeof(*cfqg), GFP_ATOMIC, cfqd->queue->node);
+       if (!cfqg)
+               goto done;
+
+       cfqg->weight = blkcg->weight;
+       for_each_cfqg_st(cfqg, i, j, st)
+               *st = CFQ_RB_ROOT;
+       RB_CLEAR_NODE(&cfqg->rb_node);
+
+       /*
+        * Take the initial reference that will be released on destroy
+        * This can be thought of a joint reference by cgroup and
+        * elevator which will be dropped by either elevator exit
+        * or cgroup deletion path depending on who is exiting first.
+        */
+       atomic_set(&cfqg->ref, 1);
+
+       /* Add group onto cgroup list; dev stays 0:0 if the name won't parse */
+       sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor);
+       blkiocg_add_blkio_group(blkcg, &cfqg->blkg, (void *)cfqd,
+                                       MKDEV(major, minor));
+
+       /* Add group on cfqd list */
+       hlist_add_head(&cfqg->cfqd_node, &cfqd->cfqg_list);
+
+done:
+       css_put(&blkcg->css);
+       return cfqg;
+}
+
+/*
+ * Find the cfq group of the current task. If create = 1, allocate it when
+ * missing (root group on failure). request_queue lock must be held.
+ */
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+{
+       struct cgroup *cgroup;
+       struct cfq_group *cfqg = NULL;
+
+       rcu_read_lock();
+       cgroup = task_cgroup(current, blkio_subsys_id);
+       cfqg = cfq_find_alloc_cfqg(cfqd, cgroup, create);
+       if (!cfqg && create)
+               cfqg = &cfqd->root_group;
+       rcu_read_unlock();
+       return cfqg;
+}
+
+static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
+{
+       /* Async queues always go to the root group, whatever cfqg was passed */
+       if (!cfq_cfqq_sync(cfqq))
+               cfqg = &cfqq->cfqd->root_group;
+
+       cfqq->cfqg = cfqg;
+       /* cfqq holds a reference on its cfqg; see cfq_put_cfqg() */
+       atomic_inc(&cfqq->cfqg->ref);
+}
+
+static void cfq_put_cfqg(struct cfq_group *cfqg)
+{
+       struct cfq_rb_root *st;
+       int i, j;
+
+       BUG_ON(atomic_read(&cfqg->ref) <= 0);
+       if (!atomic_dec_and_test(&cfqg->ref))
+               return;
+       for_each_cfqg_st(cfqg, i, j, st)
+               BUG_ON(!RB_EMPTY_ROOT(&st->rb) || st->active != NULL);
+       kfree(cfqg);
+}
+
+static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg)
+{
+       /* Something wrong if we are trying to remove same group twice */
+       BUG_ON(hlist_unhashed(&cfqg->cfqd_node));
+
+       hlist_del_init(&cfqg->cfqd_node);
+
+       /*
+        * Put the reference taken at the time of creation so that when all
+        * queues are gone, group can be destroyed.
+        */
+       cfq_put_cfqg(cfqg);
+}
+
+static void cfq_release_cfq_groups(struct cfq_data *cfqd)
+{
+       struct hlist_node *pos, *n;
+       struct cfq_group *cfqg;
+
+       hlist_for_each_entry_safe(cfqg, pos, n, &cfqd->cfqg_list, cfqd_node) {
+               /*
+                * If cgroup removal path got to blk_group first and removed
+                * it from cgroup list, then it will take care of destroying
+                * cfqg also.
+                */
+               if (!blkiocg_del_blkio_group(&cfqg->blkg))
+                       cfq_destroy_cfqg(cfqd, cfqg);
+       }
 }
 
+/*
+ * Blk cgroup controller notification saying that blkio_group object is being
+ * delinked as associated cgroup object is going away. That also means that
+ * no new IO will come in this group. So get rid of this group as soon as
+ * any pending IO in the group is finished.
+ *
+ * This function is called under rcu_read_lock(). key is the rcu protected
+ * pointer. That means "key" is a valid cfq_data pointer as long as we hold
+ * the rcu read lock.
+ *
+ * "key" was fetched from blkio_group under blkio_cgroup->lock. That means
+ * it should not be NULL as even if elevator was exiting, cgroup deletion
+ * path got to it first.
+ */
+void cfq_unlink_blkio_group(void *key, struct blkio_group *blkg)
+{
+       unsigned long  flags;
+       struct cfq_data *cfqd = key;
+
+       spin_lock_irqsave(cfqd->queue->queue_lock, flags);
+       cfq_destroy_cfqg(cfqd, cfqg_of_blkg(blkg));
+       spin_unlock_irqrestore(cfqd->queue->queue_lock, flags);
+}
+
+#else /* GROUP_IOSCHED */
+static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
+{
+       return &cfqd->root_group;
+}
+static inline void
+cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg) {
+       cfqq->cfqg = cfqg;
+}
+
+static void cfq_release_cfq_groups(struct cfq_data *cfqd) {}
+static inline void cfq_put_cfqg(struct cfq_group *cfqg) {}
+
+#endif /* GROUP_IOSCHED */
+
 /*
  * The cfqd->service_trees holds all pending cfq_queue's that have
  * requests waiting to be processed. It is sorted in the order that
@@ -811,6 +1095,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        unsigned long rb_key;
        struct cfq_rb_root *service_tree;
        int left;
+       int new_cfqq = 1;
 
        service_tree = service_tree_for(cfqq->cfqg, cfqq_prio(cfqq),
                                                cfqq_type(cfqq), cfqd);
@@ -839,6 +1124,7 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        }
 
        if (!RB_EMPTY_NODE(&cfqq->rb_node)) {
+               new_cfqq = 0;
                /*
                 * same position, nothing more to do
                 */
@@ -880,6 +1166,8 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        rb_link_node(&cfqq->rb_node, parent, p);
        rb_insert_color(&cfqq->rb_node, &service_tree->rb);
        service_tree->count++;
+       if (add_front || !new_cfqq)
+               return;
        cfq_group_service_tree_add(cfqd, cfqq->cfqg);
 }
 
@@ -1196,8 +1484,11 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd,
 {
        if (cfqq) {
                cfq_log_cfqq(cfqd, cfqq, "set_active");
+               cfqq->slice_start = 0;
+               cfqq->dispatch_start = jiffies;
                cfqq->slice_end = 0;
                cfqq->slice_dispatch = 0;
+               cfqq->nr_sectors = 0;
 
                cfq_clear_cfqq_wait_request(cfqq);
                cfq_clear_cfqq_must_dispatch(cfqq);
@@ -1233,6 +1524,8 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
                cfq_log_cfqq(cfqd, cfqq, "resid=%ld", cfqq->slice_resid);
        }
 
+       cfq_group_served(cfqd, cfqq->cfqg, cfqq);
+
        if (cfq_cfqq_on_rr(cfqq) && RB_EMPTY_ROOT(&cfqq->sort_list))
                cfq_del_cfqq_rr(cfqd, cfqq);
 
@@ -1241,6 +1534,9 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        if (cfqq == cfqd->active_queue)
                cfqd->active_queue = NULL;
 
+       if (&cfqq->cfqg->rb_node == cfqd->grp_service_tree.active)
+               cfqd->grp_service_tree.active = NULL;
+
        if (cfqd->active_cic) {
                put_io_context(cfqd->active_cic->ioc);
                cfqd->active_cic = NULL;
@@ -1278,7 +1574,7 @@ static struct cfq_queue *cfq_get_next_queue(struct cfq_data *cfqd)
 
 static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
 {
-       struct cfq_group *cfqg = &cfqd->root_group;
+       struct cfq_group *cfqg;
        struct cfq_queue *cfqq;
        int i, j;
        struct cfq_rb_root *st;
@@ -1286,6 +1582,10 @@ static struct cfq_queue *cfq_get_next_queue_forced(struct cfq_data *cfqd)
        if (!cfqd->rq_queued)
                return NULL;
 
+       cfqg = cfq_get_next_cfqg(cfqd);
+       if (!cfqg)
+               return NULL;
+
        for_each_cfqg_st(cfqg, i, j, st)
                if ((cfqq = cfq_rb_first(st)) != NULL)
                        return cfqq;
@@ -1512,6 +1812,7 @@ static void cfq_dispatch_insert(struct request_queue *q, struct request *rq)
 
        if (cfq_cfqq_sync(cfqq))
                cfqd->sync_flight++;
+       cfqq->nr_sectors += blk_rq_sectors(rq);
 }
 
 /*
@@ -1639,6 +1940,7 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
        unsigned slice;
        unsigned count;
        struct cfq_rb_root *st;
+       unsigned group_slice;
 
        if (!cfqg) {
                cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1647,9 +1949,9 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
        }
 
        /* Choose next priority. RT > BE > IDLE */
-       if (cfq_busy_queues_wl(RT_WORKLOAD, cfqd))
+       if (cfq_group_busy_queues_wl(RT_WORKLOAD, cfqd, cfqg))
                cfqd->serving_prio = RT_WORKLOAD;
-       else if (cfq_busy_queues_wl(BE_WORKLOAD, cfqd))
+       else if (cfq_group_busy_queues_wl(BE_WORKLOAD, cfqd, cfqg))
                cfqd->serving_prio = BE_WORKLOAD;
        else {
                cfqd->serving_prio = IDLE_WORKLOAD;
@@ -1687,9 +1989,11 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg)
         * proportional to the number of queues in that workload, over
         * all the queues in the same priority class
         */
-       slice = cfq_target_latency * count /
-               max_t(unsigned, cfqd->busy_queues_avg[cfqd->serving_prio],
-                     cfq_busy_queues_wl(cfqd->serving_prio, cfqd));
+       group_slice = cfq_group_slice(cfqd, cfqg);
+
+       slice = group_slice * count /
+               max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio],
+                     cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg));
 
        if (cfqd->serving_type == ASYNC_WORKLOAD)
                /* async workload slice is scaled down according to
@@ -1722,6 +2026,13 @@ static void cfq_choose_cfqg(struct cfq_data *cfqd)
        struct cfq_group *cfqg = cfq_get_next_cfqg(cfqd);
 
        cfqd->serving_group = cfqg;
+
+       /* Restore the workload type data */
+       if (cfqg->saved_workload_slice) {
+               cfqd->workload_expires = jiffies + cfqg->saved_workload_slice;
+               cfqd->serving_type = cfqg->saved_workload;
+               cfqd->serving_prio = cfqg->saved_serving_prio;
+       }
        choose_service_tree(cfqd, cfqg);
 }
 
@@ -1972,11 +2283,13 @@ static int cfq_dispatch_requests(struct request_queue *q, int force)
  * task holds one reference to the queue, dropped when task exits. each rq
  * in-flight on this queue also holds a reference, dropped when rq is freed.
  *
+ * Each cfq queue took a reference on the parent group. Drop it now.
  * queue lock must be held here.
  */
 static void cfq_put_queue(struct cfq_queue *cfqq)
 {
        struct cfq_data *cfqd = cfqq->cfqd;
+       struct cfq_group *cfqg;
 
        BUG_ON(atomic_read(&cfqq->ref) <= 0);
 
@@ -1986,6 +2299,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
        cfq_log_cfqq(cfqd, cfqq, "put_queue");
        BUG_ON(rb_first(&cfqq->sort_list));
        BUG_ON(cfqq->allocated[READ] + cfqq->allocated[WRITE]);
+       cfqg = cfqq->cfqg;
 
        if (unlikely(cfqd->active_queue == cfqq)) {
                __cfq_slice_expired(cfqd, cfqq, 0);
@@ -1994,6 +2308,7 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
 
        BUG_ON(cfq_cfqq_on_rr(cfqq));
        kmem_cache_free(cfq_pool, cfqq);
+       cfq_put_cfqg(cfqg);
 }
 
 /*
@@ -2286,16 +2601,6 @@ static void cfq_init_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq,
        cfqq->pid = pid;
 }
 
-static void cfq_link_cfqq_cfqg(struct cfq_queue *cfqq, struct cfq_group *cfqg)
-{
-       cfqq->cfqg = cfqg;
-}
-
-static struct cfq_group *cfq_get_cfqg(struct cfq_data *cfqd, int create)
-{
-       return &cfqd->root_group;
-}
-
 static struct cfq_queue *
 cfq_find_alloc_queue(struct cfq_data *cfqd, bool is_sync,
                     struct io_context *ioc, gfp_t gfp_mask)
@@ -2826,7 +3131,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq)
        unsigned long now;
 
        now = jiffies;
-       cfq_log_cfqq(cfqd, cfqq, "complete");
+       cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!rq_noidle(rq));
 
        cfq_update_hw_tag(cfqd);
 
@@ -3179,11 +3484,15 @@ static void cfq_exit_queue(struct elevator_queue *e)
        }
 
        cfq_put_async_queues(cfqd);
+       cfq_release_cfq_groups(cfqd);
+       blkiocg_del_blkio_group(&cfqd->root_group.blkg);
 
        spin_unlock_irq(q->queue_lock);
 
        cfq_shutdown_timer_wq(cfqd);
 
+       /* Wait for cfqg->blkg->key accessors to exit their grace periods. */
+       synchronize_rcu();
        kfree(cfqd);
 }
 
@@ -3210,6 +3519,15 @@ static void *cfq_init_queue(struct request_queue *q)
        /* Give preference to root group over other groups */
        cfqg->weight = 2*BLKIO_WEIGHT_DEFAULT;
 
+#ifdef CONFIG_CFQ_GROUP_IOSCHED
+       /*
+        * Take a reference to root group which we never drop. This is just
+        * to make sure that cfq_put_cfqg() does not try to kfree root group
+        */
+       atomic_set(&cfqg->ref, 1);
+       blkiocg_add_blkio_group(&blkio_root_cgroup, &cfqg->blkg, (void *)cfqd,
+                                       0);
+#endif
        /*
         * Not strictly needed (since RB_ROOT just clears the node and we
         * zeroed cfqd on alloc), but better be safe in case someone decides
This page took 0.034644 seconds and 5 git commands to generate.