From: Linus Torvalds Date: Wed, 4 Sep 2013 01:25:03 +0000 (-0700) Subject: Merge branch 'for-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup X-Git-Url: http://drtracing.org/?a=commitdiff_plain;h=32dad03d164206ea886885d0740284ba215b0970;p=deliverable%2Flinux.git Merge branch 'for-3.12' of git://git./linux/kernel/git/tj/cgroup Pull cgroup updates from Tejun Heo: "A lot of activities on the cgroup front. Most changes aren't visible to userland at all at this point and are laying foundation for the planned unified hierarchy. - The biggest change is decoupling the lifetime management of css (cgroup_subsys_state) from that of cgroup's. Because controllers (cpu, memory, block and so on) will need to be dynamically enabled and disabled, css which is the association point between a cgroup and a controller may come and go dynamically across the lifetime of a cgroup. Till now, css's were created when the associated cgroup was created and stayed till the cgroup got destroyed. Assumptions around this tight coupling permeated through cgroup core and controllers. These assumptions are gradually removed, which consists bulk of patches, and css destruction path is completely decoupled from cgroup destruction path. Note that decoupling of creation path is relatively easy on top of these changes and the patchset is pending for the next window. - cgroup has its own event mechanism cgroup.event_control, which is only used by memcg. It is overly complex trying to achieve high flexibility whose benefits seem dubious at best. Going forward, new events will simply generate file modified event and the existing mechanism is being made specific to memcg. This pull request contains prepatory patches for such change. - Various fixes and cleanups" Fixed up conflict in kernel/cgroup.c as per Tejun. * 'for-3.12' of git://git.kernel.org/pub/scm/linux/kernel/git/tj/cgroup: (69 commits) cgroup: fix cgroup_css() invocation in css_from_id() cgroup: make cgroup_write_event_control() use css_from_dir() instead of __d_cgrp() cgroup: make cgroup_event hold onto cgroup_subsys_state instead of cgroup cgroup: implement CFTYPE_NO_PREFIX cgroup: make cgroup_css() take cgroup_subsys * instead and allow NULL subsys cgroup: rename cgroup_css_from_dir() to css_from_dir() and update its syntax cgroup: fix cgroup_write_event_control() cgroup: fix subsystem file accesses on the root cgroup cgroup: change cgroup_from_id() to css_from_id() cgroup: use css_get() in cgroup_create() to check CSS_ROOT cpuset: remove an unncessary forward declaration cgroup: RCU protect each cgroup_subsys_state release cgroup: move subsys file removal to kill_css() cgroup: factor out kill_css() cgroup: decouple cgroup_subsys_state destruction from cgroup destruction cgroup: replace cgroup->css_kill_cnt with ->nr_css cgroup: bounce cgroup_subsys_state ref kill confirmation to a work item cgroup: move cgroup->subsys[] assignment to online_css() cgroup: reorganize css init / exit paths cgroup: add __rcu modifier to cgroup->subsys[] ... --- 32dad03d164206ea886885d0740284ba215b0970 diff --cc kernel/cgroup.c index e91963302c0d,b5f4989937f2..e0aeb32415ff --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@@ -802,9 -830,9 +830,8 @@@ static struct cgroup *task_cgroup_from_ */ static int cgroup_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode); -static struct dentry *cgroup_lookup(struct inode *, struct dentry *, unsigned int); static int cgroup_rmdir(struct inode *unused_dir, struct dentry *dentry); - static int cgroup_populate_dir(struct cgroup *cgrp, bool base_files, - unsigned long subsys_mask); + static int cgroup_populate_dir(struct cgroup *cgrp, unsigned long subsys_mask); static const struct inode_operations cgroup_dir_inode_operations; static const struct file_operations proc_cgroupstats_operations; @@@ -4496,51 -4693,13 +4685,29 @@@ static int cgroup_destroy_locked(struc if (!empty) return -EBUSY; + /* + * Make sure there's no live children. We can't test ->children + * emptiness as dead children linger on it while being destroyed; + * otherwise, "rmdir parent/child parent" may fail with -EBUSY. + */ + empty = true; + rcu_read_lock(); + list_for_each_entry_rcu(child, &cgrp->children, sibling) { + empty = cgroup_is_dead(child); + if (!empty) + break; + } + rcu_read_unlock(); + if (!empty) + return -EBUSY; + /* - * Block new css_tryget() by killing css refcnts. cgroup core - * guarantees that, by the time ->css_offline() is invoked, no new - * css reference will be given out via css_tryget(). We can't - * simply call percpu_ref_kill() and proceed to offlining css's - * because percpu_ref_kill() doesn't guarantee that the ref is seen - * as killed on all CPUs on return. - * - * Use percpu_ref_kill_and_confirm() to get notifications as each - * css is confirmed to be seen as killed on all CPUs. The - * notification callback keeps track of the number of css's to be - * killed and schedules cgroup_offline_fn() to perform the rest of - * destruction once the percpu refs of all css's are confirmed to - * be killed. + * Initiate massacre of all css's. cgroup_destroy_css_killed() + * will be invoked to perform the rest of destruction once the + * percpu refs of all css's are confirmed to be killed. */ - atomic_set(&cgrp->css_kill_cnt, 1); - for_each_root_subsys(cgrp->root, ss) { - struct cgroup_subsys_state *css = cgrp->subsys[ss->subsys_id]; - - /* - * Killing would put the base ref, but we need to keep it - * alive until after ->css_offline. - */ - percpu_ref_get(&css->refcnt); - - atomic_inc(&cgrp->css_kill_cnt); - percpu_ref_kill_and_confirm(&css->refcnt, css_ref_killed_fn); - } - cgroup_css_killed(cgrp); + for_each_root_subsys(cgrp->root, ss) + kill_css(cgroup_css(cgrp, ss)); /* * Mark @cgrp dead. This prevents further task migration and child diff --cc kernel/cpuset.c index ea1966db34f2,95f4b25e1538..6bf981e13c43 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@@ -1608,17 -1591,16 +1595,18 @@@ typedef enum FILE_SPREAD_SLAB, } cpuset_filetype_t; - static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) + static int cpuset_write_u64(struct cgroup_subsys_state *css, struct cftype *cft, + u64 val) { - struct cpuset *cs = cgroup_cs(cgrp); + struct cpuset *cs = css_cs(css); cpuset_filetype_t type = cft->private; - int retval = -ENODEV; + int retval = 0; mutex_lock(&cpuset_mutex); - if (!is_cpuset_online(cs)) + if (!is_cpuset_online(cs)) { + retval = -ENODEV; goto out_unlock; + } switch (type) { case FILE_CPU_EXCLUSIVE: diff --cc mm/memcontrol.c index 0878ff7c26a9,b89d4cbc0c08..3b83957b6439 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@@ -6335,12 -6304,11 +6304,12 @@@ static void mem_cgroup_css_offline(stru mem_cgroup_invalidate_reclaim_iterators(memcg); mem_cgroup_reparent_charges(memcg); mem_cgroup_destroy_all_caches(memcg); + vmpressure_cleanup(&memcg->vmpressure); } - static void mem_cgroup_css_free(struct cgroup *cont) + static void mem_cgroup_css_free(struct cgroup_subsys_state *css) { - struct mem_cgroup *memcg = mem_cgroup_from_cont(cont); + struct mem_cgroup *memcg = mem_cgroup_from_css(css); memcg_destroy_kmem(memcg); __mem_cgroup_free(memcg);