#define MEM_CGROUP_RECLAIM_SHRINK_BIT 0x1
#define MEM_CGROUP_RECLAIM_SHRINK (1 << MEM_CGROUP_RECLAIM_SHRINK_BIT)
+/*
+ * The memcg_create_mutex will be held whenever a new cgroup is created.
+ * As a consequence, any change that needs to protect against new child cgroups
+ * appearing has to hold it as well.
+ */
+static DEFINE_MUTEX(memcg_create_mutex);
+
static void mem_cgroup_get(struct mem_cgroup *memcg);
static void mem_cgroup_put(struct mem_cgroup *memcg);
return inactive * inactive_ratio < active;
}
-int mem_cgroup_inactive_file_is_low(struct lruvec *lruvec)
-{
- unsigned long active;
- unsigned long inactive;
-
- inactive = mem_cgroup_get_lru_size(lruvec, LRU_INACTIVE_FILE);
- active = mem_cgroup_get_lru_size(lruvec, LRU_ACTIVE_FILE);
-
- return (active > inactive);
-}
-
#define mem_cgroup_from_res_counter(counter, member) \
container_of(counter, struct mem_cgroup, member)
clear_bit(FLUSHING_CACHED_CHARGE, &stock->flags);
}
+static void __init memcg_stock_init(void)
+{
+ int cpu;
+
+ for_each_possible_cpu(cpu) {
+ struct memcg_stock_pcp *stock =
+ &per_cpu(memcg_stock, cpu);
+ INIT_WORK(&stock->work, drain_local_stock);
+ }
+}
+
/*
* Cache charges(val) which is from res_counter, to local per_cpu area.
* This will be consumed by consume_stock() function, later.
} while (usage > 0);
}
+/*
+ * This mainly exists for tests during the setting of set of use_hierarchy.
+ * Since this is the very setting we are changing, the current hierarchy value
+ * is meaningless
+ */
+static inline bool __memcg_has_children(struct mem_cgroup *memcg)
+{
+ struct cgroup *pos;
+
+ /* bounce at first found */
+ cgroup_for_each_child(pos, memcg->css.cgroup)
+ return true;
+ return false;
+}
+
+/*
+ * Must be called with memcg_create_mutex held, unless the cgroup is guaranteed
+ * to be already dead (as in mem_cgroup_force_empty, for instance). This is
+ * from mem_cgroup_count_children(), in the sense that we don't really care how
+ * many children we have; we only need to know if we have any. It also counts
+ * any memcg without hierarchy as infertile.
+ */
+static inline bool memcg_has_children(struct mem_cgroup *memcg)
+{
+ return memcg->use_hierarchy && __memcg_has_children(memcg);
+}
+
/*
* Reclaims as many pages from the given memcg as possible and moves
* the rest to the parent.
if (parent)
parent_memcg = mem_cgroup_from_cont(parent);
- cgroup_lock();
+ mutex_lock(&memcg_create_mutex);
if (memcg->use_hierarchy == val)
goto out;
*/
if ((!parent_memcg || !parent_memcg->use_hierarchy) &&
(val == 1 || val == 0)) {
- if (list_empty(&cont->children))
+ if (!__memcg_has_children(memcg))
memcg->use_hierarchy = val;
else
retval = -EBUSY;
retval = -EINVAL;
out:
- cgroup_unlock();
+ mutex_unlock(&memcg_create_mutex);
return retval;
}
{
int ret = -EINVAL;
#ifdef CONFIG_MEMCG_KMEM
- bool must_inc_static_branch = false;
-
struct mem_cgroup *memcg = mem_cgroup_from_cont(cont);
/*
* For simplicity, we won't allow this to be disabled. It also can't
*
* After it first became limited, changes in the value of the limit are
* of course permitted.
- *
- * Taking the cgroup_lock is really offensive, but it is so far the only
- * way to guarantee that no children will appear. There are plenty of
- * other offenders, and they should all go away. Fine grained locking
- * is probably the way to go here. When we are fully hierarchical, we
- * can also get rid of the use_hierarchy check.
*/
- cgroup_lock();
+ mutex_lock(&memcg_create_mutex);
mutex_lock(&set_limit_mutex);
if (!memcg->kmem_account_flags && val != RESOURCE_MAX) {
- if (cgroup_task_count(cont) || (memcg->use_hierarchy &&
- !list_empty(&cont->children))) {
+ if (cgroup_task_count(cont) || memcg_has_children(memcg)) {
ret = -EBUSY;
goto out;
}
res_counter_set_limit(&memcg->kmem, RESOURCE_MAX);
goto out;
}
- must_inc_static_branch = true;
+ static_key_slow_inc(&memcg_kmem_enabled_key);
+ /*
+ * setting the active bit after the inc will guarantee no one
+ * starts accounting before all call sites are patched
+ */
+ memcg_kmem_set_active(memcg);
+
/*
* kmem charges can outlive the cgroup. In the case of slab
* pages, for instance, a page contain objects from various
ret = res_counter_set_limit(&memcg->kmem, val);
out:
mutex_unlock(&set_limit_mutex);
- cgroup_unlock();
-
- /*
- * We are by now familiar with the fact that we can't inc the static
- * branch inside cgroup_lock. See disarm functions for details. A
- * worker here is overkill, but also wrong: After the limit is set, we
- * must start accounting right away. Since this operation can't fail,
- * we can safely defer it to here - no rollback will be needed.
- *
- * The boolean used to control this is also safe, because
- * KMEM_ACCOUNTED_ACTIVATED guarantees that only one process will be
- * able to set it to true;
- */
- if (must_inc_static_branch) {
- static_key_slow_inc(&memcg_kmem_enabled_key);
- /*
- * setting the active bit after the inc will guarantee no one
- * starts accounting before all call sites are patched
- */
- memcg_kmem_set_active(memcg);
- }
-
+ mutex_unlock(&memcg_create_mutex);
#endif
return ret;
}
+#ifdef CONFIG_MEMCG_KMEM
static int memcg_propagate_kmem(struct mem_cgroup *memcg)
{
int ret = 0;
goto out;
memcg->kmem_account_flags = parent->kmem_account_flags;
-#ifdef CONFIG_MEMCG_KMEM
/*
* When that happen, we need to disable the static branch only on those
* memcgs that enabled it. To achieve this, we would be forced to
mutex_lock(&set_limit_mutex);
ret = memcg_update_cache_sizes(memcg);
mutex_unlock(&set_limit_mutex);
-#endif
out:
return ret;
}
+#endif /* CONFIG_MEMCG_KMEM */
/*
* The user of this function is...
parent = mem_cgroup_from_cont(cgrp->parent);
- cgroup_lock();
+ mutex_lock(&memcg_create_mutex);
/* If under hierarchy, only empty-root can set this value */
- if ((parent->use_hierarchy) ||
- (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
- cgroup_unlock();
+ if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
+ mutex_unlock(&memcg_create_mutex);
return -EINVAL;
}
memcg->swappiness = val;
- cgroup_unlock();
+ mutex_unlock(&memcg_create_mutex);
return 0;
}
parent = mem_cgroup_from_cont(cgrp->parent);
- cgroup_lock();
+ mutex_lock(&memcg_create_mutex);
/* oom-kill-disable is a flag for subhierarchy. */
- if ((parent->use_hierarchy) ||
- (memcg->use_hierarchy && !list_empty(&cgrp->children))) {
- cgroup_unlock();
+ if ((parent->use_hierarchy) || memcg_has_children(memcg)) {
+ mutex_unlock(&memcg_create_mutex);
return -EINVAL;
}
memcg->oom_kill_disable = val;
if (!val)
memcg_oom_recover(memcg);
- cgroup_unlock();
+ mutex_unlock(&memcg_create_mutex);
return 0;
}
}
EXPORT_SYMBOL(parent_mem_cgroup);
-static int mem_cgroup_soft_limit_tree_init(void)
+static void __init mem_cgroup_soft_limit_tree_init(void)
{
struct mem_cgroup_tree_per_node *rtpn;
struct mem_cgroup_tree_per_zone *rtpz;
if (!node_state(node, N_NORMAL_MEMORY))
tmp = -1;
rtpn = kzalloc_node(sizeof(*rtpn), GFP_KERNEL, tmp);
- if (!rtpn)
- goto err_cleanup;
+ BUG_ON(!rtpn);
soft_limit_tree.rb_tree_per_node[node] = rtpn;
spin_lock_init(&rtpz->lock);
}
}
- return 0;
-
-err_cleanup:
- for_each_node(node) {
- if (!soft_limit_tree.rb_tree_per_node[node])
- break;
- kfree(soft_limit_tree.rb_tree_per_node[node]);
- soft_limit_tree.rb_tree_per_node[node] = NULL;
- }
- return 1;
-
}
static struct cgroup_subsys_state * __ref
/* root ? */
if (cont->parent == NULL) {
- int cpu;
-
- if (mem_cgroup_soft_limit_tree_init())
- goto free_out;
root_mem_cgroup = memcg;
- for_each_possible_cpu(cpu) {
- struct memcg_stock_pcp *stock =
- &per_cpu(memcg_stock, cpu);
- INIT_WORK(&stock->work, drain_local_stock);
- }
-
res_counter_init(&memcg->res, NULL);
res_counter_init(&memcg->memsw, NULL);
res_counter_init(&memcg->kmem, NULL);
if (!cont->parent)
return 0;
+ mutex_lock(&memcg_create_mutex);
memcg = mem_cgroup_from_cont(cont);
parent = mem_cgroup_from_cont(cont->parent);
}
error = memcg_init_kmem(memcg, &mem_cgroup_subsys);
+ mutex_unlock(&memcg_create_mutex);
if (error) {
/*
* We call put now because our (and parent's) refcnts
* call __mem_cgroup_free, so return directly
*/
mem_cgroup_put(memcg);
+ if (parent->use_hierarchy)
+ mem_cgroup_put(parent);
}
return error;
}
#endif
/*
- * The rest of init is performed during ->css_alloc() for root css which
- * happens before initcalls. hotcpu_notifier() can't be done together as
- * it would introduce circular locking by adding cgroup_lock -> cpu hotplug
- * dependency. Do it from a subsys_initcall().
+ * subsys_initcall() for memory controller.
+ *
+ * Some parts like hotcpu_notifier() have to be initialized from this context
+ * because of lock dependencies (cgroup_lock -> cpu hotplug) but basically
+ * everything that doesn't depend on a specific mem_cgroup structure should
+ * be initialized from here.
*/
static int __init mem_cgroup_init(void)
{
hotcpu_notifier(memcg_cpu_hotplug_callback, 0);
enable_swap_cgroup();
+ mem_cgroup_soft_limit_tree_init();
+ memcg_stock_init();
return 0;
}
subsys_initcall(mem_cgroup_init);