#include <linux/seq_file.h>
#include <linux/vmpressure.h>
#include <linux/mm_inline.h>
-#include <linux/page_cgroup.h>
+#include <linux/swap_cgroup.h>
#include <linux/cpu.h>
#include <linux/oom.h>
#include <linux/lockdep.h>
* Should the accounting and control be hierarchical, per subtree?
*/
bool use_hierarchy;
- unsigned long kmem_account_flags; /* See KMEM_ACCOUNTED_*, below */
bool oom_lock;
atomic_t under_oom;
/* WARNING: nodeinfo must be the last member here */
};
-/* internal only representation about the status of kmem accounting. */
-enum {
- KMEM_ACCOUNTED_ACTIVE, /* accounted by this cgroup itself */
-};
-
#ifdef CONFIG_MEMCG_KMEM
-static inline void memcg_kmem_set_active(struct mem_cgroup *memcg)
-{
- set_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
-}
-
static bool memcg_kmem_is_active(struct mem_cgroup *memcg)
{
- return test_bit(KMEM_ACCOUNTED_ACTIVE, &memcg->kmem_account_flags);
+ return memcg->kmemcg_id >= 0;
}
-
#endif
/* Stuffs for move charges at task migration. */
{
struct mem_cgroup_per_zone *mz;
struct mem_cgroup *memcg;
- struct page_cgroup *pc;
struct lruvec *lruvec;
if (mem_cgroup_disabled()) {
goto out;
}
- pc = lookup_page_cgroup(page);
- memcg = pc->mem_cgroup;
-
+ memcg = page->mem_cgroup;
/*
* Swapcache readahead pages are added to the LRU - and
- * possibly migrated - before they are charged. Ensure
- * pc->mem_cgroup is sane.
+ * possibly migrated - before they are charged.
*/
- if (!PageLRU(page) && !PageCgroupUsed(pc) && memcg != root_mem_cgroup)
- pc->mem_cgroup = memcg = root_mem_cgroup;
+ if (!memcg)
+ memcg = root_mem_cgroup;
mz = mem_cgroup_page_zoneinfo(memcg, page);
lruvec = &mz->lruvec;
VM_BUG_ON((long)(*lru_size) < 0);
}
-/*
- * Checks whether given mem is same or in the root_mem_cgroup's
- * hierarchy subtree
- */
-bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
- struct mem_cgroup *memcg)
+bool mem_cgroup_is_descendant(struct mem_cgroup *memcg, struct mem_cgroup *root)
{
- if (root_memcg == memcg)
+ if (root == memcg)
return true;
- if (!root_memcg->use_hierarchy || !memcg)
+ if (!root->use_hierarchy)
return false;
- return cgroup_is_descendant(memcg->css.cgroup, root_memcg->css.cgroup);
-}
-
-static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,
- struct mem_cgroup *memcg)
-{
- bool ret;
-
- rcu_read_lock();
- ret = __mem_cgroup_same_or_subtree(root_memcg, memcg);
- rcu_read_unlock();
- return ret;
+ return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
}
-bool task_in_mem_cgroup(struct task_struct *task,
- const struct mem_cgroup *memcg)
+bool task_in_mem_cgroup(struct task_struct *task, struct mem_cgroup *memcg)
{
- struct mem_cgroup *curr = NULL;
+ struct mem_cgroup *task_memcg;
struct task_struct *p;
bool ret;
p = find_lock_task_mm(task);
if (p) {
- curr = get_mem_cgroup_from_mm(p->mm);
+ task_memcg = get_mem_cgroup_from_mm(p->mm);
task_unlock(p);
} else {
/*
* killed to prevent needlessly killing additional tasks.
*/
rcu_read_lock();
- curr = mem_cgroup_from_task(task);
- if (curr)
- css_get(&curr->css);
+ task_memcg = mem_cgroup_from_task(task);
+ css_get(&task_memcg->css);
rcu_read_unlock();
}
- /*
- * We should check use_hierarchy of "memcg" not "curr". Because checking
- * use_hierarchy of "curr" here make this function true if hierarchy is
- * enabled in "curr" and "curr" is a child of "memcg" in *cgroup*
- * hierarchy(even if use_hierarchy is disabled in "memcg").
- */
- ret = mem_cgroup_same_or_subtree(memcg, curr);
- css_put(&curr->css);
+ ret = mem_cgroup_is_descendant(task_memcg, memcg);
+ css_put(&task_memcg->css);
return ret;
}
return memcg->swappiness;
}
-/*
- * memcg->moving_account is used for checking possibility that some thread is
- * calling move_account(). When a thread on CPU-A starts moving pages under
- * a memcg, other threads should check memcg->moving_account under
- * rcu_read_lock(), like this:
- *
- * CPU-A CPU-B
- * rcu_read_lock()
- * memcg->moving_account+1 if (memcg->mocing_account)
- * take heavy locks.
- * synchronize_rcu() update something.
- * rcu_read_unlock()
- * start move here.
- */
-
-static void mem_cgroup_start_move(struct mem_cgroup *memcg)
-{
- atomic_inc(&memcg->moving_account);
- synchronize_rcu();
-}
-
-static void mem_cgroup_end_move(struct mem_cgroup *memcg)
-{
- /*
- * Now, mem_cgroup_clear_mc() may call this function with NULL.
- * We check NULL in callee rather than caller.
- */
- if (memcg)
- atomic_dec(&memcg->moving_account);
-}
-
/*
* A routine for checking "mem" is under move_account() or not.
*
if (!from)
goto unlock;
- ret = mem_cgroup_same_or_subtree(memcg, from)
- || mem_cgroup_same_or_subtree(memcg, to);
+ ret = mem_cgroup_is_descendant(from, memcg) ||
+ mem_cgroup_is_descendant(to, memcg);
unlock:
spin_unlock(&mc.lock);
return ret;
return false;
}
-/*
- * Take this lock when
- * - a code tries to modify page's memcg while it's USED.
- * - a code tries to modify page state accounting in a memcg.
- */
-static void move_lock_mem_cgroup(struct mem_cgroup *memcg,
- unsigned long *flags)
-{
- spin_lock_irqsave(&memcg->move_lock, *flags);
-}
-
-static void move_unlock_mem_cgroup(struct mem_cgroup *memcg,
- unsigned long *flags)
-{
- spin_unlock_irqrestore(&memcg->move_lock, *flags);
-}
-
#define K(x) ((x) << (PAGE_SHIFT-10))
/**
* mem_cgroup_print_oom_info: Print OOM information relevant to memory controller.
* select it. The goal is to allow it to allocate so that it may
* quickly exit and free its memory.
*/
- if (fatal_signal_pending(current) || current->flags & PF_EXITING) {
+ if (fatal_signal_pending(current) || task_will_free_mem(current)) {
set_thread_flag(TIF_MEMDIE);
return;
}
NULL, "Memory cgroup out of memory");
}
+#if MAX_NUMNODES > 1
+
/**
* test_mem_cgroup_node_reclaimable
* @memcg: the target memcg
return false;
}
-#if MAX_NUMNODES > 1
/*
* Always updating the nodemask is not very good - even if we have an empty
memcg->last_scanned_node = node;
return node;
}
-
-/*
- * Check all nodes whether it contains reclaimable pages or not.
- * For quick scan, we make use of scan_nodes. This will allow us to skip
- * unused nodes. But scan_nodes is lazily updated and may not cotain
- * enough new information. We need to do double check.
- */
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
- int nid;
-
- /*
- * quick check...making use of scan_node.
- * We can skip unused nodes.
- */
- if (!nodes_empty(memcg->scan_nodes)) {
- for (nid = first_node(memcg->scan_nodes);
- nid < MAX_NUMNODES;
- nid = next_node(nid, memcg->scan_nodes)) {
-
- if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
- return true;
- }
- }
- /*
- * Check rest of nodes.
- */
- for_each_node_state(nid, N_MEMORY) {
- if (node_isset(nid, memcg->scan_nodes))
- continue;
- if (test_mem_cgroup_node_reclaimable(memcg, nid, noswap))
- return true;
- }
- return false;
-}
-
#else
int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
{
return 0;
}
-
-static bool mem_cgroup_reclaimable(struct mem_cgroup *memcg, bool noswap)
-{
- return test_mem_cgroup_node_reclaimable(memcg, 0, noswap);
-}
#endif
static int mem_cgroup_soft_reclaim(struct mem_cgroup *root_memcg,
}
continue;
}
- if (!mem_cgroup_reclaimable(victim, false))
- continue;
total += mem_cgroup_shrink_node_zone(victim, gfp_mask, false,
zone, &nr_scanned);
*total_scanned += nr_scanned;
oom_wait_info = container_of(wait, struct oom_wait_info, wait);
oom_wait_memcg = oom_wait_info->memcg;
- /*
- * Both of oom_wait_info->memcg and wake_memcg are stable under us.
- * Then we can use css_is_ancestor without taking care of RCU.
- */
- if (!mem_cgroup_same_or_subtree(oom_wait_memcg, wake_memcg)
- && !mem_cgroup_same_or_subtree(wake_memcg, oom_wait_memcg))
+ if (!mem_cgroup_is_descendant(wake_memcg, oom_wait_memcg) &&
+ !mem_cgroup_is_descendant(oom_wait_memcg, wake_memcg))
return 0;
return autoremove_wake_function(wait, mode, sync, arg);
}
unsigned long *flags)
{
struct mem_cgroup *memcg;
- struct page_cgroup *pc;
rcu_read_lock();
if (mem_cgroup_disabled())
return NULL;
-
- pc = lookup_page_cgroup(page);
again:
- memcg = pc->mem_cgroup;
- if (unlikely(!memcg || !PageCgroupUsed(pc)))
+ memcg = page->mem_cgroup;
+ if (unlikely(!memcg))
return NULL;
*locked = false;
if (atomic_read(&memcg->moving_account) <= 0)
return memcg;
- move_lock_mem_cgroup(memcg, flags);
- if (memcg != pc->mem_cgroup || !PageCgroupUsed(pc)) {
- move_unlock_mem_cgroup(memcg, flags);
+ spin_lock_irqsave(&memcg->move_lock, *flags);
+ if (memcg != page->mem_cgroup) {
+ spin_unlock_irqrestore(&memcg->move_lock, *flags);
goto again;
}
*locked = true;
* @locked: value received from mem_cgroup_begin_page_stat()
* @flags: value received from mem_cgroup_begin_page_stat()
*/
-void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool locked,
- unsigned long flags)
+void mem_cgroup_end_page_stat(struct mem_cgroup *memcg, bool *locked,
+ unsigned long *flags)
{
- if (memcg && locked)
- move_unlock_mem_cgroup(memcg, &flags);
+ if (memcg && *locked)
+ spin_unlock_irqrestore(&memcg->move_lock, *flags);
rcu_read_unlock();
}
memcg = stock->cached;
if (!memcg || !stock->nr_pages)
continue;
- if (!mem_cgroup_same_or_subtree(root_memcg, memcg))
+ if (!mem_cgroup_is_descendant(memcg, root_memcg))
continue;
if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
if (cpu == curcpu)
*/
struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page)
{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
+ struct mem_cgroup *memcg;
unsigned short id;
swp_entry_t ent;
VM_BUG_ON_PAGE(!PageLocked(page), page);
- pc = lookup_page_cgroup(page);
- if (PageCgroupUsed(pc)) {
- memcg = pc->mem_cgroup;
- if (memcg && !css_tryget_online(&memcg->css))
+ memcg = page->mem_cgroup;
+ if (memcg) {
+ if (!css_tryget_online(&memcg->css))
memcg = NULL;
} else if (PageSwapCache(page)) {
ent.val = page_private(page);
static void commit_charge(struct page *page, struct mem_cgroup *memcg,
bool lrucare)
{
- struct page_cgroup *pc = lookup_page_cgroup(page);
int isolated;
- VM_BUG_ON_PAGE(PageCgroupUsed(pc), page);
- /*
- * we don't need page_cgroup_lock about tail pages, becase they are not
- * accessed by any other context at this point.
- */
+ VM_BUG_ON_PAGE(page->mem_cgroup, page);
/*
* In some cases, SwapCache and FUSE(splice_buf->radixtree), the page
/*
* Nobody should be changing or seriously looking at
- * pc->mem_cgroup and pc->flags at this point:
+ * page->mem_cgroup at this point:
*
* - the page is uncharged
*
* - a page cache insertion, a swapin fault, or a migration
* have the page locked
*/
- pc->mem_cgroup = memcg;
- pc->flags = PCG_USED | PCG_MEM | (do_swap_account ? PCG_MEMSW : 0);
+ page->mem_cgroup = memcg;
if (lrucare)
unlock_page_lru(page, isolated);
return cache_from_memcg_idx(cachep, memcg_cache_id(p->memcg));
}
-#ifdef CONFIG_SLABINFO
-static int mem_cgroup_slabinfo_read(struct seq_file *m, void *v)
-{
- struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
- struct memcg_cache_params *params;
-
- if (!memcg_kmem_is_active(memcg))
- return -EIO;
-
- print_slabinfo_header(m);
-
- mutex_lock(&memcg_slab_mutex);
- list_for_each_entry(params, &memcg->memcg_slab_caches, list)
- cache_show(memcg_params_to_cache(params), m);
- mutex_unlock(&memcg_slab_mutex);
-
- return 0;
-}
-#endif
-
static int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp,
unsigned long nr_pages)
{
if (!cachep)
return;
- css_get(&memcg->css);
list_add(&cachep->memcg_params->list, &memcg->memcg_slab_caches);
/*
list_del(&cachep->memcg_params->list);
kmem_cache_destroy(cachep);
-
- /* drop the reference taken in memcg_register_cache */
- css_put(&memcg->css);
-}
-
-/*
- * During the creation a new cache, we need to disable our accounting mechanism
- * altogether. This is true even if we are not creating, but rather just
- * enqueing new caches to be created.
- *
- * This is because that process will trigger allocations; some visible, like
- * explicit kmallocs to auxiliary data structures, name strings and internal
- * cache structures; some well concealed, like INIT_WORK() that can allocate
- * objects during debug.
- *
- * If any allocation happens during memcg_kmem_get_cache, we will recurse back
- * to it. This may not be a bounded recursion: since the first cache creation
- * failed to complete (waiting on the allocation), we'll just try to create the
- * cache again, failing at the same point.
- *
- * memcg_kmem_get_cache is prepared to abort after seeing a positive count of
- * memcg_kmem_skip_account. So we enclose anything that might allocate memory
- * inside the following two functions.
- */
-static inline void memcg_stop_kmem_account(void)
-{
- VM_BUG_ON(!current->mm);
- current->memcg_kmem_skip_account++;
-}
-
-static inline void memcg_resume_kmem_account(void)
-{
- VM_BUG_ON(!current->mm);
- current->memcg_kmem_skip_account--;
}
int __memcg_cleanup_cache_params(struct kmem_cache *s)
mutex_lock(&memcg_slab_mutex);
list_for_each_entry_safe(params, tmp, &memcg->memcg_slab_caches, list) {
cachep = memcg_params_to_cache(params);
- kmem_cache_shrink(cachep);
- if (atomic_read(&cachep->memcg_params->nr_pages) == 0)
- memcg_unregister_cache(cachep);
+ memcg_unregister_cache(cachep);
}
mutex_unlock(&memcg_slab_mutex);
}
struct memcg_register_cache_work *cw;
cw = kmalloc(sizeof(*cw), GFP_NOWAIT);
- if (cw == NULL) {
- css_put(&memcg->css);
+ if (!cw)
return;
- }
+
+ css_get(&memcg->css);
cw->memcg = memcg;
cw->cachep = cachep;
* this point we can't allow ourselves back into memcg_kmem_get_cache,
* the safest choice is to do it like this, wrapping the whole function.
*/
- memcg_stop_kmem_account();
+ current->memcg_kmem_skip_account = 1;
__memcg_schedule_register_cache(memcg, cachep);
- memcg_resume_kmem_account();
+ current->memcg_kmem_skip_account = 0;
}
int __memcg_charge_slab(struct kmem_cache *cachep, gfp_t gfp, int order)
{
unsigned int nr_pages = 1 << order;
- int res;
- res = memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
- if (!res)
- atomic_add(nr_pages, &cachep->memcg_params->nr_pages);
- return res;
+ return memcg_charge_kmem(cachep->memcg_params->memcg, gfp, nr_pages);
}
void __memcg_uncharge_slab(struct kmem_cache *cachep, int order)
unsigned int nr_pages = 1 << order;
memcg_uncharge_kmem(cachep->memcg_params->memcg, nr_pages);
- atomic_sub(nr_pages, &cachep->memcg_params->nr_pages);
}
/*
* Can't be called in interrupt context or from kernel threads.
* This function needs to be called with rcu_read_lock() held.
*/
-struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep,
- gfp_t gfp)
+struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep)
{
struct mem_cgroup *memcg;
struct kmem_cache *memcg_cachep;
VM_BUG_ON(!cachep->memcg_params);
VM_BUG_ON(!cachep->memcg_params->is_root_cache);
- if (!current->mm || current->memcg_kmem_skip_account)
+ if (current->memcg_kmem_skip_account)
return cachep;
- rcu_read_lock();
- memcg = mem_cgroup_from_task(rcu_dereference(current->mm->owner));
-
+ memcg = get_mem_cgroup_from_mm(current->mm);
if (!memcg_kmem_is_active(memcg))
goto out;
memcg_cachep = cache_from_memcg_idx(cachep, memcg_cache_id(memcg));
- if (likely(memcg_cachep)) {
- cachep = memcg_cachep;
- goto out;
- }
-
- /* The corresponding put will be done in the workqueue. */
- if (!css_tryget_online(&memcg->css))
- goto out;
- rcu_read_unlock();
+ if (likely(memcg_cachep))
+ return memcg_cachep;
/*
* If we are in a safe context (can wait, and not in interrupt
* defer everything.
*/
memcg_schedule_register_cache(memcg, cachep);
- return cachep;
out:
- rcu_read_unlock();
+ css_put(&memcg->css);
return cachep;
}
+void __memcg_kmem_put_cache(struct kmem_cache *cachep)
+{
+ if (!is_root_cache(cachep))
+ css_put(&cachep->memcg_params->memcg->css);
+}
+
/*
* We need to verify if the allocation against current->mm->owner's memcg is
* possible for the given order. But the page is not allocated yet, so we'll
*_memcg = NULL;
- /*
- * Disabling accounting is only relevant for some specific memcg
- * internal allocations. Therefore we would initially not have such
- * check here, since direct calls to the page allocator that are
- * accounted to kmemcg (alloc_kmem_pages and friends) only happen
- * outside memcg core. We are mostly concerned with cache allocations,
- * and by having this test at memcg_kmem_get_cache, we are already able
- * to relay the allocation to the root cache and bypass the memcg cache
- * altogether.
- *
- * There is one exception, though: the SLUB allocator does not create
- * large order caches, but rather service large kmallocs directly from
- * the page allocator. Therefore, the following sequence when backed by
- * the SLUB allocator:
- *
- * memcg_stop_kmem_account();
- * kmalloc(<large_number>)
- * memcg_resume_kmem_account();
- *
- * would effectively ignore the fact that we should skip accounting,
- * since it will drive us directly to this function without passing
- * through the cache selector memcg_kmem_get_cache. Such large
- * allocations are extremely rare but can happen, for instance, for the
- * cache arrays. We bring this test here.
- */
- if (!current->mm || current->memcg_kmem_skip_account)
- return true;
-
memcg = get_mem_cgroup_from_mm(current->mm);
if (!memcg_kmem_is_active(memcg)) {
void __memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg,
int order)
{
- struct page_cgroup *pc;
-
VM_BUG_ON(mem_cgroup_is_root(memcg));
/* The page allocation failed. Revert */
memcg_uncharge_kmem(memcg, 1 << order);
return;
}
- /*
- * The page is freshly allocated and not visible to any
- * outside callers yet. Set up pc non-atomically.
- */
- pc = lookup_page_cgroup(page);
- pc->mem_cgroup = memcg;
- pc->flags = PCG_USED;
+ page->mem_cgroup = memcg;
}
void __memcg_kmem_uncharge_pages(struct page *page, int order)
{
- struct mem_cgroup *memcg = NULL;
- struct page_cgroup *pc;
+ struct mem_cgroup *memcg = page->mem_cgroup;
-
- pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
- return;
-
- memcg = pc->mem_cgroup;
- pc->flags = 0;
-
- /*
- * We trust that only if there is a memcg associated with the page, it
- * is a valid allocation
- */
if (!memcg)
return;
VM_BUG_ON_PAGE(mem_cgroup_is_root(memcg), page);
+
memcg_uncharge_kmem(memcg, 1 << order);
-}
-#else
-static inline void memcg_unregister_all_caches(struct mem_cgroup *memcg)
-{
+ page->mem_cgroup = NULL;
}
#endif /* CONFIG_MEMCG_KMEM */
*/
void mem_cgroup_split_huge_fixup(struct page *head)
{
- struct page_cgroup *head_pc = lookup_page_cgroup(head);
- struct page_cgroup *pc;
- struct mem_cgroup *memcg;
int i;
if (mem_cgroup_disabled())
return;
- memcg = head_pc->mem_cgroup;
- for (i = 1; i < HPAGE_PMD_NR; i++) {
- pc = head_pc + i;
- pc->mem_cgroup = memcg;
- pc->flags = head_pc->flags;
- }
- __this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
+ for (i = 1; i < HPAGE_PMD_NR; i++)
+ head[i].mem_cgroup = head->mem_cgroup;
+
+ __this_cpu_sub(head->mem_cgroup->stat->count[MEM_CGROUP_STAT_RSS_HUGE],
HPAGE_PMD_NR);
}
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
* mem_cgroup_move_account - move account of the page
* @page: the page
* @nr_pages: number of regular pages (>1 for huge pages)
- * @pc: page_cgroup of the page.
* @from: mem_cgroup which the page is moved from.
* @to: mem_cgroup which the page is moved to. @from != @to.
*
*/
static int mem_cgroup_move_account(struct page *page,
unsigned int nr_pages,
- struct page_cgroup *pc,
struct mem_cgroup *from,
struct mem_cgroup *to)
{
goto out;
/*
- * Prevent mem_cgroup_migrate() from looking at pc->mem_cgroup
+ * Prevent mem_cgroup_migrate() from looking at page->mem_cgroup
* of its source page while we change it: page migration takes
* both pages off the LRU, but page cache replacement doesn't.
*/
goto out;
ret = -EINVAL;
- if (!PageCgroupUsed(pc) || pc->mem_cgroup != from)
+ if (page->mem_cgroup != from)
goto out_unlock;
- move_lock_mem_cgroup(from, &flags);
+ spin_lock_irqsave(&from->move_lock, flags);
if (!PageAnon(page) && page_mapped(page)) {
__this_cpu_sub(from->stat->count[MEM_CGROUP_STAT_FILE_MAPPED],
}
/*
- * It is safe to change pc->mem_cgroup here because the page
+ * It is safe to change page->mem_cgroup here because the page
* is referenced, charged, and isolated - we can't race with
* uncharging, charging, migration, or LRU putback.
*/
/* caller should have done css_get */
- pc->mem_cgroup = to;
- move_unlock_mem_cgroup(from, &flags);
+ page->mem_cgroup = to;
+ spin_unlock_irqrestore(&from->move_lock, flags);
+
ret = 0;
local_irq_disable();
}
#endif
-#ifdef CONFIG_DEBUG_VM
-static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
-{
- struct page_cgroup *pc;
-
- pc = lookup_page_cgroup(page);
- /*
- * Can be NULL while feeding pages into the page allocator for
- * the first time, i.e. during boot or memory hotplug;
- * or when mem_cgroup_disabled().
- */
- if (likely(pc) && PageCgroupUsed(pc))
- return pc;
- return NULL;
-}
-
-bool mem_cgroup_bad_page_check(struct page *page)
-{
- if (mem_cgroup_disabled())
- return false;
-
- return lookup_page_cgroup_used(page) != NULL;
-}
-
-void mem_cgroup_print_bad_page(struct page *page)
-{
- struct page_cgroup *pc;
-
- pc = lookup_page_cgroup_used(page);
- if (pc) {
- pr_alert("pc:%p pc->flags:%lx pc->mem_cgroup:%p\n",
- pc, pc->flags, pc->mem_cgroup);
- }
-}
-#endif
-
static DEFINE_MUTEX(memcg_limit_mutex);
static int mem_cgroup_resize_limit(struct mem_cgroup *memcg,
if (memcg_kmem_is_active(memcg))
return 0;
- /*
- * We are going to allocate memory for data shared by all memory
- * cgroups so let's stop accounting here.
- */
- memcg_stop_kmem_account();
-
/*
* For simplicity, we won't allow this to be disabled. It also can't
* be changed if the cgroup has children already, or if tasks had
goto out;
}
- memcg->kmemcg_id = memcg_id;
- INIT_LIST_HEAD(&memcg->memcg_slab_caches);
-
/*
- * We couldn't have accounted to this cgroup, because it hasn't got the
- * active bit set yet, so this should succeed.
+ * We couldn't have accounted to this cgroup, because it hasn't got
+ * activated yet, so this should succeed.
*/
err = page_counter_limit(&memcg->kmem, nr_pages);
VM_BUG_ON(err);
static_key_slow_inc(&memcg_kmem_enabled_key);
/*
- * Setting the active bit after enabling static branching will
+ * A memory cgroup is considered kmem-active as soon as it gets
+ * kmemcg_id. Setting the id after enabling static branching will
* guarantee no one starts accounting before all call sites are
* patched.
*/
- memcg_kmem_set_active(memcg);
+ memcg->kmemcg_id = memcg_id;
out:
- memcg_resume_kmem_account();
return err;
}
}
#endif /* CONFIG_NUMA */
-static inline void mem_cgroup_lru_names_not_uptodate(void)
-{
- BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
-}
-
static int memcg_stat_show(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
struct mem_cgroup *mi;
unsigned int i;
+ BUILD_BUG_ON(ARRAY_SIZE(mem_cgroup_lru_names) != NR_LRU_LISTS);
+
for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
continue;
{
int ret;
- memcg->kmemcg_id = -1;
ret = memcg_propagate_kmem(memcg);
if (ret)
return ret;
static void memcg_destroy_kmem(struct mem_cgroup *memcg)
{
+ memcg_unregister_all_caches(memcg);
mem_cgroup_sockets_destroy(memcg);
}
#else
*
* DO NOT ADD NEW FILES.
*/
- name = cfile.file->f_dentry->d_name.name;
+ name = cfile.file->f_path.dentry->d_name.name;
if (!strcmp(name, "memory.usage_in_bytes")) {
event->register_event = mem_cgroup_usage_register_event;
* automatically removed on cgroup destruction but the removal is
* asynchronous, so take an extra ref on @css.
*/
- cfile_css = css_tryget_online_from_dir(cfile.file->f_dentry->d_parent,
+ cfile_css = css_tryget_online_from_dir(cfile.file->f_path.dentry->d_parent,
&memory_cgrp_subsys);
ret = -EINVAL;
if (IS_ERR(cfile_css))
#ifdef CONFIG_SLABINFO
{
.name = "kmem.slabinfo",
- .seq_show = mem_cgroup_slabinfo_read,
+ .seq_start = slab_start,
+ .seq_next = slab_next,
+ .seq_stop = slab_stop,
+ .seq_show = memcg_slab_show,
},
#endif
#endif
free_percpu(memcg->stat);
- /*
- * We need to make sure that (at least for now), the jump label
- * destruction code runs outside of the cgroup lock. This is because
- * get_online_cpus(), which is called from the static_branch update,
- * can't be called inside the cgroup_lock. cpusets are the ones
- * enforcing this dependency, so if they ever change, we might as well.
- *
- * schedule_work() will guarantee this happens. Be careful if you need
- * to move this code around, and make sure it is outside
- * the cgroup_lock.
- */
disarm_static_keys(memcg);
kfree(memcg);
}
vmpressure_init(&memcg->vmpressure);
INIT_LIST_HEAD(&memcg->event_list);
spin_lock_init(&memcg->event_list_lock);
+#ifdef CONFIG_MEMCG_KMEM
+ memcg->kmemcg_id = -1;
+ INIT_LIST_HEAD(&memcg->memcg_slab_caches);
+#endif
return &memcg->css;
}
spin_unlock(&memcg->event_list_lock);
- memcg_unregister_all_caches(memcg);
vmpressure_cleanup(&memcg->vmpressure);
}
unsigned long addr, pte_t ptent, union mc_target *target)
{
struct page *page = NULL;
- struct page_cgroup *pc;
enum mc_target_type ret = MC_TARGET_NONE;
swp_entry_t ent = { .val = 0 };
if (!page && !ent.val)
return ret;
if (page) {
- pc = lookup_page_cgroup(page);
/*
* Do only loose check w/o serialization.
- * mem_cgroup_move_account() checks the pc is valid or
+ * mem_cgroup_move_account() checks the page is valid or
* not under LRU exclusion.
*/
- if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+ if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target)
target->page = page;
unsigned long addr, pmd_t pmd, union mc_target *target)
{
struct page *page = NULL;
- struct page_cgroup *pc;
enum mc_target_type ret = MC_TARGET_NONE;
page = pmd_page(pmd);
VM_BUG_ON_PAGE(!page || !PageHead(page), page);
if (!move_anon())
return ret;
- pc = lookup_page_cgroup(page);
- if (PageCgroupUsed(pc) && pc->mem_cgroup == mc.from) {
+ if (page->mem_cgroup == mc.from) {
ret = MC_TARGET_PAGE;
if (target) {
get_page(page);
static void mem_cgroup_clear_mc(void)
{
- struct mem_cgroup *from = mc.from;
-
/*
* we must clear moving_task before waking up waiters at the end of
* task migration.
mc.from = NULL;
mc.to = NULL;
spin_unlock(&mc.lock);
- mem_cgroup_end_move(from);
}
static int mem_cgroup_can_attach(struct cgroup_subsys_state *css,
VM_BUG_ON(mc.precharge);
VM_BUG_ON(mc.moved_charge);
VM_BUG_ON(mc.moved_swap);
- mem_cgroup_start_move(from);
+
spin_lock(&mc.lock);
mc.from = from;
mc.to = memcg;
static void mem_cgroup_cancel_attach(struct cgroup_subsys_state *css,
struct cgroup_taskset *tset)
{
- mem_cgroup_clear_mc();
+ if (mc.to)
+ mem_cgroup_clear_mc();
}
static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
enum mc_target_type target_type;
union mc_target target;
struct page *page;
- struct page_cgroup *pc;
/*
* We don't take compound_lock() here but no race with splitting thp
if (target_type == MC_TARGET_PAGE) {
page = target.page;
if (!isolate_lru_page(page)) {
- pc = lookup_page_cgroup(page);
if (!mem_cgroup_move_account(page, HPAGE_PMD_NR,
- pc, mc.from, mc.to)) {
+ mc.from, mc.to)) {
mc.precharge -= HPAGE_PMD_NR;
mc.moved_charge += HPAGE_PMD_NR;
}
page = target.page;
if (isolate_lru_page(page))
goto put;
- pc = lookup_page_cgroup(page);
- if (!mem_cgroup_move_account(page, 1, pc,
- mc.from, mc.to)) {
+ if (!mem_cgroup_move_account(page, 1, mc.from, mc.to)) {
mc.precharge--;
/* we uncharge from mc.from later. */
mc.moved_charge++;
struct vm_area_struct *vma;
lru_add_drain_all();
+ /*
+ * Signal mem_cgroup_begin_page_stat() to take the memcg's
+ * move_lock while we're moving its pages to another memcg.
+ * Then wait for already started RCU-only updates to finish.
+ */
+ atomic_inc(&mc.from->moving_account);
+ synchronize_rcu();
retry:
if (unlikely(!down_read_trylock(&mm->mmap_sem))) {
/*
break;
}
up_read(&mm->mmap_sem);
+ atomic_dec(&mc.from->moving_account);
}
static void mem_cgroup_move_task(struct cgroup_subsys_state *css,
*/
void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
{
- struct page_cgroup *pc;
+ struct mem_cgroup *memcg;
unsigned short oldid;
VM_BUG_ON_PAGE(PageLRU(page), page);
if (!do_swap_account)
return;
- pc = lookup_page_cgroup(page);
+ memcg = page->mem_cgroup;
/* Readahead page, never charged */
- if (!PageCgroupUsed(pc))
+ if (!memcg)
return;
- VM_BUG_ON_PAGE(!(pc->flags & PCG_MEMSW), page);
-
- oldid = swap_cgroup_record(entry, mem_cgroup_id(pc->mem_cgroup));
+ oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
VM_BUG_ON_PAGE(oldid, page);
+ mem_cgroup_swap_statistics(memcg, true);
- pc->flags &= ~PCG_MEMSW;
- css_get(&pc->mem_cgroup->css);
- mem_cgroup_swap_statistics(pc->mem_cgroup, true);
+ page->mem_cgroup = NULL;
+
+ if (!mem_cgroup_is_root(memcg))
+ page_counter_uncharge(&memcg->memory, 1);
+
+ /* XXX: caller holds IRQ-safe mapping->tree_lock */
+ VM_BUG_ON(!irqs_disabled());
+
+ mem_cgroup_charge_statistics(memcg, page, -1);
+ memcg_check_events(memcg, page);
}
/**
goto out;
if (PageSwapCache(page)) {
- struct page_cgroup *pc = lookup_page_cgroup(page);
/*
* Every swap fault against a single page tries to charge the
* page, bail as early as possible. shmem_unuse() encounters
* the page lock, which serializes swap cache removal, which
* in turn serializes uncharging.
*/
- if (PageCgroupUsed(pc))
+ if (page->mem_cgroup)
goto out;
}
}
static void uncharge_batch(struct mem_cgroup *memcg, unsigned long pgpgout,
- unsigned long nr_mem, unsigned long nr_memsw,
unsigned long nr_anon, unsigned long nr_file,
unsigned long nr_huge, struct page *dummy_page)
{
+ unsigned long nr_pages = nr_anon + nr_file;
unsigned long flags;
if (!mem_cgroup_is_root(memcg)) {
- if (nr_mem)
- page_counter_uncharge(&memcg->memory, nr_mem);
- if (nr_memsw)
- page_counter_uncharge(&memcg->memsw, nr_memsw);
+ page_counter_uncharge(&memcg->memory, nr_pages);
+ if (do_swap_account)
+ page_counter_uncharge(&memcg->memsw, nr_pages);
memcg_oom_recover(memcg);
}
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_CACHE], nr_file);
__this_cpu_sub(memcg->stat->count[MEM_CGROUP_STAT_RSS_HUGE], nr_huge);
__this_cpu_add(memcg->stat->events[MEM_CGROUP_EVENTS_PGPGOUT], pgpgout);
- __this_cpu_add(memcg->stat->nr_page_events, nr_anon + nr_file);
+ __this_cpu_add(memcg->stat->nr_page_events, nr_pages);
memcg_check_events(memcg, dummy_page);
local_irq_restore(flags);
if (!mem_cgroup_is_root(memcg))
- css_put_many(&memcg->css, max(nr_mem, nr_memsw));
+ css_put_many(&memcg->css, nr_pages);
}
static void uncharge_list(struct list_head *page_list)
{
struct mem_cgroup *memcg = NULL;
- unsigned long nr_memsw = 0;
unsigned long nr_anon = 0;
unsigned long nr_file = 0;
unsigned long nr_huge = 0;
unsigned long pgpgout = 0;
- unsigned long nr_mem = 0;
struct list_head *next;
struct page *page;
next = page_list->next;
do {
unsigned int nr_pages = 1;
- struct page_cgroup *pc;
page = list_entry(next, struct page, lru);
next = page->lru.next;
VM_BUG_ON_PAGE(PageLRU(page), page);
VM_BUG_ON_PAGE(page_count(page), page);
- pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
+ if (!page->mem_cgroup)
continue;
/*
* Nobody should be changing or seriously looking at
- * pc->mem_cgroup and pc->flags at this point, we have
- * fully exclusive access to the page.
+ * page->mem_cgroup at this point, we have fully
+ * exclusive access to the page.
*/
- if (memcg != pc->mem_cgroup) {
+ if (memcg != page->mem_cgroup) {
if (memcg) {
- uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
- nr_anon, nr_file, nr_huge, page);
- pgpgout = nr_mem = nr_memsw = 0;
- nr_anon = nr_file = nr_huge = 0;
+ uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
+ nr_huge, page);
+ pgpgout = nr_anon = nr_file = nr_huge = 0;
}
- memcg = pc->mem_cgroup;
+ memcg = page->mem_cgroup;
}
if (PageTransHuge(page)) {
else
nr_file += nr_pages;
- if (pc->flags & PCG_MEM)
- nr_mem += nr_pages;
- if (pc->flags & PCG_MEMSW)
- nr_memsw += nr_pages;
- pc->flags = 0;
+ page->mem_cgroup = NULL;
pgpgout++;
} while (next != page_list);
if (memcg)
- uncharge_batch(memcg, pgpgout, nr_mem, nr_memsw,
- nr_anon, nr_file, nr_huge, page);
+ uncharge_batch(memcg, pgpgout, nr_anon, nr_file,
+ nr_huge, page);
}
/**
*/
void mem_cgroup_uncharge(struct page *page)
{
- struct page_cgroup *pc;
-
if (mem_cgroup_disabled())
return;
/* Don't touch page->lru of any random page, pre-check: */
- pc = lookup_page_cgroup(page);
- if (!PageCgroupUsed(pc))
+ if (!page->mem_cgroup)
return;
INIT_LIST_HEAD(&page->lru);
void mem_cgroup_migrate(struct page *oldpage, struct page *newpage,
bool lrucare)
{
- struct page_cgroup *pc;
+ struct mem_cgroup *memcg;
int isolated;
VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
return;
/* Page cache replacement: new page already charged? */
- pc = lookup_page_cgroup(newpage);
- if (PageCgroupUsed(pc))
+ if (newpage->mem_cgroup)
return;
/*
* uncharged page when the PFN walker finds a page that
* reclaim just put back on the LRU but has not released yet.
*/
- pc = lookup_page_cgroup(oldpage);
- if (!PageCgroupUsed(pc))
+ memcg = oldpage->mem_cgroup;
+ if (!memcg)
return;
- VM_BUG_ON_PAGE(!(pc->flags & PCG_MEM), oldpage);
- VM_BUG_ON_PAGE(do_swap_account && !(pc->flags & PCG_MEMSW), oldpage);
-
if (lrucare)
lock_page_lru(oldpage, &isolated);
- pc->flags = 0;
+ oldpage->mem_cgroup = NULL;
if (lrucare)
unlock_page_lru(oldpage, isolated);
- commit_charge(newpage, pc->mem_cgroup, lrucare);
+ commit_charge(newpage, memcg, lrucare);
}
/*