diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index f6bc78f4ed137bd4fd3b46b439170b6ebc485bd0..d06cae2de783acf462162e27f8770ff7bf60f4ad 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -366,13 +366,6 @@ mem_cgroup_zone_zoneinfo(struct mem_cgroup *memcg, struct zone *zone)
  *
  * If memcg is bound to a traditional hierarchy, the css of root_mem_cgroup
  * is returned.
- *
- * XXX: The above description of behavior on the default hierarchy isn't
- * strictly true yet as replace_page_cache_page() can modify the
- * association before @page is released even on the default hierarchy;
- * however, the current and planned usages don't mix the the two functions
- * and replace_page_cache_page() will soon be updated to make the invariant
- * actually true.
  */
 struct cgroup_subsys_state *mem_cgroup_css_from_page(struct page *page)
 {
@@ -1220,7 +1213,7 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p)
                pr_cont(":");
 
                for (i = 0; i < MEM_CGROUP_STAT_NSTATS; i++) {
-                       if (i == MEM_CGROUP_STAT_SWAP && !do_memsw_account())
+                       if (i == MEM_CGROUP_STAT_SWAP && !do_swap_account)
                                continue;
                        pr_cont(" %s:%luKB", mem_cgroup_stat_names[i],
                                K(mem_cgroup_read_stat(iter, i)));
@@ -1259,9 +1252,12 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg)
        limit = memcg->memory.limit;
        if (mem_cgroup_swappiness(memcg)) {
                unsigned long memsw_limit;
+               unsigned long swap_limit;
 
                memsw_limit = memcg->memsw.limit;
-               limit = min(limit + total_swap_pages, memsw_limit);
+               swap_limit = memcg->swap.limit;
+               swap_limit = min(swap_limit, (unsigned long)total_swap_pages);
+               limit = min(limit + swap_limit, memsw_limit);
        }
        return limit;
 }
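
The hunk above changes how swap feeds into the effective limit used for OOM reporting: the swap contribution is now clamped to the cgroup's own swap limit before being added, and the v1 memsw limit still caps the combined total. A standalone sketch of the arithmetic, with hypothetical page counts, not kernel code:

/*
 * Toy model of the updated mem_cgroup_get_limit() arithmetic.
 * All values are in pages; the sample numbers are made up.
 */
#include <stdio.h>

static unsigned long min_ul(unsigned long a, unsigned long b)
{
	return a < b ? a : b;
}

int main(void)
{
	unsigned long memory_limit = 1024;	/* memcg->memory.limit */
	unsigned long memsw_limit  = 1536;	/* memcg->memsw.limit  */
	unsigned long swap_limit   = 2048;	/* memcg->swap.limit   */
	unsigned long total_swap_pages = 512;
	unsigned long limit;

	/*
	 * Swap can contribute no more than the smaller of the
	 * cgroup's swap limit and the system's total swap.
	 */
	swap_limit = min_ul(swap_limit, total_swap_pages);

	/* The v1 memsw limit still caps the combined total. */
	limit = min_ul(memory_limit + swap_limit, memsw_limit);

	printf("effective limit: %lu pages\n", limit);	/* 1536 */
	return 0;
}
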
@@ -2771,6 +2767,18 @@ static unsigned long tree_stat(struct mem_cgroup *memcg,
        return val;
 }
 
+static unsigned long tree_events(struct mem_cgroup *memcg,
+                                enum mem_cgroup_events_index idx)
+{
+       struct mem_cgroup *iter;
+       unsigned long val = 0;
+
+       for_each_mem_cgroup_tree(iter, memcg)
+               val += mem_cgroup_read_events(iter, idx);
+
+       return val;
+}
+
 static unsigned long mem_cgroup_usage(struct mem_cgroup *memcg, bool swap)
 {
        unsigned long val;
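
The new tree_events() mirrors the existing tree_stat(): both fold a per-memcg counter over the whole subtree via for_each_mem_cgroup_tree(). A self-contained sketch of that accumulation pattern over a toy tree (the node layout and walker are illustrative stand-ins for the kernel's css-based iterator):

/*
 * Standalone model of subtree accumulation as done by
 * tree_stat() and tree_events().
 */
#include <stdio.h>

struct toy_memcg {
	unsigned long pgfault;		/* one per-group event counter */
	struct toy_memcg *child;	/* first child */
	struct toy_memcg *sibling;	/* next sibling */
};

/* Pre-order walk: the root of the subtree plus every descendant. */
static unsigned long toy_tree_events(struct toy_memcg *memcg)
{
	unsigned long val = memcg->pgfault;
	struct toy_memcg *iter;

	for (iter = memcg->child; iter; iter = iter->sibling)
		val += toy_tree_events(iter);
	return val;
}

int main(void)
{
	struct toy_memcg c2 = { .pgfault = 7 };
	struct toy_memcg c1 = { .pgfault = 5, .sibling = &c2 };
	struct toy_memcg root = { .pgfault = 3, .child = &c1 };

	printf("pgfault (subtree): %lu\n", toy_tree_events(&root));	/* 15 */
	return 0;
}
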
@@ -4201,11 +4209,13 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
        if (parent && parent->use_hierarchy) {
                memcg->use_hierarchy = true;
                page_counter_init(&memcg->memory, &parent->memory);
+               page_counter_init(&memcg->swap, &parent->swap);
                page_counter_init(&memcg->memsw, &parent->memsw);
                page_counter_init(&memcg->kmem, &parent->kmem);
                page_counter_init(&memcg->tcpmem, &parent->tcpmem);
        } else {
                page_counter_init(&memcg->memory, NULL);
+               page_counter_init(&memcg->swap, NULL);
                page_counter_init(&memcg->memsw, NULL);
                page_counter_init(&memcg->kmem, NULL);
                page_counter_init(&memcg->tcpmem, NULL);
@@ -4628,7 +4638,8 @@ static int mem_cgroup_count_precharge_pte_range(pmd_t *pmd,
        pte_t *pte;
        spinlock_t *ptl;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (get_mctgt_type_thp(vma, addr, *pmd, NULL) == MC_TARGET_PAGE)
                        mc.precharge += HPAGE_PMD_NR;
                spin_unlock(ptl);
@@ -4816,7 +4827,8 @@ static int mem_cgroup_move_charge_pte_range(pmd_t *pmd,
        union mc_target target;
        struct page *page;
 
-       if (pmd_trans_huge_lock(pmd, vma, &ptl)) {
+       ptl = pmd_trans_huge_lock(pmd, vma);
+       if (ptl) {
                if (mc.precharge < HPAGE_PMD_NR) {
                        spin_unlock(ptl);
                        return 0;
@@ -5098,6 +5110,59 @@ static int memory_events_show(struct seq_file *m, void *v)
        return 0;
 }
 
+static int memory_stat_show(struct seq_file *m, void *v)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+       int i;
+
+       /*
+        * Provide statistics on the state of the memory subsystem as
+        * well as cumulative event counters that show past behavior.
+        *
+        * This list is ordered following a combination of these gradients:
+        * 1) generic big picture -> specifics and details
+        * 2) reflecting userspace activity -> reflecting kernel heuristics
+        *
+        * Current memory state:
+        */
+
+       seq_printf(m, "anon %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_RSS) * PAGE_SIZE);
+       seq_printf(m, "file %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_CACHE) * PAGE_SIZE);
+       seq_printf(m, "sock %llu\n",
+                  (u64)tree_stat(memcg, MEMCG_SOCK) * PAGE_SIZE);
+
+       seq_printf(m, "file_mapped %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_FILE_MAPPED) *
+                  PAGE_SIZE);
+       seq_printf(m, "file_dirty %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_DIRTY) *
+                  PAGE_SIZE);
+       seq_printf(m, "file_writeback %llu\n",
+                  (u64)tree_stat(memcg, MEM_CGROUP_STAT_WRITEBACK) *
+                  PAGE_SIZE);
+
+       for (i = 0; i < NR_LRU_LISTS; i++) {
+               struct mem_cgroup *mi;
+               unsigned long val = 0;
+
+               for_each_mem_cgroup_tree(mi, memcg)
+                       val += mem_cgroup_nr_lru_pages(mi, BIT(i));
+               seq_printf(m, "%s %llu\n",
+                          mem_cgroup_lru_names[i], (u64)val * PAGE_SIZE);
+       }
+
+       /* Accumulated memory events */
+
+       seq_printf(m, "pgfault %lu\n",
+                  tree_events(memcg, MEM_CGROUP_EVENTS_PGFAULT));
+       seq_printf(m, "pgmajfault %lu\n",
+                  tree_events(memcg, MEM_CGROUP_EVENTS_PGMAJFAULT));
+
+       return 0;
+}
+
 static struct cftype memory_files[] = {
        {
                .name = "current",
@@ -5128,6 +5193,11 @@ static struct cftype memory_files[] = {
                .file_offset = offsetof(struct mem_cgroup, events_file),
                .seq_show = memory_events_show,
        },
+       {
+               .name = "stat",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = memory_stat_show,
+       },
        { }     /* terminate */
 };
 
@@ -5224,7 +5294,7 @@ int mem_cgroup_try_charge(struct page *page, struct mm_struct *mm,
                if (page->mem_cgroup)
                        goto out;
 
-               if (do_memsw_account()) {
+               if (do_swap_account) {
                        swp_entry_t ent = { .val = page_private(page), };
                        unsigned short id = lookup_swap_cgroup_id(ent);
 
@@ -5459,7 +5529,8 @@ void mem_cgroup_uncharge_list(struct list_head *page_list)
 void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
 {
        struct mem_cgroup *memcg;
-       int isolated;
+       unsigned int nr_pages;
+       bool compound;
 
        VM_BUG_ON_PAGE(!PageLocked(oldpage), oldpage);
        VM_BUG_ON_PAGE(!PageLocked(newpage), newpage);
@@ -5479,11 +5550,21 @@ void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage)
        if (!memcg)
                return;
 
-       lock_page_lru(oldpage, &isolated);
-       oldpage->mem_cgroup = NULL;
-       unlock_page_lru(oldpage, isolated);
+       /* Force-charge the new page. The old one will be freed soon */
+       compound = PageTransHuge(newpage);
+       nr_pages = compound ? hpage_nr_pages(newpage) : 1;
+
+       page_counter_charge(&memcg->memory, nr_pages);
+       if (do_memsw_account())
+               page_counter_charge(&memcg->memsw, nr_pages);
+       css_get_many(&memcg->css, nr_pages);
 
        commit_charge(newpage, memcg, true);
+
+       local_irq_disable();
+       mem_cgroup_charge_statistics(memcg, newpage, compound, nr_pages);
+       memcg_check_events(memcg, newpage);
+       local_irq_enable();
 }
 
 DEFINE_STATIC_KEY_FALSE(memcg_sockets_enabled_key);
@@ -5554,6 +5635,8 @@ bool mem_cgroup_charge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
        if (in_softirq())
                gfp_mask = GFP_NOWAIT;
 
+       this_cpu_add(memcg->stat->count[MEMCG_SOCK], nr_pages);
+
        if (try_charge(memcg, gfp_mask, nr_pages) == 0)
                return true;
 
@@ -5573,6 +5656,8 @@ void mem_cgroup_uncharge_skmem(struct mem_cgroup *memcg, unsigned int nr_pages)
                return;
        }
 
+       this_cpu_sub(memcg->stat->count[MEMCG_SOCK], nr_pages);
+
        page_counter_uncharge(&memcg->memory, nr_pages);
        css_put_many(&memcg->css, nr_pages);
 }
@@ -5677,32 +5762,107 @@ void mem_cgroup_swapout(struct page *page, swp_entry_t entry)
        memcg_check_events(memcg, page);
 }
 
+/*
+ * mem_cgroup_try_charge_swap - try charging a swap entry
+ * @page: page being added to swap
+ * @entry: swap entry to charge
+ *
+ * Try to charge @entry to the memcg that @page belongs to.
+ *
+ * Returns 0 on success, -ENOMEM on failure.
+ */
+int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
+{
+       struct mem_cgroup *memcg;
+       struct page_counter *counter;
+       unsigned short oldid;
+
+       if (!cgroup_subsys_on_dfl(memory_cgrp_subsys) || !do_swap_account)
+               return 0;
+
+       memcg = page->mem_cgroup;
+
+       /* Readahead page, never charged */
+       if (!memcg)
+               return 0;
+
+       if (!mem_cgroup_is_root(memcg) &&
+           !page_counter_try_charge(&memcg->swap, 1, &counter))
+               return -ENOMEM;
+
+       oldid = swap_cgroup_record(entry, mem_cgroup_id(memcg));
+       VM_BUG_ON_PAGE(oldid, page);
+       mem_cgroup_swap_statistics(memcg, true);
+
+       css_get(&memcg->css);
+       return 0;
+}
+
 /**
  * mem_cgroup_uncharge_swap - uncharge a swap entry
  * @entry: swap entry to uncharge
  *
- * Drop the memsw charge associated with @entry.
+ * Drop the swap charge associated with @entry.
  */
 void mem_cgroup_uncharge_swap(swp_entry_t entry)
 {
        struct mem_cgroup *memcg;
        unsigned short id;
 
-       if (!do_memsw_account())
+       if (!do_swap_account)
                return;
 
        id = swap_cgroup_record(entry, 0);
        rcu_read_lock();
        memcg = mem_cgroup_from_id(id);
        if (memcg) {
-               if (!mem_cgroup_is_root(memcg))
-                       page_counter_uncharge(&memcg->memsw, 1);
+               if (!mem_cgroup_is_root(memcg)) {
+                       if (cgroup_subsys_on_dfl(memory_cgrp_subsys))
+                               page_counter_uncharge(&memcg->swap, 1);
+                       else
+                               page_counter_uncharge(&memcg->memsw, 1);
+               }
                mem_cgroup_swap_statistics(memcg, false);
                css_put(&memcg->css);
        }
        rcu_read_unlock();
 }
 
+long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg)
+{
+       long nr_swap_pages = get_nr_swap_pages();
+
+       if (!do_swap_account || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
+               return nr_swap_pages;
+       for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
+               nr_swap_pages = min_t(long, nr_swap_pages,
+                                     READ_ONCE(memcg->swap.limit) -
+                                     page_counter_read(&memcg->swap));
+       return nr_swap_pages;
+}
+
+bool mem_cgroup_swap_full(struct page *page)
+{
+       struct mem_cgroup *memcg;
+
+       VM_BUG_ON_PAGE(!PageLocked(page), page);
+
+       if (vm_swap_full())
+               return true;
+       if (!do_swap_account || !cgroup_subsys_on_dfl(memory_cgrp_subsys))
+               return false;
+
+       memcg = page->mem_cgroup;
+       if (!memcg)
+               return false;
+
+       for (; memcg != root_mem_cgroup; memcg = parent_mem_cgroup(memcg))
+               if (page_counter_read(&memcg->swap) * 2 >= memcg->swap.limit)
+                       return true;
+
+       return false;
+}
+
 /* for remember boot option*/
 #ifdef CONFIG_MEMCG_SWAP_ENABLED
 static int really_do_swap_account __initdata = 1;
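
Both new helpers above walk from the given group up to (but excluding) the root, letting the most restrictive ancestor win: mem_cgroup_get_nr_swap_pages() returns the smallest remaining swap headroom on the path, and mem_cgroup_swap_full() reports pressure once any level is at least half full. A hedged sketch of the half-full check; the struct and parent linkage are hypothetical stand-ins for the memcg hierarchy and page_counter:

/* Illustrative model of the mem_cgroup_swap_full() ancestor walk. */
#include <stdbool.h>
#include <stdio.h>

struct toy_memcg {
	unsigned long swap_usage;	/* page_counter_read(&memcg->swap) */
	unsigned long swap_limit;	/* memcg->swap.limit */
	struct toy_memcg *parent;	/* NULL marks the root */
};

static bool toy_swap_full(struct toy_memcg *memcg)
{
	/* Stop before the root, as the kernel loop does. */
	for (; memcg->parent; memcg = memcg->parent)
		if (memcg->swap_usage * 2 >= memcg->swap_limit)
			return true;
	return false;
}

int main(void)
{
	struct toy_memcg root   = { 0, ~0UL, NULL };
	struct toy_memcg parent = { 600, 1000, &root };	/* 60% full */
	struct toy_memcg child  = { 100, 1000, &parent };	/* 10% full */

	/* A half-full ancestor is enough to report pressure. */
	printf("swap full: %d\n", toy_swap_full(&child));	/* 1 */
	return 0;
}
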
@@ -5720,6 +5880,63 @@ static int __init enable_swap_account(char *s)
 }
 __setup("swapaccount=", enable_swap_account);
 
+static u64 swap_current_read(struct cgroup_subsys_state *css,
+                            struct cftype *cft)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(css);
+
+       return (u64)page_counter_read(&memcg->swap) * PAGE_SIZE;
+}
+
+static int swap_max_show(struct seq_file *m, void *v)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
+       unsigned long max = READ_ONCE(memcg->swap.limit);
+
+       if (max == PAGE_COUNTER_MAX)
+               seq_puts(m, "max\n");
+       else
+               seq_printf(m, "%llu\n", (u64)max * PAGE_SIZE);
+
+       return 0;
+}
+
+static ssize_t swap_max_write(struct kernfs_open_file *of,
+                             char *buf, size_t nbytes, loff_t off)
+{
+       struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
+       unsigned long max;
+       int err;
+
+       buf = strstrip(buf);
+       err = page_counter_memparse(buf, "max", &max);
+       if (err)
+               return err;
+
+       mutex_lock(&memcg_limit_mutex);
+       err = page_counter_limit(&memcg->swap, max);
+       mutex_unlock(&memcg_limit_mutex);
+       if (err)
+               return err;
+
+       return nbytes;
+}
+
+static struct cftype swap_files[] = {
+       {
+               .name = "swap.current",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .read_u64 = swap_current_read,
+       },
+       {
+               .name = "swap.max",
+               .flags = CFTYPE_NOT_ON_ROOT,
+               .seq_show = swap_max_show,
+               .write = swap_max_write,
+       },
+       { }     /* terminate */
+};
+
 static struct cftype memsw_cgroup_files[] = {
        {
                .name = "memsw.usage_in_bytes",
@@ -5751,6 +5968,8 @@ static int __init mem_cgroup_swap_init(void)
 {
        if (!mem_cgroup_disabled() && really_do_swap_account) {
                do_swap_account = 1;
+               WARN_ON(cgroup_add_dfl_cftypes(&memory_cgrp_subsys,
+                                              swap_files));
                WARN_ON(cgroup_add_legacy_cftypes(&memory_cgrp_subsys,
                                                  memsw_cgroup_files));
        }
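
Once mem_cgroup_swap_init() has registered swap_files on the default hierarchy, every non-root cgroup exposes memory.swap.current and memory.swap.max. A userspace sketch of driving them; the /sys/fs/cgroup mount point and the "mygroup" name are assumptions, while the file names come from the cftypes registered above:

/* Cap a cgroup's swap at 1 GiB and read back its current usage. */
#include <stdio.h>

int main(void)
{
	const char *max_path = "/sys/fs/cgroup/mygroup/memory.swap.max";
	const char *cur_path = "/sys/fs/cgroup/mygroup/memory.swap.current";
	char buf[64];
	FILE *f;

	/* Limit swap to 1 GiB; writing "max" would lift the limit again. */
	f = fopen(max_path, "w");
	if (!f)
		return 1;
	fprintf(f, "%llu\n", 1ULL << 30);
	fclose(f);

	/* Current swap usage is reported in bytes. */
	f = fopen(cur_path, "r");
	if (!f)
		return 1;
	if (fgets(buf, sizeof(buf), f))
		printf("swap usage: %s", buf);
	fclose(f);
	return 0;
}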