Merge branch 'for-3.1' into for-3.2

[deliverable/linux.git] / mm / memcontrol.c
diff --git a/mm/memcontrol.c b/mm/memcontrol.c

index 2f5534e1968ca65962142ca9d5292676c1c10d26..f4ec4e7ca4cd2b64f9cfb2ea8845eba90373e549 100644 (file)
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -35,7 +35,6 @@
  #include <linux/limits.h>
  #include <linux/mutex.h>
  #include <linux/rbtree.h>
-#include <linux/shmem_fs.h>
  #include <linux/slab.h>
  #include <linux/swap.h>
  #include <linux/swapops.h>
@@ -1108,6 +1107,21 @@ void mem_cgroup_move_lists(struct page *page,
         mem_cgroup_add_lru_list(page, to);
  }
  
+/*
+ * Returns true if mem is root_mem itself or, with root_mem->use_hierarchy set,
+ * a descendant of it. A NULL mem (e.g. an empty stock->cached) never matches.
+ */
+static bool mem_cgroup_same_or_subtree(const struct mem_cgroup *root_mem,
+               struct mem_cgroup *mem)
+{
+       if (!mem)       /* never hand &mem->css of NULL to css_is_ancestor() */
+               return false;
+       if (root_mem == mem)
+               return true;
+       return root_mem->use_hierarchy &&
+               css_is_ancestor(&mem->css, &root_mem->css);
+}
+
  int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
  {
         int ret;
@@ -1127,10 +1141,7 @@ int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *mem)
          * enabled in "curr" and "curr" is a child of "mem" in *cgroup*
          * hierarchy(even if use_hierarchy is disabled in "mem").
          */
-       if (mem->use_hierarchy)
-               ret = css_is_ancestor(&curr->css, &mem->css);
-       else
-               ret = (curr == mem);
+       ret = mem_cgroup_same_or_subtree(mem, curr);
         css_put(&curr->css);
         return ret;
  }
@@ -1369,10 +1380,9 @@ static bool mem_cgroup_under_move(struct mem_cgroup *mem)
         to = mc.to;
         if (!from)
                 goto unlock;
-       if (from == mem || to == mem
-           || (mem->use_hierarchy && css_is_ancestor(&from->css, &mem->css))
-           || (mem->use_hierarchy && css_is_ancestor(&to->css, &mem->css)))
-               ret = true;
+
+       ret = mem_cgroup_same_or_subtree(mem, from)
+               || mem_cgroup_same_or_subtree(mem, to);
  unlock:
         spin_unlock(&mc.lock);
         return ret;
@@ -1915,25 +1925,20 @@ struct oom_wait_info {
  static int memcg_oom_wake_function(wait_queue_t *wait,
         unsigned mode, int sync, void *arg)
  {
-       struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg;
+       struct mem_cgroup *wake_mem = (struct mem_cgroup *)arg,
+                         *oom_wait_mem;
         struct oom_wait_info *oom_wait_info;
  
         oom_wait_info = container_of(wait, struct oom_wait_info, wait);
+       oom_wait_mem = oom_wait_info->mem;
  
-       if (oom_wait_info->mem == wake_mem)
-               goto wakeup;
-       /* if no hierarchy, no match */
-       if (!oom_wait_info->mem->use_hierarchy || !wake_mem->use_hierarchy)
-               return 0;
         /*
          * Both of oom_wait_info->mem and wake_mem are stable under us.
          * Then we can use css_is_ancestor without taking care of RCU.
          */
-       if (!css_is_ancestor(&oom_wait_info->mem->css, &wake_mem->css) &&
-           !css_is_ancestor(&wake_mem->css, &oom_wait_info->mem->css))
+       if (!mem_cgroup_same_or_subtree(oom_wait_mem, wake_mem)
+                       && !mem_cgroup_same_or_subtree(wake_mem, oom_wait_mem))
                 return 0;
-
-wakeup:
         return autoremove_wake_function(wait, mode, sync, arg);
  }
  
@@ -2086,7 +2091,6 @@ struct memcg_stock_pcp {
  #define FLUSHING_CACHED_CHARGE (0)
  };
  static DEFINE_PER_CPU(struct memcg_stock_pcp, memcg_stock);
-static DEFINE_MUTEX(percpu_charge_mutex);
  
  /*
   * Try to consume stocked charge on this cpu. If success, one page is consumed
@@ -2154,19 +2158,14 @@ static void refill_stock(struct mem_cgroup *mem, unsigned int nr_pages)
  }
  
  /*
- * Tries to drain stocked charges in other cpus. This function is asynchronous
- * and just put a work per cpu for draining localy on each cpu. Caller can
- * expects some charges will be back to res_counter later but cannot wait for
- * it.
+ * Drains all per-CPU charge caches for the given root_mem and the
+ * hierarchy subtree under it. The sync flag says whether we should block
+ * until the work is done.
   */
-static void drain_all_stock_async(struct mem_cgroup *root_mem)
+static void drain_all_stock(struct mem_cgroup *root_mem, bool sync)
  {
         int cpu, curcpu;
-       /*
-        * If someone calls draining, avoid adding more kworker runs.
-        */
-       if (!mutex_trylock(&percpu_charge_mutex))
-               return;
+
         /* Notify other cpus that system-wide "drain" is running */
         get_online_cpus();
         /*
@@ -2183,13 +2182,8 @@ static void drain_all_stock_async(struct mem_cgroup *root_mem)
                 mem = stock->cached;
                 if (!mem || !stock->nr_pages)
                         continue;
-               if (mem != root_mem) {
-                       if (!root_mem->use_hierarchy)
-                               continue;
-                       /* check whether "mem" is under tree of "root_mem" */
-                       if (!css_is_ancestor(&mem->css, &root_mem->css))
-                               continue;
-               }
+               if (!mem_cgroup_same_or_subtree(root_mem, mem))
+                       continue;
                 if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
                         if (cpu == curcpu)
                                 drain_local_stock(&stock->work);
@@ -2197,18 +2191,36 @@ static void drain_all_stock_async(struct mem_cgroup *root_mem)
                                 schedule_work_on(cpu, &stock->work);
                 }
         }
+
+       if (!sync)
+               goto out;
+
+       for_each_online_cpu(cpu) {
+               struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
+               if (mem_cgroup_same_or_subtree(root_mem, stock->cached) &&
+                               test_bit(FLUSHING_CACHED_CHARGE, &stock->flags))
+                       flush_work(&stock->work);
+       }
+out:
         put_online_cpus();
-       mutex_unlock(&percpu_charge_mutex);
-       /* We don't wait for flush_work */
+}
+
+/*
+ * Tries to drain stocked charges on other cpus. This function is asynchronous:
+ * it calls drain_all_stock() with sync == false, so the per-cpu draining is
+ * kicked off but not waited for. The caller can expect some charges to come
+ * back to the res_counter later but cannot wait for it.
+ */
+static void drain_all_stock_async(struct mem_cgroup *root_mem)
+{
+       drain_all_stock(root_mem, false);
  }
  
  /* This is a synchronous drain interface. */
-static void drain_all_stock_sync(void)
+static void drain_all_stock_sync(struct mem_cgroup *root_mem)
  {
         /* called when force_empty is called */
-       mutex_lock(&percpu_charge_mutex);
-       schedule_on_each_cpu(drain_local_stock);
-       mutex_unlock(&percpu_charge_mutex);
+       drain_all_stock(root_mem, true);
  }
  
  /*
@@ -2860,30 +2872,6 @@ int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm,
                 return 0;
         if (PageCompound(page))
                 return 0;
-       /*
-        * Corner case handling. This is called from add_to_page_cache()
-        * in usual. But some FS (shmem) precharges this page before calling it
-        * and call add_to_page_cache() with GFP_NOWAIT.
-        *
-        * For GFP_NOWAIT case, the page may be pre-charged before calling
-        * add_to_page_cache(). (See shmem.c) check it here and avoid to call
-        * charge twice. (It works but has to pay a bit larger cost.)
-        * And when the page is SwapCache, it should take swap information
-        * into account. This is under lock_page() now.
-        */
-       if (!(gfp_mask & __GFP_WAIT)) {
-               struct page_cgroup *pc;
-
-               pc = lookup_page_cgroup(page);
-               if (!pc)
-                       return 0;
-               lock_page_cgroup(pc);
-               if (PageCgroupUsed(pc)) {
-                       unlock_page_cgroup(pc);
-                       return 0;
-               }
-               unlock_page_cgroup(pc);
-       }
  
         if (unlikely(!mm))
                 mm = &init_mm;
@@ -3473,31 +3461,6 @@ void mem_cgroup_end_migration(struct mem_cgroup *mem,
         cgroup_release_and_wakeup_rmdir(&mem->css);
  }
  
-/*
- * A call to try to shrink memory usage on charge failure at shmem's swapin.
- * Calling hierarchical_reclaim is not enough because we should update
- * last_oom_jiffies to prevent pagefault_out_of_memory from invoking global OOM.
- * Moreover considering hierarchy, we should reclaim from the mem_over_limit,
- * not from the memcg which this page would be charged to.
- * try_charge_swapin does all of these works properly.
- */
-int mem_cgroup_shmem_charge_fallback(struct page *page,
-                           struct mm_struct *mm,
-                           gfp_t gfp_mask)
-{
-       struct mem_cgroup *mem;
-       int ret;
-
-       if (mem_cgroup_disabled())
-               return 0;
-
-       ret = mem_cgroup_try_charge_swapin(mm, page, gfp_mask, &mem);
-       if (!ret)
-               mem_cgroup_cancel_charge_swapin(mem); /* it does !mem check */
-
-       return ret;
-}
-
  #ifdef CONFIG_DEBUG_VM
  static struct page_cgroup *lookup_page_cgroup_used(struct page *page)
  {
@@ -3856,7 +3819,7 @@ move_account:
                         goto out;
                 /* This is for making all *used* pages to be on LRU. */
                 lru_add_drain_all();
-               drain_all_stock_sync();
+               drain_all_stock_sync(mem);
                 ret = 0;
                 mem_cgroup_start_move(mem);
                 for_each_node_state(node, N_HIGH_MEMORY) {
@@ -5317,15 +5280,17 @@ static struct page *mc_handle_file_pte(struct vm_area_struct *vma,
                 pgoff = pte_to_pgoff(ptent);
  
         /* page is moved even if it's not RSS of this task(page-faulted). */
-       if (!mapping_cap_swap_backed(mapping)) { /* normal file */
-               page = find_get_page(mapping, pgoff);
-       } else { /* shmem/tmpfs file. we should take account of swap too. */
-               swp_entry_t ent;
-               mem_cgroup_get_shmem_target(inode, pgoff, &page, &ent);
+       page = find_get_page(mapping, pgoff);
+
+#ifdef CONFIG_SWAP
+       /* shmem/tmpfs may report page out on swap: account for that too. */
+       if (radix_tree_exceptional_entry(page)) {
+               swp_entry_t swap = radix_to_swp_entry(page);
                 if (do_swap_account)
-                       entry->val = ent.val;
+                       *entry = swap;
+               page = find_get_page(&swapper_space, swap.val);
         }
-
+#endif
         return page;
  }