taint: add explicit flag to show whether lock dep is still OK.

[deliverable/linux.git] / mm / slab.c
diff --git a/mm/slab.c b/mm/slab.c

index e351acea60262ef8e85bcfa5027cd35f80f7c843..856e4a192d25c73954771b1b0893458f0d9209bb 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -87,7 +87,6 @@
   */
  
  #include       <linux/slab.h>
-#include       "slab.h"
  #include       <linux/mm.h>
  #include       <linux/poison.h>
  #include       <linux/swap.h>
@@ -128,6 +127,8 @@
  
  #include       "internal.h"
  
+#include       "slab.h"
+
  /*
   * DEBUG       - 1 for kmem_cache_create() to honour; SLAB_RED_ZONE & SLAB_POISON.
   *               0 for faster, smaller code (especially in the critical paths).
@@ -547,8 +548,6 @@ static struct cache_names __initdata cache_names[] = {
  #undef CACHE
  };
  
-static struct arraycache_init initarray_cache __initdata =
-    { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
  static struct arraycache_init initarray_generic =
      { {0, BOOT_CPUCACHE_ENTRIES, 1, 0} };
  
@@ -643,6 +642,26 @@ static void init_node_lock_keys(int q)
         }
  }
  
+static void on_slab_lock_classes_node(struct kmem_cache *cachep, int q)
+{
+       struct kmem_list3 *l3;
+       l3 = cachep->nodelists[q];
+       if (!l3)
+               return;
+
+       slab_set_lock_classes(cachep, &on_slab_l3_key,
+                       &on_slab_alc_key, q);
+}
+
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+       int node;
+
+       VM_BUG_ON(OFF_SLAB(cachep));
+       for_each_node(node)
+               on_slab_lock_classes_node(cachep, node);
+}
+
  static inline void init_lock_keys(void)
  {
         int node;
@@ -659,6 +678,14 @@ static inline void init_lock_keys(void)
  {
  }
  
+static inline void on_slab_lock_classes(struct kmem_cache *cachep)
+{
+}
+
+static inline void on_slab_lock_classes_node(struct kmem_cache *cachep, int node)
+{
+}
+
  static void slab_set_debugobj_lock_classes_node(struct kmem_cache *cachep, int node)
  {
  }
@@ -785,7 +812,7 @@ static void __slab_error(const char *function, struct kmem_cache *cachep,
         printk(KERN_ERR "slab error in %s(): cache `%s': %s\n",
                function, cachep->name, msg);
         dump_stack();
-       add_taint(TAINT_BAD_PAGE);
+       add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
  }
  #endif
  
@@ -1387,6 +1414,9 @@ static int __cpuinit cpuup_prepare(long cpu)
                 free_alien_cache(alien);
                 if (cachep->flags & SLAB_DEBUG_OBJECTS)
                         slab_set_debugobj_lock_classes_node(cachep, node);
+               else if (!OFF_SLAB(cachep) &&
+                        !(cachep->flags & SLAB_DESTROY_BY_RCU))
+                       on_slab_lock_classes_node(cachep, node);
         }
         init_node_lock_keys(node);
  
@@ -1572,12 +1602,9 @@ static void setup_nodelists_pointer(struct kmem_cache *cachep)
   */
  void __init kmem_cache_init(void)
  {
-       size_t left_over;
         struct cache_sizes *sizes;
         struct cache_names *names;
         int i;
-       int order;
-       int node;
  
         kmem_cache = &kmem_cache_boot;
         setup_nodelists_pointer(kmem_cache);
@@ -1618,36 +1645,16 @@ void __init kmem_cache_init(void)
          * 6) Resize the head arrays of the kmalloc caches to their final sizes.
          */
  
-       node = numa_mem_id();
-
         /* 1) create the kmem_cache */
-       INIT_LIST_HEAD(&slab_caches);
-       list_add(&kmem_cache->list, &slab_caches);
-       kmem_cache->colour_off = cache_line_size();
-       kmem_cache->array[smp_processor_id()] = &initarray_cache.cache;
  
         /*
          * struct kmem_cache size depends on nr_node_ids & nr_cpu_ids
          */
-       kmem_cache->size = offsetof(struct kmem_cache, array[nr_cpu_ids]) +
-                                 nr_node_ids * sizeof(struct kmem_list3 *);
-       kmem_cache->object_size = kmem_cache->size;
-       kmem_cache->size = ALIGN(kmem_cache->object_size,
-                                       cache_line_size());
-       kmem_cache->reciprocal_buffer_size =
-               reciprocal_value(kmem_cache->size);
-
-       for (order = 0; order < MAX_ORDER; order++) {
-               cache_estimate(order, kmem_cache->size,
-                       cache_line_size(), 0, &left_over, &kmem_cache->num);
-               if (kmem_cache->num)
-                       break;
-       }
-       BUG_ON(!kmem_cache->num);
-       kmem_cache->gfporder = order;
-       kmem_cache->colour = left_over / kmem_cache->colour_off;
-       kmem_cache->slab_size = ALIGN(kmem_cache->num * sizeof(kmem_bufctl_t) +
-                                     sizeof(struct slab), cache_line_size());
+       create_boot_cache(kmem_cache, "kmem_cache",
+               offsetof(struct kmem_cache, array[nr_cpu_ids]) +
+                                 nr_node_ids * sizeof(struct kmem_list3 *),
+                                 SLAB_HWCACHE_ALIGN);
+       list_add(&kmem_cache->list, &slab_caches);
  
         /* 2+3) create the kmalloc caches */
         sizes = malloc_sizes;
@@ -1695,7 +1702,6 @@ void __init kmem_cache_init(void)
  
                 ptr = kmalloc(sizeof(struct arraycache_init), GFP_NOWAIT);
  
-               BUG_ON(cpu_cache_get(kmem_cache) != &initarray_cache.cache);
                 memcpy(ptr, cpu_cache_get(kmem_cache),
                        sizeof(struct arraycache_init));
                 /*
@@ -1889,6 +1895,7 @@ static void *kmem_getpages(struct kmem_cache *cachep, gfp_t flags, int nodeid)
                 if (page->pfmemalloc)
                         SetPageSlabPfmemalloc(page + i);
         }
+       memcg_bind_pages(cachep, cachep->gfporder);
  
         if (kmemcheck_enabled && !(cachep->flags & SLAB_NOTRACK)) {
                 kmemcheck_alloc_shadow(page, cachep->gfporder, flags, nodeid);
@@ -1925,9 +1932,11 @@ static void kmem_freepages(struct kmem_cache *cachep, void *addr)
                 __ClearPageSlab(page);
                 page++;
         }
+
+       memcg_release_pages(cachep, cachep->gfporder);
         if (current->reclaim_state)
                 current->reclaim_state->reclaimed_slab += nr_freed;
-       free_pages((unsigned long)addr, cachep->gfporder);
+       free_memcg_kmem_pages((unsigned long)addr, cachep->gfporder);
  }
  
  static void kmem_rcu_free(struct rcu_head *head)
@@ -2250,7 +2259,15 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  
         if (slab_state == DOWN) {
                 /*
-                * Note: the first kmem_cache_create must create the cache
+                * Note: Creation of first cache (kmem_cache).
+                * The set_up_list3s() step is taken care
+                * of by the caller of __kmem_cache_create
+                */
+               cachep->array[smp_processor_id()] = &initarray_generic.cache;
+               slab_state = PARTIAL;
+       } else if (slab_state == PARTIAL) {
+               /*
+                * Note: the second kmem_cache_create must create the cache
                  * that's used by kmalloc(24), otherwise the creation of
                  * further caches will BUG().
                  */
@@ -2258,7 +2275,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
  
                 /*
                  * If the cache that's used by kmalloc(sizeof(kmem_list3)) is
-                * the first cache, then we need to set up all its list3s,
+                * the second cache, then we need to set up all its list3s,
                  * otherwise the creation of further caches will BUG().
                  */
                 set_up_list3s(cachep, SIZE_AC);
@@ -2267,6 +2284,7 @@ static int __init_refok setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
                 else
                         slab_state = PARTIAL_ARRAYCACHE;
         } else {
+               /* Remaining boot caches */
                 cachep->array[smp_processor_id()] =
                         kmalloc(sizeof(struct arraycache_init), gfp);
  
@@ -2354,22 +2372,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                 size &= ~(BYTES_PER_WORD - 1);
         }
  
-       /* calculate the final buffer alignment: */
-
-       /* 1) arch recommendation: can be overridden for debug */
-       if (flags & SLAB_HWCACHE_ALIGN) {
-               /*
-                * Default alignment: as specified by the arch code.  Except if
-                * an object is really small, then squeeze multiple objects into
-                * one cacheline.
-                */
-               ralign = cache_line_size();
-               while (size <= ralign / 2)
-                       ralign /= 2;
-       } else {
-               ralign = BYTES_PER_WORD;
-       }
-
         /*
          * Redzoning and user store require word alignment or possibly larger.
          * Note this will be overridden by architecture or caller mandated
@@ -2386,10 +2388,6 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                 size &= ~(REDZONE_ALIGN - 1);
         }
  
-       /* 2) arch mandated alignment */
-       if (ralign < ARCH_SLAB_MINALIGN) {
-               ralign = ARCH_SLAB_MINALIGN;
-       }
         /* 3) caller mandated alignment */
         if (ralign < cachep->align) {
                 ralign = cachep->align;
@@ -2526,7 +2524,8 @@ __kmem_cache_create (struct kmem_cache *cachep, unsigned long flags)
                 WARN_ON_ONCE(flags & SLAB_DESTROY_BY_RCU);
  
                 slab_set_debugobj_lock_classes(cachep);
-       }
+       } else if (!OFF_SLAB(cachep) && !(flags & SLAB_DESTROY_BY_RCU))
+               on_slab_lock_classes(cachep);
  
         return 0;
  }
@@ -3490,6 +3489,8 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
         if (slab_should_failslab(cachep, flags))
                 return NULL;
  
+       cachep = memcg_kmem_get_cache(cachep, flags);
+
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
  
@@ -3575,6 +3576,8 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
         if (slab_should_failslab(cachep, flags))
                 return NULL;
  
+       cachep = memcg_kmem_get_cache(cachep, flags);
+
         cache_alloc_debugcheck_before(cachep, flags);
         local_irq_save(save_flags);
         objp = __do_cache_alloc(cachep, flags);
@@ -3888,6 +3891,9 @@ EXPORT_SYMBOL(__kmalloc);
  void kmem_cache_free(struct kmem_cache *cachep, void *objp)
  {
         unsigned long flags;
+       cachep = cache_from_obj(cachep, objp);
+       if (!cachep)
+               return;
  
         local_irq_save(flags);
         debug_check_no_locks_freed(objp, cachep->object_size);
@@ -4035,7 +4041,7 @@ static void do_ccupdate_local(void *info)
  }
  
  /* Always called with the slab_mutex held */
-static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+static int __do_tune_cpucache(struct kmem_cache *cachep, int limit,
                                 int batchcount, int shared, gfp_t gfp)
  {
         struct ccupdate_struct *new;
@@ -4078,12 +4084,49 @@ static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
         return alloc_kmemlist(cachep, gfp);
  }
  
+static int do_tune_cpucache(struct kmem_cache *cachep, int limit,
+                               int batchcount, int shared, gfp_t gfp)
+{
+       int ret;
+       struct kmem_cache *c = NULL;
+       int i = 0;
+
+       ret = __do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
+
+       if (slab_state < FULL)
+               return ret;
+
+       if ((ret < 0) || !is_root_cache(cachep))
+               return ret;
+
+       VM_BUG_ON(!mutex_is_locked(&slab_mutex));
+       for_each_memcg_cache_index(i) {
+               c = cache_from_memcg(cachep, i);
+               if (c)
+                       /* return value determined by the parent cache only */
+                       __do_tune_cpucache(c, limit, batchcount, shared, gfp);
+       }
+
+       return ret;
+}
+
  /* Called with slab_mutex held always */
  static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
  {
         int err;
-       int limit, shared;
+       int limit = 0;
+       int shared = 0;
+       int batchcount = 0;
+
+       if (!is_root_cache(cachep)) {
+               struct kmem_cache *root = memcg_root_cache(cachep);
+               limit = root->limit;
+               shared = root->shared;
+               batchcount = root->batchcount;
+       }
  
+       if (limit && shared && batchcount)
+               goto skip_setup;
         /*
          * The head array serves three purposes:
          * - create a LIFO ordering, i.e. return objects that are cache-warm
@@ -4125,7 +4168,9 @@ static int enable_cpucache(struct kmem_cache *cachep, gfp_t gfp)
         if (limit > 32)
                 limit = 32;
  #endif
-       err = do_tune_cpucache(cachep, limit, (limit + 1) / 2, shared, gfp);
+       batchcount = (limit + 1) / 2;
+skip_setup:
+       err = do_tune_cpucache(cachep, limit, batchcount, shared, gfp);
         if (err)
                 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
                        cachep->name, -err);