cma: fix watermark checking

[deliverable/linux.git] / mm / page_alloc.c
diff --git a/mm/page_alloc.c b/mm/page_alloc.c

index c13ea7538891d85988b029236f11e233632b58ec..f2c7cc6a3039efa0178017d6483c8f0f69d854d3 100644 (file)
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -558,7 +558,8 @@ static inline void __free_one_page(struct page *page,
                 if (page_is_guard(buddy)) {
                         clear_page_guard_flag(buddy);
                         set_page_private(page, 0);
-                       __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+                       __mod_zone_freepage_state(zone, 1 << order,
+                                                 migratetype);
                 } else {
                         list_del(&buddy->lru);
                         zone->free_area[order].nr_free--;
@@ -668,12 +669,17 @@ static void free_pcppages_bulk(struct zone *zone, int count,
                         batch_free = to_free;
  
                 do {
+                       int mt; /* migratetype of the to-be-freed page */
+
                         page = list_entry(list->prev, struct page, lru);
                         /* must delete as __free_one_page list manipulates */
                         list_del(&page->lru);
+                       mt = page_private(page);
                         /* MIGRATE_MOVABLE list may include MIGRATE_RESERVEs */
-                       __free_one_page(page, zone, 0, page_private(page));
-                       trace_mm_page_pcpu_drain(page, 0, page_private(page));
+                       __free_one_page(page, zone, 0, mt);
+                       trace_mm_page_pcpu_drain(page, 0, mt);
+                       if (is_migrate_cma(mt))
+                               __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, 1);
                 } while (--to_free && --batch_free && !list_empty(list));
         }
         __mod_zone_page_state(zone, NR_FREE_PAGES, count);
@@ -688,7 +694,8 @@ static void free_one_page(struct zone *zone, struct page *page, int order,
         zone->pages_scanned = 0;
  
         __free_one_page(page, zone, order, migratetype);
-       __mod_zone_page_state(zone, NR_FREE_PAGES, 1 << order);
+       if (unlikely(migratetype != MIGRATE_ISOLATE))
+               __mod_zone_freepage_state(zone, 1 << order, migratetype);
         spin_unlock(&zone->lock);
  }
  
@@ -811,7 +818,8 @@ static inline void expand(struct zone *zone, struct page *page,
                         set_page_guard_flag(&page[size]);
                         set_page_private(&page[size], high);
                         /* Guard pages are not available for any usage */
-                       __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << high));
+                       __mod_zone_freepage_state(zone, -(1 << high),
+                                                 migratetype);
                         continue;
                 }
  #endif
@@ -1137,6 +1145,9 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order,
                 }
                 set_page_private(page, mt);
                 list = &page->lru;
+               if (is_migrate_cma(mt))
+                       __mod_zone_page_state(zone, NR_FREE_CMA_PAGES,
+                                             -(1 << order));
         }
         __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order));
         spin_unlock(&zone->lock);
@@ -1380,20 +1391,16 @@ void split_page(struct page *page, unsigned int order)
  }
  
  /*
- * Similar to split_page except the page is already free. As this is only
- * being used for migration, the migratetype of the block also changes.
- * As this is called with interrupts disabled, the caller is responsible
- * for calling arch_alloc_page() and kernel_map_page() after interrupts
- * are enabled.
- *
- * Note: this is probably too low level an operation for use in drivers.
- * Please consult with lkml before using this in your driver.
+ * Similar to the split_page family of functions except that the page is
+ * required at the given order and is being isolated now to prevent races
+ * with parallel allocators.
   */
-int split_free_page(struct page *page)
+int capture_free_page(struct page *page, int alloc_order, int migratetype)
  {
         unsigned int order;
         unsigned long watermark;
         struct zone *zone;
+       int mt;
  
         BUG_ON(!PageBuddy(page));
  
@@ -1409,12 +1416,16 @@ int split_free_page(struct page *page)
         list_del(&page->lru);
         zone->free_area[order].nr_free--;
         rmv_page_order(page);
-       __mod_zone_page_state(zone, NR_FREE_PAGES, -(1UL << order));
  
-       /* Split into individual pages */
-       set_page_refcounted(page);
-       split_page(page, order);
+       mt = get_pageblock_migratetype(page);
+       if (unlikely(mt != MIGRATE_ISOLATE))
+               __mod_zone_freepage_state(zone, -(1UL << order), mt);
  
+       if (alloc_order != order)
+               expand(zone, page, alloc_order, order,
+                       &zone->free_area[order], migratetype);
+
+       /* Set the pageblock if the captured page is at least a pageblock */
         if (order >= pageblock_order - 1) {
                 struct page *endpage = page + (1 << order) - 1;
                 for (; page < endpage; page += pageblock_nr_pages) {
@@ -1425,7 +1436,35 @@ int split_free_page(struct page *page)
                 }
         }
  
-       return 1 << order;
+       return 1UL << order;
+}
+
+/*
+ * Similar to split_page except the page is already free. As this is only
+ * being used for migration, the migratetype of the block also changes.
+ * As this is called with interrupts disabled, the caller is responsible
+ * for calling arch_alloc_page() and kernel_map_pages() after interrupts
+ * are enabled.
+ *
+ * Note: this is probably too low level an operation for use in drivers.
+ * Please consult with lkml before using this in your driver.
+ */
+int split_free_page(struct page *page)
+{
+       unsigned int order;
+       int nr_pages;
+
+       BUG_ON(!PageBuddy(page));
+       order = page_order(page);
+
+       nr_pages = capture_free_page(page, order, 0);
+       if (!nr_pages)
+               return 0;
+
+       /* Split into individual pages */
+       set_page_refcounted(page);
+       split_page(page, order);
+       return nr_pages;
  }
  
  /*
@@ -1484,7 +1523,8 @@ again:
                 spin_unlock(&zone->lock);
                 if (!page)
                         goto failed;
-               __mod_zone_page_state(zone, NR_FREE_PAGES, -(1 << order));
+               __mod_zone_freepage_state(zone, -(1 << order),
+                                         get_pageblock_migratetype(page));
         }
  
         __count_zone_vm_events(PGALLOC, zone, 1 << order);
@@ -1501,19 +1541,6 @@ failed:
         return NULL;
  }
  
-/* The ALLOC_WMARK bits are used as an index to zone->watermark */
-#define ALLOC_WMARK_MIN                WMARK_MIN
-#define ALLOC_WMARK_LOW                WMARK_LOW
-#define ALLOC_WMARK_HIGH       WMARK_HIGH
-#define ALLOC_NO_WATERMARKS    0x04 /* don't check watermarks at all */
-
-/* Mask to get the watermark bits */
-#define ALLOC_WMARK_MASK       (ALLOC_NO_WATERMARKS-1)
-
-#define ALLOC_HARDER           0x10 /* try to alloc harder */
-#define ALLOC_HIGH             0x20 /* __GFP_HIGH set */
-#define ALLOC_CPUSET           0x40 /* check for correct cpuset */
-
  #ifdef CONFIG_FAIL_PAGE_ALLOC
  
  static struct {
@@ -1608,7 +1635,11 @@ static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
                 min -= min / 2;
         if (alloc_flags & ALLOC_HARDER)
                 min -= min / 4;
-
+#ifdef CONFIG_CMA
+       /* If allocation can't use CMA areas don't use free CMA pages */
+       if (!(alloc_flags & ALLOC_CMA))
+               free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
+#endif
         if (free_pages <= min + lowmem_reserve)
                 return false;
         for (o = 0; o < order; o++) {
@@ -2105,7 +2136,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         bool *contended_compaction, bool *deferred_compaction,
         unsigned long *did_some_progress)
  {
-       struct page *page;
+       struct page *page = NULL;
  
         if (!order)
                 return NULL;
@@ -2118,10 +2149,16 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
         current->flags |= PF_MEMALLOC;
         *did_some_progress = try_to_compact_pages(zonelist, order, gfp_mask,
                                                 nodemask, sync_migration,
-                                               contended_compaction);
+                                               contended_compaction, &page);
         current->flags &= ~PF_MEMALLOC;
-       if (*did_some_progress != COMPACT_SKIPPED) {
  
+       /* If compaction captured a page, prep and use it */
+       if (page) {
+               prep_new_page(page, order, gfp_mask);
+               goto got_page;
+       }
+
+       if (*did_some_progress != COMPACT_SKIPPED) {
                 /* Page migration frees to the PCP lists but we want merging */
                 drain_pages(get_cpu());
                 put_cpu();
@@ -2131,6 +2168,7 @@ __alloc_pages_direct_compact(gfp_t gfp_mask, unsigned int order,
                                 alloc_flags & ~ALLOC_NO_WATERMARKS,
                                 preferred_zone, migratetype);
                 if (page) {
+got_page:
                         preferred_zone->compact_considered = 0;
                         preferred_zone->compact_defer_shift = 0;
                         if (order >= preferred_zone->compact_order_failed)
@@ -2315,7 +2353,10 @@ gfp_to_alloc_flags(gfp_t gfp_mask)
                                  unlikely(test_thread_flag(TIF_MEMDIE))))
                         alloc_flags |= ALLOC_NO_WATERMARKS;
         }
-
+#ifdef CONFIG_CMA
+       if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+               alloc_flags |= ALLOC_CMA;
+#endif
         return alloc_flags;
  }
  
@@ -2362,9 +2403,8 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order,
                 goto nopage;
  
  restart:
-       if (!(gfp_mask & __GFP_NO_KSWAPD))
-               wake_all_kswapd(order, zonelist, high_zoneidx,
-                                               zone_idx(preferred_zone));
+       wake_all_kswapd(order, zonelist, high_zoneidx,
+                                       zone_idx(preferred_zone));
  
         /*
          * OK, we're below the kswapd watermark and have kicked background
@@ -2441,7 +2481,7 @@ rebalance:
          * system then fail the allocation instead of entering direct reclaim.
          */
         if ((deferred_compaction || contended_compaction) &&
-                                               (gfp_mask & __GFP_NO_KSWAPD))
+           (gfp_mask & (__GFP_MOVABLE|__GFP_REPEAT)) == __GFP_MOVABLE)
                 goto nopage;
  
         /* Try direct reclaim and then allocating */
@@ -2541,6 +2581,7 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
         struct page *page = NULL;
         int migratetype = allocflags_to_migratetype(gfp_mask);
         unsigned int cpuset_mems_cookie;
+       int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
  
         gfp_mask &= gfp_allowed_mask;
  
@@ -2569,9 +2610,13 @@ retry_cpuset:
         if (!preferred_zone)
                 goto out;
  
+#ifdef CONFIG_CMA
+       if (allocflags_to_migratetype(gfp_mask) == MIGRATE_MOVABLE)
+               alloc_flags |= ALLOC_CMA;
+#endif
         /* First allocation attempt */
         page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
-                       zonelist, high_zoneidx, ALLOC_WMARK_LOW|ALLOC_CPUSET,
+                       zonelist, high_zoneidx, alloc_flags,
                         preferred_zone, migratetype);
         if (unlikely(!page))
                 page = __alloc_pages_slowpath(gfp_mask, order,
@@ -2852,7 +2897,8 @@ void show_free_areas(unsigned int filter)
                 " unevictable:%lu"
                 " dirty:%lu writeback:%lu unstable:%lu\n"
                 " free:%lu slab_reclaimable:%lu slab_unreclaimable:%lu\n"
-               " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n",
+               " mapped:%lu shmem:%lu pagetables:%lu bounce:%lu\n"
+               " free_cma:%lu\n",
                 global_page_state(NR_ACTIVE_ANON),
                 global_page_state(NR_INACTIVE_ANON),
                 global_page_state(NR_ISOLATED_ANON),
@@ -2869,7 +2915,8 @@ void show_free_areas(unsigned int filter)
                 global_page_state(NR_FILE_MAPPED),
                 global_page_state(NR_SHMEM),
                 global_page_state(NR_PAGETABLE),
-               global_page_state(NR_BOUNCE));
+               global_page_state(NR_BOUNCE),
+               global_page_state(NR_FREE_CMA_PAGES));
  
         for_each_populated_zone(zone) {
                 int i;
@@ -2901,6 +2948,7 @@ void show_free_areas(unsigned int filter)
                         " pagetables:%lukB"
                         " unstable:%lukB"
                         " bounce:%lukB"
+                       " free_cma:%lukB"
                         " writeback_tmp:%lukB"
                         " pages_scanned:%lu"
                         " all_unreclaimable? %s"
@@ -2930,6 +2978,7 @@ void show_free_areas(unsigned int filter)
                         K(zone_page_state(zone, NR_PAGETABLE)),
                         K(zone_page_state(zone, NR_UNSTABLE_NFS)),
                         K(zone_page_state(zone, NR_BOUNCE)),
+                       K(zone_page_state(zone, NR_FREE_CMA_PAGES)),
                         K(zone_page_state(zone, NR_WRITEBACK_TEMP)),
                         zone->pages_scanned,
                         (zone->all_unreclaimable ? "yes" : "no")
@@ -5670,6 +5719,8 @@ static int __alloc_contig_migrate_range(unsigned long start, unsigned long end)
                         break;
                 }
  
+               reclaim_clean_pages_from_list(cc.zone, &cc.migratepages);
+
                 ret = migrate_pages(&cc.migratepages,
                                     __alloc_contig_migrate_alloc,
                                     0, false, MIGRATE_SYNC);