struct list_head *src = &lruvec->lists[lru];
unsigned long nr_taken = 0;
unsigned long nr_zone_taken[MAX_NR_ZONES] = { 0 };
+ unsigned long nr_skipped[MAX_NR_ZONES] = { 0, };
unsigned long scan, nr_pages;
LIST_HEAD(pages_skipped);
if (page_zonenum(page) > sc->reclaim_idx) {
list_move(&page->lru, &pages_skipped);
+ nr_skipped[page_zonenum(page)]++;
continue;
}
* scanning would soon rescan the same pages to skip and put the
* system at risk of premature OOM.
*/
- if (!list_empty(&pages_skipped))
+ if (!list_empty(&pages_skipped)) {
+ int zid;
+
list_splice(&pages_skipped, src);
+ for (zid = 0; zid < MAX_NR_ZONES; zid++) {
+ if (!nr_skipped[zid])
+ continue;
+
+ __count_zid_vm_events(PGSCAN_SKIP, zid, nr_skipped[zid]);
+ }
+ }
*nr_scanned = scan;
- trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, scan,
+ trace_mm_vmscan_lru_isolate(sc->reclaim_idx, sc->order, nr_to_scan, scan,
nr_taken, mode, is_file_lru(lru));
for (scan = 0; scan < MAX_NR_ZONES; scan++) {
nr_pages = nr_zone_taken[scan];
return true;
}
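
/*
 * A minimal userspace sketch of the skip accounting introduced above:
 * pages from zones above sc->reclaim_idx are set aside and counted per
 * zone, then the counters are flushed in one pass rather than bumping a
 * vmstat counter per skipped page. Everything here (struct page, zid,
 * count_skip_event) is a mocked stand-in for illustration, not the
 * kernel API.
 */
#include <stdio.h>

#define MOCK_NR_ZONES 4

struct page { int zid; };

static void count_skip_event(int zid, unsigned long nr)
{
	printf("PGSCAN_SKIP zone %d += %lu\n", zid, nr);
}

int main(void)
{
	struct page lru[] = { {0}, {3}, {1}, {3}, {2} };
	unsigned long nr_skipped[MOCK_NR_ZONES] = { 0 };
	int reclaim_idx = 1;
	unsigned i;
	int zid;

	/* Pages above reclaim_idx are skipped, not isolated. */
	for (i = 0; i < sizeof(lru) / sizeof(lru[0]); i++) {
		if (lru[i].zid > reclaim_idx) {
			nr_skipped[lru[i].zid]++;	/* batched count */
			continue;
		}
		/* ... isolate the page ... */
	}

	/* One event flush per zone instead of one per skipped page. */
	for (zid = 0; zid < MOCK_NR_ZONES; zid++) {
		if (!nr_skipped[zid])
			continue;
		count_skip_event(zid, nr_skipped[zid]);
	}
	return 0;
}
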
-static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc,
- enum zone_type classzone_idx)
+static bool shrink_node(pg_data_t *pgdat, struct scan_control *sc)
{
struct reclaim_state *reclaim_state = current->reclaim_state;
unsigned long nr_reclaimed, nr_scanned;
* Returns true if compaction should go ahead for a high-order request, or
* the high-order allocation would succeed without compaction.
*/
-static inline bool compaction_ready(struct zone *zone, int order, int classzone_idx)
+static inline bool compaction_ready(struct zone *zone, struct scan_control *sc)
{
unsigned long watermark;
bool watermark_ok;
* there is a buffer of free pages available to give compaction
* a reasonable chance of completing and allocating the page
*/
- watermark = high_wmark_pages(zone) + (2UL << order);
- watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, classzone_idx);
+ watermark = high_wmark_pages(zone) + (2UL << sc->order);
+ watermark_ok = zone_watermark_ok_safe(zone, 0, watermark, sc->reclaim_idx);
/*
* If compaction is deferred, reclaim up to a point where
* compaction will have a chance of success when re-enabled
*/
- if (compaction_deferred(zone, order))
+ if (compaction_deferred(zone, sc->order))
return watermark_ok;
/*
* If compaction is not ready to start and allocation is not likely
* to succeed without it, then keep reclaiming.
*/
- if (compaction_suitable(zone, order, 0, classzone_idx) == COMPACT_SKIPPED)
+ if (compaction_suitable(zone, sc->order, 0, sc->reclaim_idx) == COMPACT_SKIPPED)
return false;
return watermark_ok;
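
/*
 * A worked example of the watermark buffer used above: compaction is
 * given high_wmark plus 2UL << sc->order free pages of headroom. The
 * order-9 request and 4KiB page size below are illustrative
 * assumptions (x86-64 THP), not values taken from this patch.
 */
#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;	/* assumed 4KiB pages */
	int order = 9;			/* assumed THP-sized request */
	unsigned long buffer = 2UL << order;

	/* 2UL << 9 == 1024 pages == 4MiB above the high watermark. */
	printf("buffer: %lu pages (%lu KiB)\n",
	       buffer, buffer * page_size / 1024);
	return 0;
}
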
unsigned long nr_soft_reclaimed;
unsigned long nr_soft_scanned;
gfp_t orig_mask;
- enum zone_type classzone_idx;
pg_data_t *last_pgdat = NULL;
/*
orig_mask = sc->gfp_mask;
if (buffer_heads_over_limit) {
sc->gfp_mask |= __GFP_HIGHMEM;
- sc->reclaim_idx = classzone_idx = gfp_zone(sc->gfp_mask);
+ sc->reclaim_idx = gfp_zone(sc->gfp_mask);
}
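
/*
 * A mocked sketch of why sc->reclaim_idx is recomputed above: OR-ing
 * __GFP_HIGHMEM into the mask raises the highest zone the allocation
 * may use, so the reclaim ceiling must be raised to match. The flag
 * bits and the mapping below are invented for illustration; the real
 * gfp_zone() uses a packed lookup table over the GFP zone bits.
 */
#include <stdio.h>

#define MOCK_GFP_DMA		0x1u
#define MOCK_GFP_HIGHMEM	0x2u

enum mock_zone { MOCK_ZONE_DMA, MOCK_ZONE_NORMAL, MOCK_ZONE_HIGHMEM };

static enum mock_zone mock_gfp_zone(unsigned int mask)
{
	if (mask & MOCK_GFP_HIGHMEM)
		return MOCK_ZONE_HIGHMEM;
	if (mask & MOCK_GFP_DMA)
		return MOCK_ZONE_DMA;
	return MOCK_ZONE_NORMAL;
}

int main(void)
{
	unsigned int mask = 0;		/* GFP_KERNEL-like mask */

	printf("before: zone %d\n", mock_gfp_zone(mask));
	mask |= MOCK_GFP_HIGHMEM;	/* buffer_heads_over_limit path */
	printf("after:  zone %d\n", mock_gfp_zone(mask));
	return 0;
}
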
for_each_zone_zonelist_nodemask(zone, z, zonelist,
if (!populated_zone(zone))
continue;
- /*
- * Note that reclaim_idx does not change as it is the highest
- * zone reclaimed from which for empty zones is a no-op but
- * classzone_idx is used by shrink_node to test if the slabs
- * should be shrunk on a given node.
- */
- classzone_idx = sc->reclaim_idx;
- while (!populated_zone(zone->zone_pgdat->node_zones +
- classzone_idx))
- classzone_idx--;
-
/*
* Take care memory controller reclaiming has small influence
* to global LRU.
*/
if (IS_ENABLED(CONFIG_COMPACTION) &&
sc->order > PAGE_ALLOC_COSTLY_ORDER &&
- zonelist_zone_idx(z) <= classzone_idx &&
- compaction_ready(zone, sc->order, classzone_idx)) {
+ compaction_ready(zone, sc)) {
sc->compaction_ready = true;
continue;
}
if (zone->zone_pgdat == last_pgdat)
continue;
last_pgdat = zone->zone_pgdat;
- shrink_node(zone->zone_pgdat, sc, classzone_idx);
+ shrink_node(zone->zone_pgdat, sc);
}
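
/*
 * A sketch of the last_pgdat check above: the zonelist visits several
 * zones belonging to the same node, and shrink_node() should run once
 * per node. The check only skips consecutive repeats, which is enough
 * when a node's zones appear adjacently in the list. Types and names
 * here are mocked stand-ins.
 */
#include <stdio.h>
#include <stddef.h>

struct mock_node { int id; };
struct mock_zone { struct mock_node *nd; };

static void shrink_once(struct mock_node *nd)
{
	printf("shrink node %d\n", nd->id);
}

int main(void)
{
	struct mock_node n0 = { 0 }, n1 = { 1 };
	/* Node-ordered list: each node's zones are adjacent. */
	struct mock_zone zl[] = { {&n0}, {&n0}, {&n1}, {&n1} };
	struct mock_node *last = NULL;
	unsigned i;

	for (i = 0; i < sizeof(zl) / sizeof(zl[0]); i++) {
		if (zl[i].nd == last)	/* this node was just shrunk */
			continue;
		last = zl[i].nd;
		shrink_once(zl[i].nd);
	}
	return 0;
}
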
/*
delayacct_freepages_start();
if (global_reclaim(sc))
- count_vm_event(ALLOCSTALL);
+ __count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
do {
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
trace_mm_vmscan_direct_reclaim_begin(order,
sc.may_writepage,
- gfp_mask);
+ gfp_mask,
+ sc.reclaim_idx);
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
trace_mm_vmscan_memcg_softlimit_reclaim_begin(sc.order,
sc.may_writepage,
- sc.gfp_mask);
+ sc.gfp_mask,
+ sc.reclaim_idx);
/*
* NOTE: Although we can get the priority field, using it
trace_mm_vmscan_memcg_reclaim_begin(0,
sc.may_writepage,
- sc.gfp_mask);
+ sc.gfp_mask,
+ sc.reclaim_idx);
nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
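
/*
 * The three reclaim-begin trace calls above each gain sc.reclaim_idx so
 * the tracepoint records which zones were eligible for the reclaim
 * attempt. A mocked sketch of threading the extra argument through;
 * trace_reclaim_begin() is an invented stand-in, not the kernel
 * tracepoint, and the values passed are illustrative only.
 */
#include <stdio.h>
#include <stdbool.h>

static void trace_reclaim_begin(int order, bool may_writepage,
				unsigned int gfp_mask, int reclaim_idx)
{
	printf("order=%d may_writepage=%d gfp=%#x classzone=%d\n",
	       order, may_writepage, gfp_mask, reclaim_idx);
}

int main(void)
{
	trace_reclaim_begin(0, true, 0xd0u, 2);	/* illustrative values */
	return 0;
}
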
*
* Returns true if kswapd is ready to sleep
*/
-static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, long remaining,
- int classzone_idx)
+static bool prepare_kswapd_sleep(pg_data_t *pgdat, int order, int classzone_idx)
{
int i;
- /* If a direct reclaimer woke kswapd within HZ/10, it's premature */
- if (remaining)
- return false;
-
/*
* The throttled processes are normally woken up in balance_pgdat() as
* soon as pfmemalloc_watermark_ok() is true. But there is a potential
* This is used to determine if the scanning priority needs to be raised.
*/
static bool kswapd_shrink_node(pg_data_t *pgdat,
- int classzone_idx,
struct scan_control *sc)
{
struct zone *zone;
/* Reclaim a number of pages proportional to the number of zones */
sc->nr_to_reclaim = 0;
- for (z = 0; z <= classzone_idx; z++) {
+ for (z = 0; z <= sc->reclaim_idx; z++) {
zone = pgdat->node_zones + z;
if (!populated_zone(zone))
continue;
* Historically care was taken to put equal pressure on all zones but
* now pressure is applied based on node LRU order.
*/
- shrink_node(pgdat, sc, classzone_idx);
+ shrink_node(pgdat, sc);
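
/*
 * A sketch of the proportional target set up in the loop above. The
 * loop body is elided in this hunk; the accumulation below (at least
 * one batch, at most the zone's high watermark, summed over eligible
 * zones) is an assumption about what it does, with mocked values in
 * place of high_wmark_pages() and SWAP_CLUSTER_MAX.
 */
#include <stdio.h>

#define MOCK_SWAP_CLUSTER_MAX	32UL

int main(void)
{
	unsigned long high_wmark[] = { 128, 0, 2048 };	/* 0: unpopulated */
	int reclaim_idx = 2;
	unsigned long nr_to_reclaim = 0;
	int z;

	for (z = 0; z <= reclaim_idx; z++) {
		if (!high_wmark[z])	/* stand-in for !populated_zone() */
			continue;
		nr_to_reclaim += high_wmark[z] > MOCK_SWAP_CLUSTER_MAX ?
				 high_wmark[z] : MOCK_SWAP_CLUSTER_MAX;
	}
	printf("nr_to_reclaim = %lu\n", nr_to_reclaim);
	return 0;
}
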
/*
* Fragmentation may mean that the system cannot be rebalanced for
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = 1,
- .reclaim_idx = classzone_idx,
};
count_vm_event(PAGEOUTRUN);
bool raise_priority = true;
sc.nr_reclaimed = 0;
+ sc.reclaim_idx = classzone_idx;
/*
- * If the number of buffer_heads in the machine exceeds the
- * maximum allowed level then reclaim from all zones. This is
- * not specific to highmem as highmem may not exist but it is
- * it is expected that buffer_heads are stripped in writeback.
+ * If the number of buffer_heads exceeds the maximum allowed
+ * then consider reclaiming from all zones. This has a dual
+ * purpose -- on 64-bit systems it is expected that
+ * buffer_heads are stripped during active rotation. On 32-bit
+ * systems, highmem pages can pin lowmem memory and shrinking
+ * buffers can relieve lowmem pressure. Reclaim may still not
+ * go ahead if all eligible zones for the original allocation
+ * request are balanced to avoid excessive reclaim from kswapd.
*/
if (buffer_heads_over_limit) {
for (i = MAX_NR_ZONES - 1; i >= 0; i--) {
zone = pgdat->node_zones + i;
if (!populated_zone(zone))
continue;
- classzone_idx = i;
+ sc.reclaim_idx = i;
break;
}
}
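
/*
 * A sketch of the highest-populated-zone walk above, with a mocked
 * populated[] array standing in for pgdat->node_zones: the scan starts
 * at the top zone and stops at the first populated one, widening
 * sc.reclaim_idx for this balancing pass only.
 */
#include <stdio.h>

#define MOCK_NR_ZONES 4

int main(void)
{
	int populated[MOCK_NR_ZONES] = { 1, 1, 1, 0 };	/* top zone empty */
	int reclaim_idx = 1;	/* classzone of the original request */
	int i;

	for (i = MOCK_NR_ZONES - 1; i >= 0; i--) {
		if (!populated[i])
			continue;
		reclaim_idx = i;	/* reclaim from all populated zones */
		break;
	}
	printf("reclaim_idx = %d\n", reclaim_idx);
	return 0;
}
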
* Scanning from low to high zone would allow congestion to be
* cleared during a very small window when a small low
* zone was balanced even under extreme pressure when the
- * overall node may be congested.
+ * overall node may be congested. Note that sc.reclaim_idx
+ * is not used as buffer_heads_over_limit may have adjusted
+ * it.
*/
for (i = classzone_idx; i >= 0; i--) {
zone = pgdat->node_zones + i;
* enough pages are already being scanned that that high
* watermark would be met at 100% efficiency.
*/
- if (kswapd_shrink_node(pgdat, classzone_idx, &sc))
+ if (kswapd_shrink_node(pgdat, &sc))
raise_priority = false;
/*
prepare_to_wait(&pgdat->kswapd_wait, &wait, TASK_INTERRUPTIBLE);
/* Try to sleep for a short interval */
- if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
+ if (prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) {
/*
* Compaction records what page blocks it recently failed to
* isolate pages from and skips them in the future scanning.
* After a short sleep, check if it was a premature sleep. If not, then
* go fully to sleep until explicitly woken up.
*/
- if (prepare_kswapd_sleep(pgdat, reclaim_order, remaining, classzone_idx)) {
+ if (!remaining &&
+ prepare_kswapd_sleep(pgdat, reclaim_order, classzone_idx)) {
trace_mm_vmscan_kswapd_sleep(pgdat->node_id);
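
/*
 * A mocked sketch of the two-stage sleep above: kswapd first naps for a
 * short interval; only if that nap ran to completion (remaining == 0)
 * and the node still looks balanced does it commit to a full sleep.
 * The helpers below are invented stand-ins for schedule_timeout() and
 * prepare_kswapd_sleep().
 */
#include <stdio.h>
#include <stdbool.h>

static long mock_short_nap(void)	{ return 0; }	/* 0: nap expired */
static bool mock_ready_to_sleep(void)	{ return true; }

int main(void)
{
	long remaining = mock_short_nap();

	if (!remaining && mock_ready_to_sleep())
		printf("full sleep until explicitly woken\n");
	else
		printf("premature wakeup: resume balancing\n");
	return 0;
}
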
/*
* but kcompactd is woken to compact for the original
* request (alloc_order).
*/
- trace_mm_vmscan_kswapd_wake(pgdat->node_id, alloc_order);
+ trace_mm_vmscan_kswapd_wake(pgdat->node_id, classzone_idx,
+ alloc_order);
reclaim_order = balance_pgdat(pgdat, alloc_order, classzone_idx);
if (reclaim_order < alloc_order)
goto kswapd_try_sleep;
* priorities until we have enough memory freed.
*/
do {
- shrink_node(pgdat, &sc, classzone_idx);
+ shrink_node(pgdat, &sc);
} while (sc.nr_reclaimed < nr_pages && --sc.priority >= 0);
}
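
/*
 * A sketch of the retry loop above: each pass reclaims at the current
 * priority, and the priority value is lowered (widening the scan
 * window) until enough pages are freed or priority 0 has run.
 * mock_shrink() and the starting priority are invented stand-ins for
 * shrink_node() and the caller's sc.priority.
 */
#include <stdio.h>

static unsigned long mock_shrink(int priority)
{
	/* Lower priority value => larger scan window => more reclaimed. */
	return 64UL >> priority;
}

int main(void)
{
	unsigned long nr_pages = 96, nr_reclaimed = 0;
	int priority = 4;

	do {
		nr_reclaimed += mock_shrink(priority);
		printf("priority %d: total %lu\n", priority, nr_reclaimed);
	} while (nr_reclaimed < nr_pages && --priority >= 0);
	return 0;
}
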