Commit | Line | Data |
---|---|---|
f6ac2354 CL |
1 | /* |
2 | * linux/mm/vmstat.c | |
3 | * | |
4 | * Manages VM statistics | |
5 | * Copyright (C) 1991, 1992, 1993, 1994 Linus Torvalds | |
2244b95a CL |
6 | * |
7 | * zoned VM statistics | |
8 | * Copyright (C) 2006 Silicon Graphics, Inc., | |
9 | * Christoph Lameter <christoph@lameter.com> | |
f6ac2354 CL |
10 | */ |
11 | ||
12 | #include <linux/config.h> | |
13 | #include <linux/mm.h> | |
2244b95a | 14 | #include <linux/module.h> |
f6ac2354 CL |
15 | |
16 | /* | |
17 | * Accumulate the page_state information across all CPUs. | |
18 | * The result is unavoidably approximate - it can change | |
19 | * during and after execution of this function. | |
20 | */ | |
21 | DEFINE_PER_CPU(struct page_state, page_states) = {0}; | |
22 | ||
f6ac2354 CL |
23 | static void __get_page_state(struct page_state *ret, int nr, cpumask_t *cpumask) |
24 | { | |
25 | unsigned cpu; | |
26 | ||
27 | memset(ret, 0, nr * sizeof(unsigned long)); | |
28 | cpus_and(*cpumask, *cpumask, cpu_online_map); | |
29 | ||
30 | for_each_cpu_mask(cpu, *cpumask) { | |
31 | unsigned long *in; | |
32 | unsigned long *out; | |
33 | unsigned off; | |
34 | unsigned next_cpu; | |
35 | ||
36 | in = (unsigned long *)&per_cpu(page_states, cpu); | |
37 | ||
38 | next_cpu = next_cpu(cpu, *cpumask); | |
39 | if (likely(next_cpu < NR_CPUS)) | |
40 | prefetch(&per_cpu(page_states, next_cpu)); | |
41 | ||
42 | out = (unsigned long *)ret; | |
43 | for (off = 0; off < nr; off++) | |
44 | *out++ += *in++; | |
45 | } | |
46 | } | |
47 | ||
48 | void get_page_state_node(struct page_state *ret, int node) | |
49 | { | |
50 | int nr; | |
51 | cpumask_t mask = node_to_cpumask(node); | |
52 | ||
53 | nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); | |
54 | nr /= sizeof(unsigned long); | |
55 | ||
56 | __get_page_state(ret, nr+1, &mask); | |
57 | } | |
58 | ||
59 | void get_page_state(struct page_state *ret) | |
60 | { | |
61 | int nr; | |
62 | cpumask_t mask = CPU_MASK_ALL; | |
63 | ||
64 | nr = offsetof(struct page_state, GET_PAGE_STATE_LAST); | |
65 | nr /= sizeof(unsigned long); | |
66 | ||
67 | __get_page_state(ret, nr + 1, &mask); | |
68 | } | |
69 | ||
70 | void get_full_page_state(struct page_state *ret) | |
71 | { | |
72 | cpumask_t mask = CPU_MASK_ALL; | |
73 | ||
74 | __get_page_state(ret, sizeof(*ret) / sizeof(unsigned long), &mask); | |
75 | } | |
76 | ||
77 | unsigned long read_page_state_offset(unsigned long offset) | |
78 | { | |
79 | unsigned long ret = 0; | |
80 | int cpu; | |
81 | ||
82 | for_each_online_cpu(cpu) { | |
83 | unsigned long in; | |
84 | ||
85 | in = (unsigned long)&per_cpu(page_states, cpu) + offset; | |
86 | ret += *((unsigned long *)in); | |
87 | } | |
88 | return ret; | |
89 | } | |
90 | ||
91 | void __mod_page_state_offset(unsigned long offset, unsigned long delta) | |
92 | { | |
93 | void *ptr; | |
94 | ||
95 | ptr = &__get_cpu_var(page_states); | |
96 | *(unsigned long *)(ptr + offset) += delta; | |
97 | } | |
98 | EXPORT_SYMBOL(__mod_page_state_offset); | |
99 | ||
100 | void mod_page_state_offset(unsigned long offset, unsigned long delta) | |
101 | { | |
102 | unsigned long flags; | |
103 | void *ptr; | |
104 | ||
105 | local_irq_save(flags); | |
106 | ptr = &__get_cpu_var(page_states); | |
107 | *(unsigned long *)(ptr + offset) += delta; | |
108 | local_irq_restore(flags); | |
109 | } | |
110 | EXPORT_SYMBOL(mod_page_state_offset); | |
111 | ||
112 | void __get_zone_counts(unsigned long *active, unsigned long *inactive, | |
113 | unsigned long *free, struct pglist_data *pgdat) | |
114 | { | |
115 | struct zone *zones = pgdat->node_zones; | |
116 | int i; | |
117 | ||
118 | *active = 0; | |
119 | *inactive = 0; | |
120 | *free = 0; | |
121 | for (i = 0; i < MAX_NR_ZONES; i++) { | |
122 | *active += zones[i].nr_active; | |
123 | *inactive += zones[i].nr_inactive; | |
124 | *free += zones[i].free_pages; | |
125 | } | |
126 | } | |
127 | ||
128 | void get_zone_counts(unsigned long *active, | |
129 | unsigned long *inactive, unsigned long *free) | |
130 | { | |
131 | struct pglist_data *pgdat; | |
132 | ||
133 | *active = 0; | |
134 | *inactive = 0; | |
135 | *free = 0; | |
136 | for_each_online_pgdat(pgdat) { | |
137 | unsigned long l, m, n; | |
138 | __get_zone_counts(&l, &m, &n, pgdat); | |
139 | *active += l; | |
140 | *inactive += m; | |
141 | *free += n; | |
142 | } | |
143 | } | |
144 | ||
2244b95a CL |
145 | /* |
146 | * Manage combined zone based / global counters | |
147 | * | |
148 | * vm_stat contains the global counters | |
149 | */ | |
150 | atomic_long_t vm_stat[NR_VM_ZONE_STAT_ITEMS]; | |
151 | EXPORT_SYMBOL(vm_stat); | |
152 | ||
153 | #ifdef CONFIG_SMP | |
154 | ||
155 | #define STAT_THRESHOLD 32 | |
156 | ||
157 | /* | |
158 | * Determine pointer to currently valid differential byte given a zone and | |
159 | * the item number. | |
160 | * | |
161 | * Preemption must be off | |
162 | */ | |
163 | static inline s8 *diff_pointer(struct zone *zone, enum zone_stat_item item) | |
164 | { | |
165 | return &zone_pcp(zone, smp_processor_id())->vm_stat_diff[item]; | |
166 | } | |
167 | ||
168 | /* | |
169 | * For use when we know that interrupts are disabled. | |
170 | */ | |
171 | void __mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
172 | int delta) | |
173 | { | |
174 | s8 *p; | |
175 | long x; | |
176 | ||
177 | p = diff_pointer(zone, item); | |
178 | x = delta + *p; | |
179 | ||
180 | if (unlikely(x > STAT_THRESHOLD || x < -STAT_THRESHOLD)) { | |
181 | zone_page_state_add(x, zone, item); | |
182 | x = 0; | |
183 | } | |
184 | ||
185 | *p = x; | |
186 | } | |
187 | EXPORT_SYMBOL(__mod_zone_page_state); | |
188 | ||
189 | /* | |
190 | * For an unknown interrupt state | |
191 | */ | |
192 | void mod_zone_page_state(struct zone *zone, enum zone_stat_item item, | |
193 | int delta) | |
194 | { | |
195 | unsigned long flags; | |
196 | ||
197 | local_irq_save(flags); | |
198 | __mod_zone_page_state(zone, item, delta); | |
199 | local_irq_restore(flags); | |
200 | } | |
201 | EXPORT_SYMBOL(mod_zone_page_state); | |
202 | ||
203 | /* | |
204 | * Optimized increment and decrement functions. | |
205 | * | |
206 | * These are only for a single page and therefore can take a struct page * | |
207 | * argument instead of struct zone *. This allows the inclusion of the code | |
208 | * generated for page_zone(page) into the optimized functions. | |
209 | * | |
210 | * No overflow check is necessary and therefore the differential can be | |
211 | * incremented or decremented in place which may allow the compilers to | |
212 | * generate better code. | |
213 | * | |
214 | * The increment or decrement is known and therefore one boundary check can | |
215 | * be omitted. | |
216 | * | |
217 | * Some processors have inc/dec instructions that are atomic vs an interrupt. | |
218 | * However, the code must first determine the differential location in a zone | |
219 | * based on the processor number and then inc/dec the counter. There is no | |
220 | * guarantee without disabling preemption that the processor will not change | |
221 | * in between and therefore the atomicity vs. interrupt cannot be exploited | |
222 | * in a useful way here. | |
223 | */ | |
224 | void __inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
225 | { | |
226 | struct zone *zone = page_zone(page); | |
227 | s8 *p = diff_pointer(zone, item); | |
228 | ||
229 | (*p)++; | |
230 | ||
231 | if (unlikely(*p > STAT_THRESHOLD)) { | |
232 | zone_page_state_add(*p, zone, item); | |
233 | *p = 0; | |
234 | } | |
235 | } | |
236 | EXPORT_SYMBOL(__inc_zone_page_state); | |
237 | ||
238 | void __dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
239 | { | |
240 | struct zone *zone = page_zone(page); | |
241 | s8 *p = diff_pointer(zone, item); | |
242 | ||
243 | (*p)--; | |
244 | ||
245 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
246 | zone_page_state_add(*p, zone, item); | |
247 | *p = 0; | |
248 | } | |
249 | } | |
250 | EXPORT_SYMBOL(__dec_zone_page_state); | |
251 | ||
252 | void inc_zone_page_state(struct page *page, enum zone_stat_item item) | |
253 | { | |
254 | unsigned long flags; | |
255 | struct zone *zone; | |
256 | s8 *p; | |
257 | ||
258 | zone = page_zone(page); | |
259 | local_irq_save(flags); | |
260 | p = diff_pointer(zone, item); | |
261 | ||
262 | (*p)++; | |
263 | ||
264 | if (unlikely(*p > STAT_THRESHOLD)) { | |
265 | zone_page_state_add(*p, zone, item); | |
266 | *p = 0; | |
267 | } | |
268 | local_irq_restore(flags); | |
269 | } | |
270 | EXPORT_SYMBOL(inc_zone_page_state); | |
271 | ||
272 | void dec_zone_page_state(struct page *page, enum zone_stat_item item) | |
273 | { | |
274 | unsigned long flags; | |
275 | struct zone *zone; | |
276 | s8 *p; | |
277 | ||
278 | zone = page_zone(page); | |
279 | local_irq_save(flags); | |
280 | p = diff_pointer(zone, item); | |
281 | ||
282 | (*p)--; | |
283 | ||
284 | if (unlikely(*p < -STAT_THRESHOLD)) { | |
285 | zone_page_state_add(*p, zone, item); | |
286 | *p = 0; | |
287 | } | |
288 | local_irq_restore(flags); | |
289 | } | |
290 | EXPORT_SYMBOL(dec_zone_page_state); | |
291 | ||
292 | /* | |
293 | * Update the zone counters for one cpu. | |
294 | */ | |
295 | void refresh_cpu_vm_stats(int cpu) | |
296 | { | |
297 | struct zone *zone; | |
298 | int i; | |
299 | unsigned long flags; | |
300 | ||
301 | for_each_zone(zone) { | |
302 | struct per_cpu_pageset *pcp; | |
303 | ||
304 | pcp = zone_pcp(zone, cpu); | |
305 | ||
306 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
307 | if (pcp->vm_stat_diff[i]) { | |
308 | local_irq_save(flags); | |
309 | zone_page_state_add(pcp->vm_stat_diff[i], | |
310 | zone, i); | |
311 | pcp->vm_stat_diff[i] = 0; | |
312 | local_irq_restore(flags); | |
313 | } | |
314 | } | |
315 | } | |
316 | ||
/*
 * Callback used by refresh_vm_stats(): refresh the counters of the
 * CPU this runs on.  @dummy is unused.
 */
static void __refresh_cpu_vm_stats(void *dummy)
{
	int this_cpu = smp_processor_id();

	refresh_cpu_vm_stats(this_cpu);
}
321 | ||
322 | /* | |
323 | * Consolidate all counters. | |
324 | * | |
325 | * Note that the result is less inaccurate but still inaccurate | |
326 | * if concurrent processes are allowed to run. | |
327 | */ | |
328 | void refresh_vm_stats(void) | |
329 | { | |
330 | on_each_cpu(__refresh_cpu_vm_stats, NULL, 0, 1); | |
331 | } | |
332 | EXPORT_SYMBOL(refresh_vm_stats); | |
333 | ||
334 | #endif | |
335 | ||
f6ac2354 CL |
336 | #ifdef CONFIG_PROC_FS |
337 | ||
338 | #include <linux/seq_file.h> | |
339 | ||
340 | static void *frag_start(struct seq_file *m, loff_t *pos) | |
341 | { | |
342 | pg_data_t *pgdat; | |
343 | loff_t node = *pos; | |
344 | for (pgdat = first_online_pgdat(); | |
345 | pgdat && node; | |
346 | pgdat = next_online_pgdat(pgdat)) | |
347 | --node; | |
348 | ||
349 | return pgdat; | |
350 | } | |
351 | ||
352 | static void *frag_next(struct seq_file *m, void *arg, loff_t *pos) | |
353 | { | |
354 | pg_data_t *pgdat = (pg_data_t *)arg; | |
355 | ||
356 | (*pos)++; | |
357 | return next_online_pgdat(pgdat); | |
358 | } | |
359 | ||
/*
 * seq_file stop: intentionally empty, there is nothing to undo here.
 */
static void frag_stop(struct seq_file *m, void *arg)
{
}
363 | ||
364 | /* | |
365 | * This walks the free areas for each zone. | |
366 | */ | |
367 | static int frag_show(struct seq_file *m, void *arg) | |
368 | { | |
369 | pg_data_t *pgdat = (pg_data_t *)arg; | |
370 | struct zone *zone; | |
371 | struct zone *node_zones = pgdat->node_zones; | |
372 | unsigned long flags; | |
373 | int order; | |
374 | ||
375 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; ++zone) { | |
376 | if (!populated_zone(zone)) | |
377 | continue; | |
378 | ||
379 | spin_lock_irqsave(&zone->lock, flags); | |
380 | seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); | |
381 | for (order = 0; order < MAX_ORDER; ++order) | |
382 | seq_printf(m, "%6lu ", zone->free_area[order].nr_free); | |
383 | spin_unlock_irqrestore(&zone->lock, flags); | |
384 | seq_putc(m, '\n'); | |
385 | } | |
386 | return 0; | |
387 | } | |
388 | ||
389 | struct seq_operations fragmentation_op = { | |
390 | .start = frag_start, | |
391 | .next = frag_next, | |
392 | .stop = frag_stop, | |
393 | .show = frag_show, | |
394 | }; | |
395 | ||
/*
 * Names of the counters, in buffer order.
 *
 * vmstat_start() builds a buffer of NR_VM_ZONE_STAT_ITEMS zoned counters
 * followed by a struct page_state, and vmstat_show() looks a name up by
 * the value's index in that buffer.  zoneinfo_show() uses the first
 * NR_VM_ZONE_STAT_ITEMS names.  The order here must therefore follow
 * that layout.
 *
 * The table is never written, so both the pointers and the strings are
 * const.
 */
static const char * const vmstat_text[] = {
	/* Zoned VM counters */
	"nr_anon_pages",
	"nr_mapped",
	"nr_file_pages",
	"nr_slab",
	"nr_page_table_pages",

	/* Page state */
	"nr_dirty",
	"nr_writeback",
	"nr_unstable",

	"pgpgin",
	"pgpgout",
	"pswpin",
	"pswpout",

	"pgalloc_high",
	"pgalloc_normal",
	"pgalloc_dma32",
	"pgalloc_dma",

	"pgfree",
	"pgactivate",
	"pgdeactivate",

	"pgfault",
	"pgmajfault",

	"pgrefill_high",
	"pgrefill_normal",
	"pgrefill_dma32",
	"pgrefill_dma",

	"pgsteal_high",
	"pgsteal_normal",
	"pgsteal_dma32",
	"pgsteal_dma",

	"pgscan_kswapd_high",
	"pgscan_kswapd_normal",
	"pgscan_kswapd_dma32",
	"pgscan_kswapd_dma",

	"pgscan_direct_high",
	"pgscan_direct_normal",
	"pgscan_direct_dma32",
	"pgscan_direct_dma",

	"pginodesteal",
	"slabs_scanned",
	"kswapd_steal",
	"kswapd_inodesteal",
	"pageoutrun",
	"allocstall",

	"pgrotated",
	"nr_bounce",
};
456 | ||
457 | /* | |
458 | * Output information about zones in @pgdat. | |
459 | */ | |
460 | static int zoneinfo_show(struct seq_file *m, void *arg) | |
461 | { | |
462 | pg_data_t *pgdat = arg; | |
463 | struct zone *zone; | |
464 | struct zone *node_zones = pgdat->node_zones; | |
465 | unsigned long flags; | |
466 | ||
467 | for (zone = node_zones; zone - node_zones < MAX_NR_ZONES; zone++) { | |
468 | int i; | |
469 | ||
470 | if (!populated_zone(zone)) | |
471 | continue; | |
472 | ||
473 | spin_lock_irqsave(&zone->lock, flags); | |
474 | seq_printf(m, "Node %d, zone %8s", pgdat->node_id, zone->name); | |
475 | seq_printf(m, | |
476 | "\n pages free %lu" | |
477 | "\n min %lu" | |
478 | "\n low %lu" | |
479 | "\n high %lu" | |
480 | "\n active %lu" | |
481 | "\n inactive %lu" | |
482 | "\n scanned %lu (a: %lu i: %lu)" | |
483 | "\n spanned %lu" | |
484 | "\n present %lu", | |
485 | zone->free_pages, | |
486 | zone->pages_min, | |
487 | zone->pages_low, | |
488 | zone->pages_high, | |
489 | zone->nr_active, | |
490 | zone->nr_inactive, | |
491 | zone->pages_scanned, | |
492 | zone->nr_scan_active, zone->nr_scan_inactive, | |
493 | zone->spanned_pages, | |
494 | zone->present_pages); | |
2244b95a CL |
495 | |
496 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) | |
497 | seq_printf(m, "\n %-12s %lu", vmstat_text[i], | |
498 | zone_page_state(zone, i)); | |
499 | ||
f6ac2354 CL |
500 | seq_printf(m, |
501 | "\n protection: (%lu", | |
502 | zone->lowmem_reserve[0]); | |
503 | for (i = 1; i < ARRAY_SIZE(zone->lowmem_reserve); i++) | |
504 | seq_printf(m, ", %lu", zone->lowmem_reserve[i]); | |
505 | seq_printf(m, | |
506 | ")" | |
507 | "\n pagesets"); | |
508 | for_each_online_cpu(i) { | |
509 | struct per_cpu_pageset *pageset; | |
510 | int j; | |
511 | ||
512 | pageset = zone_pcp(zone, i); | |
513 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
514 | if (pageset->pcp[j].count) | |
515 | break; | |
516 | } | |
517 | if (j == ARRAY_SIZE(pageset->pcp)) | |
518 | continue; | |
519 | for (j = 0; j < ARRAY_SIZE(pageset->pcp); j++) { | |
520 | seq_printf(m, | |
521 | "\n cpu: %i pcp: %i" | |
522 | "\n count: %i" | |
523 | "\n high: %i" | |
524 | "\n batch: %i", | |
525 | i, j, | |
526 | pageset->pcp[j].count, | |
527 | pageset->pcp[j].high, | |
528 | pageset->pcp[j].batch); | |
529 | } | |
530 | #ifdef CONFIG_NUMA | |
531 | seq_printf(m, | |
532 | "\n numa_hit: %lu" | |
533 | "\n numa_miss: %lu" | |
534 | "\n numa_foreign: %lu" | |
535 | "\n interleave_hit: %lu" | |
536 | "\n local_node: %lu" | |
537 | "\n other_node: %lu", | |
538 | pageset->numa_hit, | |
539 | pageset->numa_miss, | |
540 | pageset->numa_foreign, | |
541 | pageset->interleave_hit, | |
542 | pageset->local_node, | |
543 | pageset->other_node); | |
544 | #endif | |
545 | } | |
546 | seq_printf(m, | |
547 | "\n all_unreclaimable: %u" | |
548 | "\n prev_priority: %i" | |
549 | "\n temp_priority: %i" | |
550 | "\n start_pfn: %lu", | |
551 | zone->all_unreclaimable, | |
552 | zone->prev_priority, | |
553 | zone->temp_priority, | |
554 | zone->zone_start_pfn); | |
555 | spin_unlock_irqrestore(&zone->lock, flags); | |
556 | seq_putc(m, '\n'); | |
557 | } | |
558 | return 0; | |
559 | } | |
560 | ||
561 | struct seq_operations zoneinfo_op = { | |
562 | .start = frag_start, /* iterate over all zones. The same as in | |
563 | * fragmentation. */ | |
564 | .next = frag_next, | |
565 | .stop = frag_stop, | |
566 | .show = zoneinfo_show, | |
567 | }; | |
568 | ||
569 | static void *vmstat_start(struct seq_file *m, loff_t *pos) | |
570 | { | |
2244b95a | 571 | unsigned long *v; |
f6ac2354 | 572 | struct page_state *ps; |
2244b95a | 573 | int i; |
f6ac2354 CL |
574 | |
575 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
576 | return NULL; | |
577 | ||
2244b95a CL |
578 | v = kmalloc(NR_VM_ZONE_STAT_ITEMS * sizeof(unsigned long) |
579 | + sizeof(*ps), GFP_KERNEL); | |
580 | m->private = v; | |
581 | if (!v) | |
f6ac2354 | 582 | return ERR_PTR(-ENOMEM); |
2244b95a CL |
583 | for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) |
584 | v[i] = global_page_state(i); | |
585 | ps = (struct page_state *)(v + NR_VM_ZONE_STAT_ITEMS); | |
f6ac2354 CL |
586 | get_full_page_state(ps); |
587 | ps->pgpgin /= 2; /* sectors -> kbytes */ | |
588 | ps->pgpgout /= 2; | |
2244b95a | 589 | return v + *pos; |
f6ac2354 CL |
590 | } |
591 | ||
592 | static void *vmstat_next(struct seq_file *m, void *arg, loff_t *pos) | |
593 | { | |
594 | (*pos)++; | |
595 | if (*pos >= ARRAY_SIZE(vmstat_text)) | |
596 | return NULL; | |
597 | return (unsigned long *)m->private + *pos; | |
598 | } | |
599 | ||
600 | static int vmstat_show(struct seq_file *m, void *arg) | |
601 | { | |
602 | unsigned long *l = arg; | |
603 | unsigned long off = l - (unsigned long *)m->private; | |
604 | ||
605 | seq_printf(m, "%s %lu\n", vmstat_text[off], *l); | |
606 | return 0; | |
607 | } | |
608 | ||
609 | static void vmstat_stop(struct seq_file *m, void *arg) | |
610 | { | |
611 | kfree(m->private); | |
612 | m->private = NULL; | |
613 | } | |
614 | ||
615 | struct seq_operations vmstat_op = { | |
616 | .start = vmstat_start, | |
617 | .next = vmstat_next, | |
618 | .stop = vmstat_stop, | |
619 | .show = vmstat_show, | |
620 | }; | |
621 | ||
622 | #endif /* CONFIG_PROC_FS */ | |
623 |