maps4: move clear_refs code to task_mmu.c
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 4008c060f7ef1b2b9ddc51154d30e1ec865906a5..fcdbd233f25233670a23acf1c1a9781740dec5fa 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -3,6 +3,7 @@
 #include <linux/mount.h>
 #include <linux/seq_file.h>
 #include <linux/highmem.h>
+#include <linux/ptrace.h>
 #include <linux/pagemap.h>
 #include <linux/mempolicy.h>
 
@@ -113,24 +114,38 @@ static void pad_len_spaces(struct seq_file *m, int len)
        seq_printf(m, "%*c", len, ' ');
 }
 
+/*
+ * Proportional Set Size (PSS): my share of RSS.
+ *
+ * The PSS of a process is the count of pages it has in memory, where
+ * each page is divided by the number of processes sharing it.  So if
+ * a process has 1000 pages all to itself and 1000 shared with one
+ * other process, its PSS will be 1500.
+ *
+ * To keep accumulated division errors low, pss is kept as a 64-bit
+ * fixed-point counter, so (pss >> PSS_SHIFT) is the real byte count.
+ *
+ * A shift of 12 before division means (assuming a 4K page size):
+ *     - 1M 3-user-pages add up to 8KB of error;
+ *     - supports mapcount up to 2^24, or 16M;
+ *     - supports PSS up to 2^52 bytes, or 4PB.
+ */
+#define PSS_SHIFT 12
+
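[Editorial note: to make the fixed-point arithmetic concrete, here is a minimal
userspace sketch, not part of the patch, that reproduces the 1000-private /
1000-shared example from the comment above; the 4K page size is an assumption.]

	#include <stdio.h>

	#define PAGE_SIZE 4096ULL	/* assumption: 4K pages */
	#define PSS_SHIFT 12

	int main(void)
	{
		unsigned long long pss = 0;
		int i;

		for (i = 0; i < 1000; i++)	/* private pages, mapcount == 1 */
			pss += PAGE_SIZE << PSS_SHIFT;
		for (i = 0; i < 1000; i++)	/* shared with one other task, mapcount == 2 */
			pss += (PAGE_SIZE << PSS_SHIFT) / 2;

		printf("%llu pages\n", (pss >> PSS_SHIFT) / PAGE_SIZE);  /* 1500 */
		printf("%llu kB\n", pss >> (10 + PSS_SHIFT));            /* 6000 */
		return 0;
	}

Shifting by (10 + PSS_SHIFT) converts the fixed-point byte count directly to
kB, which is exactly what show_smap() does further down.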
 struct mem_size_stats
 {
+       struct vm_area_struct *vma;
        unsigned long resident;
        unsigned long shared_clean;
        unsigned long shared_dirty;
        unsigned long private_clean;
        unsigned long private_dirty;
        unsigned long referenced;
+       u64 pss;
 };
 
-struct pmd_walker {
-       struct vm_area_struct *vma;
-       void *private;
-       void (*action)(struct vm_area_struct *, pmd_t *, unsigned long,
-                      unsigned long, void *);
-};
-
-static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats *mss)
+static int show_map(struct seq_file *m, void *v)
 {
        struct proc_maps_private *priv = m->private;
        struct task_struct *task = priv->task;
@@ -142,6 +157,9 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
        dev_t dev = 0;
        int len;
 
+       if (maps_protect && !ptrace_may_attach(task))
+               return -EACCES;
+
        if (file) {
                struct inode *inode = vma->vm_file->f_path.dentry->d_inode;
                dev = inode->i_sb->s_dev;
@@ -187,41 +205,20 @@ static int show_map_internal(struct seq_file *m, void *v, struct mem_size_stats
        }
        seq_putc(m, '\n');
 
-       if (mss)
-               seq_printf(m,
-                          "Size:           %8lu kB\n"
-                          "Rss:            %8lu kB\n"
-                          "Shared_Clean:   %8lu kB\n"
-                          "Shared_Dirty:   %8lu kB\n"
-                          "Private_Clean:  %8lu kB\n"
-                          "Private_Dirty:  %8lu kB\n"
-                          "Referenced:     %8lu kB\n",
-                          (vma->vm_end - vma->vm_start) >> 10,
-                          mss->resident >> 10,
-                          mss->shared_clean  >> 10,
-                          mss->shared_dirty  >> 10,
-                          mss->private_clean >> 10,
-                          mss->private_dirty >> 10,
-                          mss->referenced >> 10);
-
        if (m->count < m->size)  /* vma is copied successfully */
                m->version = (vma != get_gate_vma(task))? vma->vm_start: 0;
        return 0;
 }
 
-static int show_map(struct seq_file *m, void *v)
-{
-       return show_map_internal(m, v, NULL);
-}
-
-static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-                           unsigned long addr, unsigned long end,
-                           void *private)
+static int smaps_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
+                          void *private)
 {
        struct mem_size_stats *mss = private;
+       struct vm_area_struct *vma = mss->vma;
        pte_t *pte, ptent;
        spinlock_t *ptl;
        struct page *page;
+       int mapcount;
 
        pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
        for (; addr != end; pte++, addr += PAGE_SIZE) {
@@ -238,26 +235,30 @@ static void smaps_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
                /* Accumulate the size in pages that have been accessed. */
                if (pte_young(ptent) || PageReferenced(page))
                        mss->referenced += PAGE_SIZE;
-               if (page_mapcount(page) >= 2) {
+               mapcount = page_mapcount(page);
+               if (mapcount >= 2) {
                        if (pte_dirty(ptent))
                                mss->shared_dirty += PAGE_SIZE;
                        else
                                mss->shared_clean += PAGE_SIZE;
+                       mss->pss += (PAGE_SIZE << PSS_SHIFT) / mapcount;
                } else {
                        if (pte_dirty(ptent))
                                mss->private_dirty += PAGE_SIZE;
                        else
                                mss->private_clean += PAGE_SIZE;
+                       mss->pss += (PAGE_SIZE << PSS_SHIFT);
                }
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
+       return 0;
 }
 
-static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
-                                unsigned long addr, unsigned long end,
-                                void *private)
+static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
+                               unsigned long end, void *private)
 {
+       struct vm_area_struct *vma = private;
        pte_t *pte, ptent;
        spinlock_t *ptl;
        struct page *page;
@@ -278,95 +279,91 @@ static void clear_refs_pte_range(struct vm_area_struct *vma, pmd_t *pmd,
        }
        pte_unmap_unlock(pte - 1, ptl);
        cond_resched();
+       return 0;
 }
 
-static inline void walk_pmd_range(struct pmd_walker *walker, pud_t *pud,
-                                 unsigned long addr, unsigned long end)
-{
-       pmd_t *pmd;
-       unsigned long next;
-
-       for (pmd = pmd_offset(pud, addr); addr != end;
-            pmd++, addr = next) {
-               next = pmd_addr_end(addr, end);
-               if (pmd_none_or_clear_bad(pmd))
-                       continue;
-               walker->action(walker->vma, pmd, addr, next, walker->private);
-       }
-}
-
-static inline void walk_pud_range(struct pmd_walker *walker, pgd_t *pgd,
-                                 unsigned long addr, unsigned long end)
-{
-       pud_t *pud;
-       unsigned long next;
-
-       for (pud = pud_offset(pgd, addr); addr != end;
-            pud++, addr = next) {
-               next = pud_addr_end(addr, end);
-               if (pud_none_or_clear_bad(pud))
-                       continue;
-               walk_pmd_range(walker, pud, addr, next);
-       }
-}
-
-/*
- * walk_page_range - walk the page tables of a VMA with a callback
- * @vma - VMA to walk
- * @action - callback invoked for every bottom-level (PTE) page table
- * @private - private data passed to the callback function
- *
- * Recursively walk the page table for the memory area in a VMA, calling
- * a callback for every bottom-level (PTE) page table.
- */
-static inline void walk_page_range(struct vm_area_struct *vma,
-                                  void (*action)(struct vm_area_struct *,
-                                                 pmd_t *, unsigned long,
-                                                 unsigned long, void *),
-                                  void *private)
-{
-       unsigned long addr = vma->vm_start;
-       unsigned long end = vma->vm_end;
-       struct pmd_walker walker = {
-               .vma            = vma,
-               .private        = private,
-               .action         = action,
-       };
-       pgd_t *pgd;
-       unsigned long next;
-
-       for (pgd = pgd_offset(vma->vm_mm, addr); addr != end;
-            pgd++, addr = next) {
-               next = pgd_addr_end(addr, end);
-               if (pgd_none_or_clear_bad(pgd))
-                       continue;
-               walk_pud_range(&walker, pgd, addr, next);
-       }
-}
+static struct mm_walk smaps_walk = { .pmd_entry = smaps_pte_range };
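[Editorial note: the hand-rolled pgd/pud/pmd walker deleted above is replaced
by the generic walk_page_range() from mm/pagewalk.c. Callbacks now receive only
(pmd, addr, end, private), so any per-walk state, including the vma, must
travel through the private pointer: smaps stashes the vma in mem_size_stats,
while clear_refs passes it as private directly. As a sketch of the same pattern
(hypothetical, not part of this patch), a walker counting present PTEs could
look like this:]

	/* Hypothetical illustration only: count present PTEs in a range
	 * using the same mm_walk interface this patch converts to. */
	struct present_count {
		struct mm_struct *mm;	/* needed for pte_offset_map_lock() */
		unsigned long pages;
	};

	static int count_present_range(pmd_t *pmd, unsigned long addr,
				       unsigned long end, void *private)
	{
		struct present_count *pc = private;
		spinlock_t *ptl;
		pte_t *pte;

		pte = pte_offset_map_lock(pc->mm, pmd, addr, &ptl);
		for (; addr != end; pte++, addr += PAGE_SIZE)
			if (pte_present(*pte))
				pc->pages++;
		pte_unmap_unlock(pte - 1, ptl);
		cond_resched();
		return 0;
	}

	static struct mm_walk present_walk = { .pmd_entry = count_present_range };

[It would be invoked, with mmap_sem held for reading, as
walk_page_range(mm, vma->vm_start, vma->vm_end, &present_walk, &pc), just like
smaps_walk here and clear_refs_walk below.]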
 
 static int show_smap(struct seq_file *m, void *v)
 {
        struct vm_area_struct *vma = v;
        struct mem_size_stats mss;
+       int ret;
 
        memset(&mss, 0, sizeof mss);
+       mss.vma = vma;
        if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-               walk_page_range(vma, smaps_pte_range, &mss);
-       return show_map_internal(m, v, &mss);
+               walk_page_range(vma->vm_mm, vma->vm_start, vma->vm_end,
+                               &smaps_walk, &mss);
+
+       ret = show_map(m, v);
+       if (ret)
+               return ret;
+
+       seq_printf(m,
+                  "Size:           %8lu kB\n"
+                  "Rss:            %8lu kB\n"
+                  "Pss:            %8lu kB\n"
+                  "Shared_Clean:   %8lu kB\n"
+                  "Shared_Dirty:   %8lu kB\n"
+                  "Private_Clean:  %8lu kB\n"
+                  "Private_Dirty:  %8lu kB\n"
+                  "Referenced:     %8lu kB\n",
+                  (vma->vm_end - vma->vm_start) >> 10,
+                  mss.resident >> 10,
+                  (unsigned long)(mss.pss >> (10 + PSS_SHIFT)),
+                  mss.shared_clean  >> 10,
+                  mss.shared_dirty  >> 10,
+                  mss.private_clean >> 10,
+                  mss.private_dirty >> 10,
+                  mss.referenced >> 10);
+
+       return ret;
 }
 
-void clear_refs_smap(struct mm_struct *mm)
+static struct mm_walk clear_refs_walk = { .pmd_entry = clear_refs_pte_range };
+
+static ssize_t clear_refs_write(struct file *file, const char __user *buf,
+                               size_t count, loff_t *ppos)
 {
+       struct task_struct *task;
+       char buffer[PROC_NUMBUF], *end;
+       struct mm_struct *mm;
        struct vm_area_struct *vma;
 
-       down_read(&mm->mmap_sem);
-       for (vma = mm->mmap; vma; vma = vma->vm_next)
-               if (vma->vm_mm && !is_vm_hugetlb_page(vma))
-                       walk_page_range(vma, clear_refs_pte_range, NULL);
-       flush_tlb_mm(mm);
-       up_read(&mm->mmap_sem);
+       memset(buffer, 0, sizeof(buffer));
+       if (count > sizeof(buffer) - 1)
+               count = sizeof(buffer) - 1;
+       if (copy_from_user(buffer, buf, count))
+               return -EFAULT;
+       if (!simple_strtol(buffer, &end, 0))
+               return -EINVAL;
+       if (*end == '\n')
+               end++;
+       task = get_proc_task(file->f_path.dentry->d_inode);
+       if (!task)
+               return -ESRCH;
+       mm = get_task_mm(task);
+       if (mm) {
+               down_read(&mm->mmap_sem);
+               for (vma = mm->mmap; vma; vma = vma->vm_next)
+                       if (!is_vm_hugetlb_page(vma))
+                               walk_page_range(mm, vma->vm_start, vma->vm_end,
+                                               &clear_refs_walk, vma);
+               flush_tlb_mm(mm);
+               up_read(&mm->mmap_sem);
+               mmput(mm);
+       }
+       put_task_struct(task);
+       if (end - buffer == 0)
+               return -EIO;
+       return end - buffer;
 }
 
+const struct file_operations proc_clear_refs_operations = {
+       .write          = clear_refs_write,
+};
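[Editorial note: from userspace the intended sequence is to write any non-zero
value to /proc/<pid>/clear_refs, let the task run for a while, then read the
Referenced: fields of /proc/<pid>/smaps to estimate its working set. A minimal
sketch follows; the pid 1234 is a placeholder and error handling is trimmed.]

	#include <stdio.h>

	int main(void)
	{
		/* "1234" is a placeholder pid */
		FILE *f = fopen("/proc/1234/clear_refs", "w");

		if (!f)
			return 1;
		fputs("1\n", f);	/* any non-zero value clears the referenced bits */
		fclose(f);
		/* ... wait while the task runs, then parse the "Referenced:"
		 * lines in /proc/1234/smaps ... */
		return 0;
	}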
+
 static void *m_start(struct seq_file *m, loff_t *pos)
 {
        struct proc_maps_private *priv = m->private;
@@ -393,12 +390,11 @@ static void *m_start(struct seq_file *m, loff_t *pos)
        if (!priv->task)
                return NULL;
 
-       mm = get_task_mm(priv->task);
+       mm = mm_for_maps(priv->task);
        if (!mm)
                return NULL;
 
        priv->tail_vma = tail_vma = get_gate_vma(priv->task);
-       down_read(&mm->mmap_sem);
 
        /* Start with last addr hint */
        if (last_addr && (vma = find_vma(mm, last_addr))) {
@@ -512,11 +508,22 @@ const struct file_operations proc_maps_operations = {
 #ifdef CONFIG_NUMA
 extern int show_numa_map(struct seq_file *m, void *v);
 
+static int show_numa_map_checked(struct seq_file *m, void *v)
+{
+       struct proc_maps_private *priv = m->private;
+       struct task_struct *task = priv->task;
+
+       if (maps_protect && !ptrace_may_attach(task))
+               return -EACCES;
+
+       return show_numa_map(m, v);
+}
+
 static struct seq_operations proc_pid_numa_maps_op = {
         .start  = m_start,
         .next   = m_next,
         .stop   = m_stop,
-        .show   = show_numa_map
+        .show   = show_numa_map_checked
 };
 
 static int numa_maps_open(struct inode *inode, struct file *file)