/*
 *	linux/mm/filemap_xip.c
 *
 * Copyright (C) 2005 IBM Corporation
 * Author: Carsten Otte <cotte@de.ibm.com>
 *
 * derived from linux/mm/filemap.c - Copyright (C) Linus Torvalds
 */

#include <linux/fs.h>
#include <linux/pagemap.h>
#include <linux/module.h>
#include <linux/uio.h>
#include <linux/rmap.h>
#include <asm/tlbflush.h>
#include "filemap.h"
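/*
 * Everything below is built on a single address_space operation that
 * the filesystem must supply.  As it is called in this file (an
 * editorial summary inferred from the call sites, not a quote of the
 * header), its shape is roughly:
 *
 *	struct page *(*get_xip_page)(struct address_space *mapping,
 *				     sector_t blockno, int create);
 *
 * It returns the always-up-to-date page backing the given 512-byte
 * block number, ERR_PTR(-ENODATA) for a hole, or a freshly allocated
 * block when create != 0.
 */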
/*
 * This is a file read routine for execute in place files, and uses
 * the mapping->a_ops->get_xip_page() function for the actual low-level
 * stuff.
 *
 * Note the struct file * is not used at all.  It may be NULL.
 */
static void
do_xip_mapping_read(struct address_space *mapping,
		    struct file_ra_state *_ra,
		    struct file *filp,
		    loff_t *ppos,
		    read_descriptor_t *desc,
		    read_actor_t actor)
{
	struct inode *inode = mapping->host;
	unsigned long index, end_index, offset;
	loff_t isize;

	BUG_ON(!mapping->a_ops->get_xip_page);

	index = *ppos >> PAGE_CACHE_SHIFT;
	offset = *ppos & ~PAGE_CACHE_MASK;

	isize = i_size_read(inode);
	if (!isize)
		goto out;

	end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
	for (;;) {
		struct page *page;
		unsigned long nr, ret;

		/* nr is the maximum number of bytes to copy from this page */
		nr = PAGE_CACHE_SIZE;
		if (index >= end_index) {
			if (index > end_index)
				goto out;
			nr = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
			if (nr <= offset)
				goto out;
		}
		nr = nr - offset;

		page = mapping->a_ops->get_xip_page(mapping,
			index*(PAGE_SIZE/512), 0);
		if (!page)
			goto no_xip_page;
		if (unlikely(IS_ERR(page))) {
			if (PTR_ERR(page) == -ENODATA) {
				/* sparse block: read as zeroes */
				page = virt_to_page(empty_zero_page);
			} else {
				desc->error = PTR_ERR(page);
				goto out;
			}
		} else
			BUG_ON(!PageUptodate(page));

		/* If users can be writing to this page using arbitrary
		 * virtual addresses, take care about potential aliasing
		 * before reading the page on the kernel side.
		 */
		if (mapping_writably_mapped(mapping))
			flush_dcache_page(page);

		/*
		 * Ok, we have the page, and it's up-to-date, so
		 * now we can copy it to user space...
		 *
		 * The actor routine returns how many bytes were actually used..
		 * NOTE! This may not be the same as how much of a user buffer
		 * we filled up (we may be padding etc), so we can only update
		 * "pos" here (the actor routine has to update the user buffer
		 * pointers and the remaining count).
		 */
		ret = actor(desc, page, offset, nr);
		offset += ret;
		index += offset >> PAGE_CACHE_SHIFT;
		offset &= ~PAGE_CACHE_MASK;

		if (ret == nr && desc->count)
			continue;
		goto out;

no_xip_page:
		/* Did not get the page. Report it */
		desc->error = -EIO;
		goto out;
	}

out:
	*ppos = ((loff_t) index << PAGE_CACHE_SHIFT) + offset;
	if (filp)
		file_accessed(filp);
}
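/*
 * Worked example of the block arithmetic above (editorial, not from
 * the original source): with 4K pages, PAGE_SIZE/512 == 8, so page
 * index n is passed to get_xip_page() as 512-byte block number 8*n.
 * A read starting at *ppos == 12288 therefore begins at index 3,
 * offset 0, i.e. block 24 of the backing device.
 */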
/*
 * This is the "read()" routine for all filesystems
 * that use the get_xip_page address space operation.
 */
static ssize_t
__xip_file_aio_read(struct kiocb *iocb, const struct iovec *iov,
		    unsigned long nr_segs, loff_t *ppos)
{
	struct file *filp = iocb->ki_filp;
	ssize_t retval;
	unsigned long seg;
	size_t count;

	count = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		count += iv->iov_len;
		if (unlikely((ssize_t)(count|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_WRITE, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		nr_segs = seg;
		count -= iv->iov_len;	/* This segment is no good */
		break;
	}

	retval = 0;
	if (count) {
		for (seg = 0; seg < nr_segs; seg++) {
			read_descriptor_t desc;

			desc.written = 0;
			desc.arg.buf = iov[seg].iov_base;
			desc.count = iov[seg].iov_len;
			if (desc.count == 0)
				continue;
			desc.error = 0;
			do_xip_mapping_read(filp->f_mapping, &filp->f_ra, filp,
					    ppos, &desc, file_read_actor);
			retval += desc.written;
			if (!retval) {
				retval = desc.error;
				break;
			}
		}
	}
	return retval;
}
ssize_t
xip_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count,
		  loff_t pos)
{
	struct iovec local_iov = { .iov_base = buf, .iov_len = count };

	BUG_ON(iocb->ki_pos != pos);
	return __xip_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos);
}
EXPORT_SYMBOL_GPL(xip_file_aio_read);
ssize_t
xip_file_readv(struct file *filp, const struct iovec *iov,
	       unsigned long nr_segs, loff_t *ppos)
{
	struct kiocb kiocb;

	init_sync_kiocb(&kiocb, filp);
	return __xip_file_aio_read(&kiocb, iov, nr_segs, ppos);
}
EXPORT_SYMBOL_GPL(xip_file_readv);
ssize_t
xip_file_sendfile(struct file *in_file, loff_t *ppos,
		  size_t count, read_actor_t actor, void *target)
{
	read_descriptor_t desc;

	if (!count)
		return 0;

	desc.written = 0;
	desc.count = count;
	desc.arg.data = target;
	desc.error = 0;

	do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
			    ppos, &desc, actor);
	if (desc.written)
		return desc.written;
	return desc.error;
}
EXPORT_SYMBOL_GPL(xip_file_sendfile);
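/*
 * Note (editorial): all three read entry points above funnel into
 * do_xip_mapping_read() with a read_descriptor_t plus an actor
 * callback.  file_read_actor copies into the user buffer for
 * read()/readv(), while sendfile passes the caller's actor and an
 * opaque target; the descriptor carries the remaining count, the
 * bytes written so far, and the first error encountered.
 */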
/*
 * __xip_unmap is invoked from xip_file_nopage and do_xip_file_write,
 * the two places that replace a hole with a freshly allocated block.
 *
 * This function walks all vmas of the address_space and unmaps the
 * empty_zero_page when found at pgoff. Should it go in rmap.c?
 */
static void
__xip_unmap (struct address_space * mapping,
	     unsigned long pgoff)
{
	struct vm_area_struct *vma;
	struct mm_struct *mm;
	struct prio_tree_iter iter;
	unsigned long address;
	pte_t *pte;
	pte_t pteval;

	spin_lock(&mapping->i_mmap_lock);
	vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
		mm = vma->vm_mm;
		address = vma->vm_start +
			((pgoff - vma->vm_pgoff) << PAGE_SHIFT);
		BUG_ON(address < vma->vm_start || address >= vma->vm_end);
		/*
		 * We need the page_table_lock to protect us from page faults,
		 * munmap, fork, etc...
		 */
		pte = page_check_address(virt_to_page(empty_zero_page), mm,
					 address);
		if (!IS_ERR(pte)) {
			/* Nuke the page table entry. */
			flush_cache_page(vma, address, pte_pfn(*pte));
			pteval = ptep_clear_flush(vma, address, pte);
			BUG_ON(pte_dirty(pteval));
			pte_unmap(pte);
			spin_unlock(&mm->page_table_lock);
		}
	}
	spin_unlock(&mapping->i_mmap_lock);
}
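/*
 * Design note (editorial): holes in an XIP file are mapped read-only
 * to the shared empty_zero_page rather than to private zeroed pages.
 * When the nopage handler below later allocates a real block for such
 * a hole, __xip_unmap() tears the stale zero-page ptes out of every
 * other mapping so those mappings refault and find the new block.
 * The BUG_ON(pte_dirty()) above asserts the invariant that nobody can
 * have written through a zero-page pte.
 */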
/*
 * xip_file_nopage() is invoked via the vma operations vector for a
 * mapped memory region to read in file data during a page fault.
 *
 * This function is derived from filemap_nopage, but used for execute
 * in place.
 */
static struct page *
xip_file_nopage(struct vm_area_struct * area,
		unsigned long address,
		int *type)
{
	struct file *file = area->vm_file;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	struct page *page;
	unsigned long size, pgoff, endoff;

	pgoff = ((address - area->vm_start) >> PAGE_CACHE_SHIFT)
		+ area->vm_pgoff;
	endoff = ((area->vm_end - area->vm_start) >> PAGE_CACHE_SHIFT)
		+ area->vm_pgoff;

	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	if (pgoff >= size)
		return NOPAGE_SIGBUS;

	page = mapping->a_ops->get_xip_page(mapping, pgoff*(PAGE_SIZE/512), 0);
	if (!IS_ERR(page)) {
		BUG_ON(!PageUptodate(page));
		return page;
	}
	if (PTR_ERR(page) != -ENODATA)
		return NOPAGE_SIGBUS;

	/* sparse block */
	if ((area->vm_flags & (VM_WRITE | VM_MAYWRITE)) &&
	    (area->vm_flags & (VM_SHARED | VM_MAYSHARE)) &&
	    (!(mapping->host->i_sb->s_flags & MS_RDONLY))) {
		/* maybe shared writable, allocate new block */
		page = mapping->a_ops->get_xip_page(mapping,
			pgoff*(PAGE_SIZE/512), 1);
		if (IS_ERR(page))
			return NOPAGE_SIGBUS;
		BUG_ON(!PageUptodate(page));
		/* unmap page at pgoff from all other vmas */
		__xip_unmap(mapping, pgoff);
	} else {
		/* not shared and writable, use empty_zero_page */
		page = virt_to_page(empty_zero_page);
	}

	return page;
}
static struct vm_operations_struct xip_file_vm_ops = {
	.nopage		= xip_file_nopage,
};
int xip_file_mmap(struct file * file, struct vm_area_struct * vma)
{
	BUG_ON(!file->f_mapping->a_ops->get_xip_page);

	file_accessed(file);
	vma->vm_ops = &xip_file_vm_ops;
	return 0;
}
EXPORT_SYMBOL_GPL(xip_file_mmap);
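/*
 * Usage sketch (editorial, not part of the original file): a
 * filesystem implementing ->get_xip_page would typically point an
 * XIP-mounted file's file_operations at the helpers exported in this
 * file, along these lines (the foo_ name is hypothetical):
 *
 *	struct file_operations foo_xip_file_operations = {
 *		.llseek		= generic_file_llseek,
 *		.aio_read	= xip_file_aio_read,
 *		.readv		= xip_file_readv,
 *		.aio_write	= xip_file_aio_write,
 *		.writev		= xip_file_writev,
 *		.mmap		= xip_file_mmap,
 *		.sendfile	= xip_file_sendfile,
 *	};
 */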
static ssize_t
do_xip_file_write(struct kiocb *iocb, const struct iovec *iov,
		  unsigned long nr_segs, loff_t pos, loff_t *ppos,
		  size_t count)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct address_space_operations *a_ops = mapping->a_ops;
	struct inode *inode = mapping->host;
	long status = 0;
	struct page *page;
	size_t bytes;
	const struct iovec *cur_iov = iov;	/* current iovec */
	size_t iov_base = 0;			/* offset in the current iovec */
	char __user *buf;
	ssize_t written = 0;

	BUG_ON(!mapping->a_ops->get_xip_page);

	buf = iov->iov_base;
	do {
		unsigned long index;
		unsigned long offset;
		size_t copied;

		offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
		index = pos >> PAGE_CACHE_SHIFT;
		bytes = PAGE_CACHE_SIZE - offset;
		if (bytes > count)
			bytes = count;

		/*
		 * Bring in the user page that we will copy from _first_.
		 * Otherwise there's a nasty deadlock on copying from the
		 * same page as we're writing to, without it being marked
		 * up-to-date.
		 */
		fault_in_pages_readable(buf, bytes);

		page = a_ops->get_xip_page(mapping,
					   index*(PAGE_SIZE/512), 0);
		if (IS_ERR(page) && (PTR_ERR(page) == -ENODATA)) {
			/* hole: we allocate a new page and unmap it */
			page = a_ops->get_xip_page(mapping,
						   index*(PAGE_SIZE/512), 1);
			if (!IS_ERR(page))
				/* unmap page at pgoff from all other vmas */
				__xip_unmap(mapping, index);
		}

		if (IS_ERR(page)) {
			status = PTR_ERR(page);
			break;
		}

		BUG_ON(!PageUptodate(page));

		if (likely(nr_segs == 1))
			copied = filemap_copy_from_user(page, offset,
							buf, bytes);
		else
			copied = filemap_copy_from_user_iovec(page, offset,
						cur_iov, iov_base, bytes);
		flush_dcache_page(page);
		if (likely(copied > 0)) {
			status = copied;
			written += status;
			count -= status;
			pos += status;
			buf += status;
			if (unlikely(nr_segs > 1))
				filemap_set_next_iovec(&cur_iov,
						       &iov_base, status);
		}
		if (unlikely(copied != bytes))
			if (status >= 0)
				status = -EFAULT;
		if (status < 0)
			break;
	} while (count);
	*ppos = pos;
	/*
	 * No need to use i_size_read() here, the i_size
	 * cannot change under us because we hold i_sem.
	 */
	if (pos > inode->i_size) {
		i_size_write(inode, pos);
		mark_inode_dirty(inode);
	}

	return written ? written : status;
}
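/*
 * Editorial note: unlike the generic write path, there is no
 * set_page_dirty()/writeback step above.  The copy lands directly on
 * the memory that backs the file (that is the point of execute in
 * place), so page-cache dirtying would be meaningless here; only the
 * inode metadata is marked dirty when i_size grows.
 */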
static ssize_t
xip_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov,
			  unsigned long nr_segs, loff_t *ppos)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	size_t ocount;		/* original count */
	size_t count;		/* after file limit checks */
	struct inode *inode = mapping->host;
	unsigned long seg;
	loff_t pos;
	ssize_t written;
	ssize_t err;

	ocount = 0;
	for (seg = 0; seg < nr_segs; seg++) {
		const struct iovec *iv = &iov[seg];

		/*
		 * If any segment has a negative length, or the cumulative
		 * length ever wraps negative then return -EINVAL.
		 */
		ocount += iv->iov_len;
		if (unlikely((ssize_t)(ocount|iv->iov_len) < 0))
			return -EINVAL;
		if (access_ok(VERIFY_READ, iv->iov_base, iv->iov_len))
			continue;
		if (seg == 0)
			return -EFAULT;
		nr_segs = seg;
		ocount -= iv->iov_len;	/* This segment is no good */
		break;
	}

	count = ocount;
	pos = *ppos;

	vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE);

	written = 0;

	err = generic_write_checks(file, &pos, &count, S_ISBLK(inode->i_mode));
	if (err)
		goto out;

	if (count == 0)
		goto out;

	err = remove_suid(file->f_dentry);
	if (err)
		goto out;

	inode_update_time(inode, 1);

	/* use execute in place to copy directly to disk */
	written = do_xip_file_write(iocb, iov, nr_segs, pos, ppos, count);
 out:
	return written ? written : err;
}
static ssize_t
__xip_file_write_nolock(struct file *file, const struct iovec *iov,
			unsigned long nr_segs, loff_t *ppos)
{
	struct kiocb kiocb;

	init_sync_kiocb(&kiocb, file);
	return xip_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos);
}
ssize_t
xip_file_aio_write(struct kiocb *iocb, const char __user *buf,
		   size_t count, loff_t pos)
{
	struct file *file = iocb->ki_filp;
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t ret;
	struct iovec local_iov = { .iov_base = (void __user *)buf,
				   .iov_len = count };

	BUG_ON(iocb->ki_pos != pos);

	down(&inode->i_sem);
	ret = xip_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos);
	up(&inode->i_sem);
	return ret;
}
EXPORT_SYMBOL_GPL(xip_file_aio_write);
ssize_t
xip_file_writev(struct file *file, const struct iovec *iov,
		unsigned long nr_segs, loff_t *ppos)
{
	struct address_space *mapping = file->f_mapping;
	struct inode *inode = mapping->host;
	ssize_t ret;

	down(&inode->i_sem);
	ret = __xip_file_write_nolock(file, iov, nr_segs, ppos);
	up(&inode->i_sem);
	return ret;
}
EXPORT_SYMBOL_GPL(xip_file_writev);
/*
 * truncate a page used for execute in place
 * functionality is analogous to block_truncate_page but uses
 * get_xip_page to get the page instead of the page cache
 */
int
xip_truncate_page(struct address_space *mapping, loff_t from)
{
	pgoff_t index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize;
	unsigned length;
	struct page *page;
	void *kaddr;

	BUG_ON(!mapping->a_ops->get_xip_page);

	blocksize = 1 << mapping->host->i_blkbits;
	length = offset & (blocksize - 1);

	/* Block boundary? Nothing to do */
	if (!length)
		return 0;

	length = blocksize - length;

	page = mapping->a_ops->get_xip_page(mapping,
					    index*(PAGE_SIZE/512), 0);
	if (!page)
		return -ENOMEM;
	if (unlikely(IS_ERR(page))) {
		if (PTR_ERR(page) == -ENODATA)
			/* Hole? No need to truncate */
			return 0;
		else
			return PTR_ERR(page);
	} else
		BUG_ON(!PageUptodate(page));
	kaddr = kmap_atomic(page, KM_USER0);
	memset(kaddr + offset, 0, length);
	kunmap_atomic(kaddr, KM_USER0);

	flush_dcache_page(page);
	return 0;
}
EXPORT_SYMBOL_GPL(xip_truncate_page);
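/*
 * Usage sketch (editorial): a filesystem's truncate path would zero
 * the tail of the last block with this helper before shrinking its
 * block allocation, e.g. (hypothetical guard, assuming some
 * mapping_is_xip() style predicate exists):
 *
 *	if (mapping_is_xip(inode->i_mapping))
 *		xip_truncate_page(inode->i_mapping, inode->i_size);
 *
 * so that stale data past the new EOF never becomes visible through
 * an existing XIP mapping.
 */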