VFS: Make more complete truncate operation available to CacheFiles
[deliverable/linux.git] / fs / open.c
1 /*
2 * linux/fs/open.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 #include <linux/string.h>
8 #include <linux/mm.h>
9 #include <linux/file.h>
10 #include <linux/fdtable.h>
11 #include <linux/fsnotify.h>
12 #include <linux/module.h>
13 #include <linux/tty.h>
14 #include <linux/namei.h>
15 #include <linux/backing-dev.h>
16 #include <linux/capability.h>
17 #include <linux/securebits.h>
18 #include <linux/security.h>
19 #include <linux/mount.h>
20 #include <linux/fcntl.h>
21 #include <linux/slab.h>
22 #include <asm/uaccess.h>
23 #include <linux/fs.h>
24 #include <linux/personality.h>
25 #include <linux/pagemap.h>
26 #include <linux/syscalls.h>
27 #include <linux/rcupdate.h>
28 #include <linux/audit.h>
29 #include <linux/falloc.h>
30 #include <linux/fs_struct.h>
31 #include <linux/ima.h>
32 #include <linux/dnotify.h>
33
34 #include "internal.h"
35
36 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
37 struct file *filp)
38 {
39 int ret;
40 struct iattr newattrs;
41
42 /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
43 if (length < 0)
44 return -EINVAL;
45
46 newattrs.ia_size = length;
47 newattrs.ia_valid = ATTR_SIZE | time_attrs;
48 if (filp) {
49 newattrs.ia_file = filp;
50 newattrs.ia_valid |= ATTR_FILE;
51 }
52
53 /* Remove suid/sgid on truncate too */
54 ret = should_remove_suid(dentry);
55 if (ret)
56 newattrs.ia_valid |= ret | ATTR_FORCE;
57
58 mutex_lock(&dentry->d_inode->i_mutex);
59 ret = notify_change(dentry, &newattrs);
60 mutex_unlock(&dentry->d_inode->i_mutex);
61 return ret;
62 }
63
64 long vfs_truncate(struct path *path, loff_t length)
65 {
66 struct inode *inode;
67 long error;
68
69 inode = path->dentry->d_inode;
70
71 /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
72 if (S_ISDIR(inode->i_mode))
73 return -EISDIR;
74 if (!S_ISREG(inode->i_mode))
75 return -EINVAL;
76
77 error = mnt_want_write(path->mnt);
78 if (error)
79 goto out;
80
81 error = inode_permission(inode, MAY_WRITE);
82 if (error)
83 goto mnt_drop_write_and_out;
84
85 error = -EPERM;
86 if (IS_APPEND(inode))
87 goto mnt_drop_write_and_out;
88
89 error = get_write_access(inode);
90 if (error)
91 goto mnt_drop_write_and_out;
92
93 /*
94 * Make sure that there are no leases. get_write_access() protects
95 * against the truncate racing with a lease-granting setlease().
96 */
97 error = break_lease(inode, O_WRONLY);
98 if (error)
99 goto put_write_and_out;
100
101 error = locks_verify_truncate(inode, NULL, length);
102 if (!error)
103 error = security_path_truncate(path);
104 if (!error)
105 error = do_truncate(path->dentry, length, 0, NULL);
106
107 put_write_and_out:
108 put_write_access(inode);
109 mnt_drop_write_and_out:
110 mnt_drop_write(path->mnt);
111 out:
112 return error;
113 }
114 EXPORT_SYMBOL_GPL(vfs_truncate);
115
116 static long do_sys_truncate(const char __user *pathname, loff_t length)
117 {
118 struct path path;
119 int error;
120
121 if (length < 0) /* sorry, but loff_t says... */
122 return -EINVAL;
123
124 error = user_path(pathname, &path);
125 if (!error) {
126 error = vfs_truncate(&path, length);
127 path_put(&path);
128 }
129 return error;
130 }
131
132 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
133 {
134 return do_sys_truncate(path, length);
135 }
136
137 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
138 {
139 struct inode *inode;
140 struct dentry *dentry;
141 struct fd f;
142 int error;
143
144 error = -EINVAL;
145 if (length < 0)
146 goto out;
147 error = -EBADF;
148 f = fdget(fd);
149 if (!f.file)
150 goto out;
151
152 /* explicitly opened as large or we are on 64-bit box */
153 if (f.file->f_flags & O_LARGEFILE)
154 small = 0;
155
156 dentry = f.file->f_path.dentry;
157 inode = dentry->d_inode;
158 error = -EINVAL;
159 if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
160 goto out_putf;
161
162 error = -EINVAL;
163 /* Cannot ftruncate over 2^31 bytes without large file support */
164 if (small && length > MAX_NON_LFS)
165 goto out_putf;
166
167 error = -EPERM;
168 if (IS_APPEND(inode))
169 goto out_putf;
170
171 sb_start_write(inode->i_sb);
172 error = locks_verify_truncate(inode, f.file, length);
173 if (!error)
174 error = security_path_truncate(&f.file->f_path);
175 if (!error)
176 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
177 sb_end_write(inode->i_sb);
178 out_putf:
179 fdput(f);
180 out:
181 return error;
182 }
183
184 SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
185 {
186 long ret = do_sys_ftruncate(fd, length, 1);
187 /* avoid REGPARM breakage on x86: */
188 asmlinkage_protect(2, ret, fd, length);
189 return ret;
190 }
191
/* The 64-bit-offset truncate entry points are only built on 32 bit machines. */
#if BITS_PER_LONG == 32
SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
{
	/* Same helper as truncate(), fed a full loff_t length. */
	return do_sys_truncate(path, length);
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_truncate64(long path, loff_t length)
{
	const char __user *upath = (const char __user *) path;

	return SYSC_truncate64(upath, length);
}
SYSCALL_ALIAS(sys_truncate64, SyS_truncate64);
#endif

SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length)
{
	long rc;

	/* small == 0: no MAX_NON_LFS limit on this entry point. */
	rc = do_sys_ftruncate(fd, length, 0);
	/* avoid REGPARM breakage on x86: */
	asmlinkage_protect(2, rc, fd, length);
	return rc;
}
#ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
asmlinkage long SyS_ftruncate64(long fd, loff_t length)
{
	unsigned int ufd = (unsigned int) fd;

	return SYSC_ftruncate64(ufd, length);
}
SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
#endif
#endif /* BITS_PER_LONG == 32 */
221
222
223 int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
224 {
225 struct inode *inode = file->f_path.dentry->d_inode;
226 long ret;
227
228 if (offset < 0 || len <= 0)
229 return -EINVAL;
230
231 /* Return error if mode is not supported */
232 if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
233 return -EOPNOTSUPP;
234
235 /* Punch hole must have keep size set */
236 if ((mode & FALLOC_FL_PUNCH_HOLE) &&
237 !(mode & FALLOC_FL_KEEP_SIZE))
238 return -EOPNOTSUPP;
239
240 if (!(file->f_mode & FMODE_WRITE))
241 return -EBADF;
242
243 /* It's not possible punch hole on append only file */
244 if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
245 return -EPERM;
246
247 if (IS_IMMUTABLE(inode))
248 return -EPERM;
249
250 /*
251 * Revalidate the write permissions, in case security policy has
252 * changed since the files were opened.
253 */
254 ret = security_file_permission(file, MAY_WRITE);
255 if (ret)
256 return ret;
257
258 if (S_ISFIFO(inode->i_mode))
259 return -ESPIPE;
260
261 /*
262 * Let individual file system decide if it supports preallocation
263 * for directories or not.
264 */
265 if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
266 return -ENODEV;
267
268 /* Check for wrap through zero too */
269 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
270 return -EFBIG;
271
272 if (!file->f_op->fallocate)
273 return -EOPNOTSUPP;
274
275 sb_start_write(inode->i_sb);
276 ret = file->f_op->fallocate(file, mode, offset, len);
277 sb_end_write(inode->i_sb);
278 return ret;
279 }
280
281 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
282 {
283 struct fd f = fdget(fd);
284 int error = -EBADF;
285
286 if (f.file) {
287 error = do_fallocate(f.file, mode, offset, len);
288 fdput(f);
289 }
290 return error;
291 }
292
293 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
294 asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
295 {
296 return SYSC_fallocate((int)fd, (int)mode, offset, len);
297 }
298 SYSCALL_ALIAS(sys_fallocate, SyS_fallocate);
299 #endif
300
301 /*
302 * access() needs to use the real uid/gid, not the effective uid/gid.
303 * We do this by temporarily clearing all FS-related capabilities and
304 * switching the fsuid/fsgid around to the real ones.
305 */
306 SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
307 {
308 const struct cred *old_cred;
309 struct cred *override_cred;
310 struct path path;
311 struct inode *inode;
312 int res;
313
314 if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */
315 return -EINVAL;
316
317 override_cred = prepare_creds();
318 if (!override_cred)
319 return -ENOMEM;
320
321 override_cred->fsuid = override_cred->uid;
322 override_cred->fsgid = override_cred->gid;
323
324 if (!issecure(SECURE_NO_SETUID_FIXUP)) {
325 /* Clear the capabilities if we switch to a non-root user */
326 kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
327 if (!uid_eq(override_cred->uid, root_uid))
328 cap_clear(override_cred->cap_effective);
329 else
330 override_cred->cap_effective =
331 override_cred->cap_permitted;
332 }
333
334 old_cred = override_creds(override_cred);
335
336 res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
337 if (res)
338 goto out;
339
340 inode = path.dentry->d_inode;
341
342 if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
343 /*
344 * MAY_EXEC on regular files is denied if the fs is mounted
345 * with the "noexec" flag.
346 */
347 res = -EACCES;
348 if (path.mnt->mnt_flags & MNT_NOEXEC)
349 goto out_path_release;
350 }
351
352 res = inode_permission(inode, mode | MAY_ACCESS);
353 /* SuS v2 requires we report a read only fs too */
354 if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
355 goto out_path_release;
356 /*
357 * This is a rare case where using __mnt_is_readonly()
358 * is OK without a mnt_want/drop_write() pair. Since
359 * no actual write to the fs is performed here, we do
360 * not need to telegraph to that to anyone.
361 *
362 * By doing this, we accept that this access is
363 * inherently racy and know that the fs may change
364 * state before we even see this result.
365 */
366 if (__mnt_is_readonly(path.mnt))
367 res = -EROFS;
368
369 out_path_release:
370 path_put(&path);
371 out:
372 revert_creds(old_cred);
373 put_cred(override_cred);
374 return res;
375 }
376
377 SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
378 {
379 return sys_faccessat(AT_FDCWD, filename, mode);
380 }
381
382 SYSCALL_DEFINE1(chdir, const char __user *, filename)
383 {
384 struct path path;
385 int error;
386
387 error = user_path_dir(filename, &path);
388 if (error)
389 goto out;
390
391 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
392 if (error)
393 goto dput_and_out;
394
395 set_fs_pwd(current->fs, &path);
396
397 dput_and_out:
398 path_put(&path);
399 out:
400 return error;
401 }
402
403 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
404 {
405 struct fd f = fdget_raw(fd);
406 struct inode *inode;
407 int error = -EBADF;
408
409 error = -EBADF;
410 if (!f.file)
411 goto out;
412
413 inode = f.file->f_path.dentry->d_inode;
414
415 error = -ENOTDIR;
416 if (!S_ISDIR(inode->i_mode))
417 goto out_putf;
418
419 error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
420 if (!error)
421 set_fs_pwd(current->fs, &f.file->f_path);
422 out_putf:
423 fdput(f);
424 out:
425 return error;
426 }
427
428 SYSCALL_DEFINE1(chroot, const char __user *, filename)
429 {
430 struct path path;
431 int error;
432
433 error = user_path_dir(filename, &path);
434 if (error)
435 goto out;
436
437 error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
438 if (error)
439 goto dput_and_out;
440
441 error = -EPERM;
442 if (!nsown_capable(CAP_SYS_CHROOT))
443 goto dput_and_out;
444 error = security_path_chroot(&path);
445 if (error)
446 goto dput_and_out;
447
448 set_fs_root(current->fs, &path);
449 error = 0;
450 dput_and_out:
451 path_put(&path);
452 out:
453 return error;
454 }
455
456 static int chmod_common(struct path *path, umode_t mode)
457 {
458 struct inode *inode = path->dentry->d_inode;
459 struct iattr newattrs;
460 int error;
461
462 error = mnt_want_write(path->mnt);
463 if (error)
464 return error;
465 mutex_lock(&inode->i_mutex);
466 error = security_path_chmod(path, mode);
467 if (error)
468 goto out_unlock;
469 newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
470 newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
471 error = notify_change(path->dentry, &newattrs);
472 out_unlock:
473 mutex_unlock(&inode->i_mutex);
474 mnt_drop_write(path->mnt);
475 return error;
476 }
477
478 SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
479 {
480 struct file * file;
481 int err = -EBADF;
482
483 file = fget(fd);
484 if (file) {
485 audit_inode(NULL, file->f_path.dentry, 0);
486 err = chmod_common(&file->f_path, mode);
487 fput(file);
488 }
489 return err;
490 }
491
492 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
493 {
494 struct path path;
495 int error;
496
497 error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
498 if (!error) {
499 error = chmod_common(&path, mode);
500 path_put(&path);
501 }
502 return error;
503 }
504
505 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
506 {
507 return sys_fchmodat(AT_FDCWD, filename, mode);
508 }
509
510 static int chown_common(struct path *path, uid_t user, gid_t group)
511 {
512 struct inode *inode = path->dentry->d_inode;
513 int error;
514 struct iattr newattrs;
515 kuid_t uid;
516 kgid_t gid;
517
518 uid = make_kuid(current_user_ns(), user);
519 gid = make_kgid(current_user_ns(), group);
520
521 newattrs.ia_valid = ATTR_CTIME;
522 if (user != (uid_t) -1) {
523 if (!uid_valid(uid))
524 return -EINVAL;
525 newattrs.ia_valid |= ATTR_UID;
526 newattrs.ia_uid = uid;
527 }
528 if (group != (gid_t) -1) {
529 if (!gid_valid(gid))
530 return -EINVAL;
531 newattrs.ia_valid |= ATTR_GID;
532 newattrs.ia_gid = gid;
533 }
534 if (!S_ISDIR(inode->i_mode))
535 newattrs.ia_valid |=
536 ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
537 mutex_lock(&inode->i_mutex);
538 error = security_path_chown(path, uid, gid);
539 if (!error)
540 error = notify_change(path->dentry, &newattrs);
541 mutex_unlock(&inode->i_mutex);
542
543 return error;
544 }
545
546 SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
547 gid_t, group, int, flag)
548 {
549 struct path path;
550 int error = -EINVAL;
551 int lookup_flags;
552
553 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
554 goto out;
555
556 lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
557 if (flag & AT_EMPTY_PATH)
558 lookup_flags |= LOOKUP_EMPTY;
559 error = user_path_at(dfd, filename, lookup_flags, &path);
560 if (error)
561 goto out;
562 error = mnt_want_write(path.mnt);
563 if (error)
564 goto out_release;
565 error = chown_common(&path, user, group);
566 mnt_drop_write(path.mnt);
567 out_release:
568 path_put(&path);
569 out:
570 return error;
571 }
572
573 SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
574 {
575 return sys_fchownat(AT_FDCWD, filename, user, group, 0);
576 }
577
578 SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
579 {
580 return sys_fchownat(AT_FDCWD, filename, user, group,
581 AT_SYMLINK_NOFOLLOW);
582 }
583
584 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
585 {
586 struct fd f = fdget(fd);
587 int error = -EBADF;
588
589 if (!f.file)
590 goto out;
591
592 error = mnt_want_write_file(f.file);
593 if (error)
594 goto out_fput;
595 audit_inode(NULL, f.file->f_path.dentry, 0);
596 error = chown_common(&f.file->f_path, user, group);
597 mnt_drop_write_file(f.file);
598 out_fput:
599 fdput(f);
600 out:
601 return error;
602 }
603
604 /*
605 * You have to be very careful that these write
606 * counts get cleaned up in error cases and
607 * upon __fput(). This should probably never
608 * be called outside of __dentry_open().
609 */
610 static inline int __get_file_write_access(struct inode *inode,
611 struct vfsmount *mnt)
612 {
613 int error;
614 error = get_write_access(inode);
615 if (error)
616 return error;
617 /*
618 * Do not take mount writer counts on
619 * special files since no writes to
620 * the mount itself will occur.
621 */
622 if (!special_file(inode->i_mode)) {
623 /*
624 * Balanced in __fput()
625 */
626 error = __mnt_want_write(mnt);
627 if (error)
628 put_write_access(inode);
629 }
630 return error;
631 }
632
633 int open_check_o_direct(struct file *f)
634 {
635 /* NB: we're sure to have correct a_ops only after f_op->open */
636 if (f->f_flags & O_DIRECT) {
637 if (!f->f_mapping->a_ops ||
638 ((!f->f_mapping->a_ops->direct_IO) &&
639 (!f->f_mapping->a_ops->get_xip_mem))) {
640 return -EINVAL;
641 }
642 }
643 return 0;
644 }
645
646 static int do_dentry_open(struct file *f,
647 int (*open)(struct inode *, struct file *),
648 const struct cred *cred)
649 {
650 static const struct file_operations empty_fops = {};
651 struct inode *inode;
652 int error;
653
654 f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
655 FMODE_PREAD | FMODE_PWRITE;
656
657 if (unlikely(f->f_flags & O_PATH))
658 f->f_mode = FMODE_PATH;
659
660 path_get(&f->f_path);
661 inode = f->f_path.dentry->d_inode;
662 if (f->f_mode & FMODE_WRITE) {
663 error = __get_file_write_access(inode, f->f_path.mnt);
664 if (error)
665 goto cleanup_file;
666 if (!special_file(inode->i_mode))
667 file_take_write(f);
668 }
669
670 f->f_mapping = inode->i_mapping;
671 f->f_pos = 0;
672 file_sb_list_add(f, inode->i_sb);
673
674 if (unlikely(f->f_mode & FMODE_PATH)) {
675 f->f_op = &empty_fops;
676 return 0;
677 }
678
679 f->f_op = fops_get(inode->i_fop);
680
681 error = security_file_open(f, cred);
682 if (error)
683 goto cleanup_all;
684
685 error = break_lease(inode, f->f_flags);
686 if (error)
687 goto cleanup_all;
688
689 if (!open && f->f_op)
690 open = f->f_op->open;
691 if (open) {
692 error = open(inode, f);
693 if (error)
694 goto cleanup_all;
695 }
696 if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
697 i_readcount_inc(inode);
698
699 f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
700
701 file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
702
703 return 0;
704
705 cleanup_all:
706 fops_put(f->f_op);
707 file_sb_list_del(f);
708 if (f->f_mode & FMODE_WRITE) {
709 put_write_access(inode);
710 if (!special_file(inode->i_mode)) {
711 /*
712 * We don't consider this a real
713 * mnt_want/drop_write() pair
714 * because it all happenend right
715 * here, so just reset the state.
716 */
717 file_reset_write(f);
718 __mnt_drop_write(f->f_path.mnt);
719 }
720 }
721 cleanup_file:
722 path_put(&f->f_path);
723 f->f_path.mnt = NULL;
724 f->f_path.dentry = NULL;
725 return error;
726 }
727
728 /**
729 * finish_open - finish opening a file
730 * @od: opaque open data
731 * @dentry: pointer to dentry
732 * @open: open callback
733 *
734 * This can be used to finish opening a file passed to i_op->atomic_open().
735 *
736 * If the open callback is set to NULL, then the standard f_op->open()
737 * filesystem callback is substituted.
738 */
739 int finish_open(struct file *file, struct dentry *dentry,
740 int (*open)(struct inode *, struct file *),
741 int *opened)
742 {
743 int error;
744 BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
745
746 file->f_path.dentry = dentry;
747 error = do_dentry_open(file, open, current_cred());
748 if (!error)
749 *opened |= FILE_OPENED;
750
751 return error;
752 }
753 EXPORT_SYMBOL(finish_open);
754
755 /**
756 * finish_no_open - finish ->atomic_open() without opening the file
757 *
758 * @od: opaque open data
759 * @dentry: dentry or NULL (as returned from ->lookup())
760 *
761 * This can be used to set the result of a successful lookup in ->atomic_open().
762 * The filesystem's atomic_open() method shall return NULL after calling this.
763 */
764 int finish_no_open(struct file *file, struct dentry *dentry)
765 {
766 file->f_path.dentry = dentry;
767 return 1;
768 }
769 EXPORT_SYMBOL(finish_no_open);
770
771 struct file *dentry_open(const struct path *path, int flags,
772 const struct cred *cred)
773 {
774 int error;
775 struct file *f;
776
777 validate_creds(cred);
778
779 /* We must always pass in a valid mount pointer. */
780 BUG_ON(!path->mnt);
781
782 error = -ENFILE;
783 f = get_empty_filp();
784 if (f == NULL)
785 return ERR_PTR(error);
786
787 f->f_flags = flags;
788 f->f_path = *path;
789 error = do_dentry_open(f, NULL, cred);
790 if (!error) {
791 error = open_check_o_direct(f);
792 if (error) {
793 fput(f);
794 f = ERR_PTR(error);
795 }
796 } else {
797 put_filp(f);
798 f = ERR_PTR(error);
799 }
800 return f;
801 }
802 EXPORT_SYMBOL(dentry_open);
803
804 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
805 {
806 int lookup_flags = 0;
807 int acc_mode;
808
809 if (flags & O_CREAT)
810 op->mode = (mode & S_IALLUGO) | S_IFREG;
811 else
812 op->mode = 0;
813
814 /* Must never be set by userspace */
815 flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
816
817 /*
818 * O_SYNC is implemented as __O_SYNC|O_DSYNC. As many places only
819 * check for O_DSYNC if the need any syncing at all we enforce it's
820 * always set instead of having to deal with possibly weird behaviour
821 * for malicious applications setting only __O_SYNC.
822 */
823 if (flags & __O_SYNC)
824 flags |= O_DSYNC;
825
826 /*
827 * If we have O_PATH in the open flag. Then we
828 * cannot have anything other than the below set of flags
829 */
830 if (flags & O_PATH) {
831 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
832 acc_mode = 0;
833 } else {
834 acc_mode = MAY_OPEN | ACC_MODE(flags);
835 }
836
837 op->open_flag = flags;
838
839 /* O_TRUNC implies we need access checks for write permissions */
840 if (flags & O_TRUNC)
841 acc_mode |= MAY_WRITE;
842
843 /* Allow the LSM permission hook to distinguish append
844 access from general write access. */
845 if (flags & O_APPEND)
846 acc_mode |= MAY_APPEND;
847
848 op->acc_mode = acc_mode;
849
850 op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
851
852 if (flags & O_CREAT) {
853 op->intent |= LOOKUP_CREATE;
854 if (flags & O_EXCL)
855 op->intent |= LOOKUP_EXCL;
856 }
857
858 if (flags & O_DIRECTORY)
859 lookup_flags |= LOOKUP_DIRECTORY;
860 if (!(flags & O_NOFOLLOW))
861 lookup_flags |= LOOKUP_FOLLOW;
862 return lookup_flags;
863 }
864
865 /**
866 * file_open_name - open file and return file pointer
867 *
868 * @name: struct filename containing path to open
869 * @flags: open flags as per the open(2) second argument
870 * @mode: mode for the new file if O_CREAT is set, else ignored
871 *
872 * This is the helper to open a file from kernelspace if you really
873 * have to. But in generally you should not do this, so please move
874 * along, nothing to see here..
875 */
876 struct file *file_open_name(struct filename *name, int flags, umode_t mode)
877 {
878 struct open_flags op;
879 int lookup = build_open_flags(flags, mode, &op);
880 return do_filp_open(AT_FDCWD, name, &op, lookup);
881 }
882
883 /**
884 * filp_open - open file and return file pointer
885 *
886 * @filename: path to open
887 * @flags: open flags as per the open(2) second argument
888 * @mode: mode for the new file if O_CREAT is set, else ignored
889 *
890 * This is the helper to open a file from kernelspace if you really
891 * have to. But in generally you should not do this, so please move
892 * along, nothing to see here..
893 */
894 struct file *filp_open(const char *filename, int flags, umode_t mode)
895 {
896 struct filename name = {.name = filename};
897 return file_open_name(&name, flags, mode);
898 }
899 EXPORT_SYMBOL(filp_open);
900
901 struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
902 const char *filename, int flags)
903 {
904 struct open_flags op;
905 int lookup = build_open_flags(flags, 0, &op);
906 if (flags & O_CREAT)
907 return ERR_PTR(-EINVAL);
908 if (!filename && (flags & O_DIRECTORY))
909 if (!dentry->d_inode->i_op->lookup)
910 return ERR_PTR(-ENOTDIR);
911 return do_file_open_root(dentry, mnt, filename, &op, lookup);
912 }
913 EXPORT_SYMBOL(file_open_root);
914
915 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
916 {
917 struct open_flags op;
918 int lookup = build_open_flags(flags, mode, &op);
919 struct filename *tmp = getname(filename);
920 int fd = PTR_ERR(tmp);
921
922 if (!IS_ERR(tmp)) {
923 fd = get_unused_fd_flags(flags);
924 if (fd >= 0) {
925 struct file *f = do_filp_open(dfd, tmp, &op, lookup);
926 if (IS_ERR(f)) {
927 put_unused_fd(fd);
928 fd = PTR_ERR(f);
929 } else {
930 fsnotify_open(f);
931 fd_install(fd, f);
932 }
933 }
934 putname(tmp);
935 }
936 return fd;
937 }
938
939 SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
940 {
941 long ret;
942
943 if (force_o_largefile())
944 flags |= O_LARGEFILE;
945
946 ret = do_sys_open(AT_FDCWD, filename, flags, mode);
947 /* avoid REGPARM breakage on x86: */
948 asmlinkage_protect(3, ret, filename, flags, mode);
949 return ret;
950 }
951
952 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
953 umode_t, mode)
954 {
955 long ret;
956
957 if (force_o_largefile())
958 flags |= O_LARGEFILE;
959
960 ret = do_sys_open(dfd, filename, flags, mode);
961 /* avoid REGPARM breakage on x86: */
962 asmlinkage_protect(4, ret, dfd, filename, flags, mode);
963 return ret;
964 }
965
966 #ifndef __alpha__
967
968 /*
969 * For backward compatibility? Maybe this should be moved
970 * into arch/i386 instead?
971 */
972 SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
973 {
974 return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
975 }
976
977 #endif
978
979 /*
980 * "id" is the POSIX thread ID. We use the
981 * files pointer for this..
982 */
983 int filp_close(struct file *filp, fl_owner_t id)
984 {
985 int retval = 0;
986
987 if (!file_count(filp)) {
988 printk(KERN_ERR "VFS: Close: file count is 0\n");
989 return 0;
990 }
991
992 if (filp->f_op && filp->f_op->flush)
993 retval = filp->f_op->flush(filp, id);
994
995 if (likely(!(filp->f_mode & FMODE_PATH))) {
996 dnotify_flush(filp, id);
997 locks_remove_posix(filp, id);
998 }
999 fput(filp);
1000 return retval;
1001 }
1002
1003 EXPORT_SYMBOL(filp_close);
1004
1005 /*
1006 * Careful here! We test whether the file pointer is NULL before
1007 * releasing the fd. This ensures that one clone task can't release
1008 * an fd while another clone is opening it.
1009 */
1010 SYSCALL_DEFINE1(close, unsigned int, fd)
1011 {
1012 int retval = __close_fd(current->files, fd);
1013
1014 /* can't restart close syscall because file table entry was cleared */
1015 if (unlikely(retval == -ERESTARTSYS ||
1016 retval == -ERESTARTNOINTR ||
1017 retval == -ERESTARTNOHAND ||
1018 retval == -ERESTART_RESTARTBLOCK))
1019 retval = -EINTR;
1020
1021 return retval;
1022 }
1023 EXPORT_SYMBOL(sys_close);
1024
1025 /*
1026 * This routine simulates a hangup on the tty, to arrange that users
1027 * are given clean terminals at login time.
1028 */
1029 SYSCALL_DEFINE0(vhangup)
1030 {
1031 if (capable(CAP_SYS_TTY_CONFIG)) {
1032 tty_vhangup_self();
1033 return 0;
1034 }
1035 return -EPERM;
1036 }
1037
1038 /*
1039 * Called when an inode is about to be open.
1040 * We use this to disallow opening large files on 32bit systems if
1041 * the caller didn't specify O_LARGEFILE. On 64bit systems we force
1042 * on this flag in sys_open.
1043 */
1044 int generic_file_open(struct inode * inode, struct file * filp)
1045 {
1046 if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
1047 return -EOVERFLOW;
1048 return 0;
1049 }
1050
1051 EXPORT_SYMBOL(generic_file_open);
1052
1053 /*
1054 * This is used by subsystems that don't want seekable
1055 * file descriptors. The function is not supposed to ever fail, the only
1056 * reason it returns an 'int' and not 'void' is so that it can be plugged
1057 * directly into file_operations structure.
1058 */
1059 int nonseekable_open(struct inode *inode, struct file *filp)
1060 {
1061 filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
1062 return 0;
1063 }
1064
1065 EXPORT_SYMBOL(nonseekable_open);
This page took 0.127903 seconds and 5 git commands to generate.