orangefs: remove vestigial async io code
[deliverable/linux.git] / fs / orangefs / file.c
1 /*
2 * (C) 2001 Clemson University and The University of Chicago
3 *
4 * See COPYING in top-level directory.
5 */
6
7 /*
8 * Linux VFS file operations.
9 */
10
11 #include "protocol.h"
12 #include "orangefs-kernel.h"
13 #include "orangefs-bufmap.h"
14 #include <linux/fs.h>
15 #include <linux/pagemap.h>
16
17 /*
18 * Copy to client-core's address space from the buffers specified
19 * by the iovec upto total_size bytes.
20 * NOTE: the iovector can either contain addresses which
21 * can futher be kernel-space or user-space addresses.
22 * or it can pointers to struct page's
23 */
24 static int precopy_buffers(struct orangefs_bufmap *bufmap,
25 int buffer_index,
26 struct iov_iter *iter,
27 size_t total_size)
28 {
29 int ret = 0;
30 /*
31 * copy data from application/kernel by pulling it out
32 * of the iovec.
33 */
34
35
36 if (total_size) {
37 ret = orangefs_bufmap_copy_from_iovec(bufmap,
38 iter,
39 buffer_index,
40 total_size);
41 if (ret < 0)
42 gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
43 __func__,
44 (long)ret);
45 }
46
47 if (ret < 0)
48 gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
49 __func__,
50 (long)ret);
51 return ret;
52 }
53
54 /*
55 * Copy from client-core's address space to the buffers specified
56 * by the iovec upto total_size bytes.
57 * NOTE: the iovector can either contain addresses which
58 * can futher be kernel-space or user-space addresses.
59 * or it can pointers to struct page's
60 */
61 static int postcopy_buffers(struct orangefs_bufmap *bufmap,
62 int buffer_index,
63 struct iov_iter *iter,
64 size_t total_size)
65 {
66 int ret = 0;
67 /*
68 * copy data to application/kernel by pushing it out to
69 * the iovec. NOTE; target buffers can be addresses or
70 * struct page pointers.
71 */
72 if (total_size) {
73 ret = orangefs_bufmap_copy_to_iovec(bufmap,
74 iter,
75 buffer_index,
76 total_size);
77 if (ret < 0)
78 gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
79 __func__,
80 (long)ret);
81 }
82 return ret;
83 }
84
85 /*
86 * Post and wait for the I/O upcall to finish
87 */
88 static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
89 loff_t *offset, struct iov_iter *iter,
90 size_t total_size, loff_t readahead_size)
91 {
92 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
93 struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
94 struct orangefs_bufmap *bufmap = NULL;
95 struct orangefs_kernel_op_s *new_op = NULL;
96 struct iov_iter saved = *iter;
97 int buffer_index = -1;
98 ssize_t ret;
99
100 new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
101 if (!new_op)
102 return -ENOMEM;
103
104 /* synchronous I/O */
105 new_op->upcall.req.io.readahead_size = readahead_size;
106 new_op->upcall.req.io.io_type = type;
107 new_op->upcall.req.io.refn = orangefs_inode->refn;
108
109 populate_shared_memory:
110 /* get a shared buffer index */
111 ret = orangefs_bufmap_get(&bufmap, &buffer_index);
112 if (ret < 0) {
113 gossip_debug(GOSSIP_FILE_DEBUG,
114 "%s: orangefs_bufmap_get failure (%ld)\n",
115 __func__, (long)ret);
116 goto out;
117 }
118 gossip_debug(GOSSIP_FILE_DEBUG,
119 "%s(%pU): GET op %p -> buffer_index %d\n",
120 __func__,
121 handle,
122 new_op,
123 buffer_index);
124
125 new_op->uses_shared_memory = 1;
126 new_op->upcall.req.io.buf_index = buffer_index;
127 new_op->upcall.req.io.count = total_size;
128 new_op->upcall.req.io.offset = *offset;
129
130 gossip_debug(GOSSIP_FILE_DEBUG,
131 "%s(%pU): offset: %llu total_size: %zd\n",
132 __func__,
133 handle,
134 llu(*offset),
135 total_size);
136 /*
137 * Stage 1: copy the buffers into client-core's address space
138 * precopy_buffers only pertains to writes.
139 */
140 if (type == ORANGEFS_IO_WRITE) {
141 ret = precopy_buffers(bufmap,
142 buffer_index,
143 iter,
144 total_size);
145 if (ret < 0)
146 goto out;
147 }
148
149 gossip_debug(GOSSIP_FILE_DEBUG,
150 "%s(%pU): Calling post_io_request with tag (%llu)\n",
151 __func__,
152 handle,
153 llu(new_op->tag));
154
155 /* Stage 2: Service the I/O operation */
156 ret = service_operation(new_op,
157 type == ORANGEFS_IO_WRITE ?
158 "file_write" :
159 "file_read",
160 get_interruptible_flag(inode));
161
162 /*
163 * If service_operation() returns -EAGAIN #and# the operation was
164 * purged from orangefs_request_list or htable_ops_in_progress, then
165 * we know that the client was restarted, causing the shared memory
166 * area to be wiped clean. To restart a write operation in this
167 * case, we must re-copy the data from the user's iovec to a NEW
168 * shared memory location. To restart a read operation, we must get
169 * a new shared memory location.
170 */
171 if (ret == -EAGAIN && op_state_purged(new_op)) {
172 orangefs_bufmap_put(buffer_index);
173 buffer_index = -1;
174 if (type == ORANGEFS_IO_WRITE)
175 *iter = saved;
176 gossip_debug(GOSSIP_FILE_DEBUG,
177 "%s:going to repopulate_shared_memory.\n",
178 __func__);
179 goto populate_shared_memory;
180 }
181
182 if (ret < 0) {
183 /*
184 * don't write an error to syslog on signaled operation
185 * termination unless we've got debugging turned on, as
186 * this can happen regularly (i.e. ctrl-c)
187 */
188 if (ret == -EINTR)
189 gossip_debug(GOSSIP_FILE_DEBUG,
190 "%s: returning error %ld\n", __func__,
191 (long)ret);
192 else
193 gossip_err("%s: error in %s handle %pU, returning %zd\n",
194 __func__,
195 type == ORANGEFS_IO_READ ?
196 "read from" : "write to",
197 handle, ret);
198 if (orangefs_cancel_op_in_progress(new_op))
199 return ret;
200
201 goto out;
202 }
203
204 /*
205 * Stage 3: Post copy buffers from client-core's address space
206 * postcopy_buffers only pertains to reads.
207 */
208 if (type == ORANGEFS_IO_READ) {
209 ret = postcopy_buffers(bufmap,
210 buffer_index,
211 iter,
212 new_op->downcall.resp.io.amt_complete);
213 if (ret < 0)
214 goto out;
215 }
216 gossip_debug(GOSSIP_FILE_DEBUG,
217 "%s(%pU): Amount written as returned by the sys-io call:%d\n",
218 __func__,
219 handle,
220 (int)new_op->downcall.resp.io.amt_complete);
221
222 ret = new_op->downcall.resp.io.amt_complete;
223
224 /*
225 * tell the device file owner waiting on I/O that this read has
226 * completed and it can return now.
227 */
228
229 out:
230 if (buffer_index >= 0) {
231 orangefs_bufmap_put(buffer_index);
232 gossip_debug(GOSSIP_FILE_DEBUG,
233 "%s(%pU): PUT buffer_index %d\n",
234 __func__, handle, buffer_index);
235 buffer_index = -1;
236 }
237 op_release(new_op);
238 return ret;
239 }
240
241 /*
242 * Common entry point for read/write/readv/writev
243 * This function will dispatch it to either the direct I/O
244 * or buffered I/O path depending on the mount options and/or
245 * augmented/extended metadata attached to the file.
246 * Note: File extended attributes override any mount options.
247 */
248 static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file,
249 loff_t *offset, struct iov_iter *iter)
250 {
251 struct inode *inode = file->f_mapping->host;
252 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
253 struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
254 size_t count = iov_iter_count(iter);
255 ssize_t total_count = 0;
256 ssize_t ret = -EINVAL;
257
258 gossip_debug(GOSSIP_FILE_DEBUG,
259 "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
260 __func__,
261 handle,
262 (int)count);
263
264 if (type == ORANGEFS_IO_WRITE) {
265 gossip_debug(GOSSIP_FILE_DEBUG,
266 "%s(%pU): proceeding with offset : %llu, "
267 "size %d\n",
268 __func__,
269 handle,
270 llu(*offset),
271 (int)count);
272 }
273
274 if (count == 0) {
275 ret = 0;
276 goto out;
277 }
278
279 while (iov_iter_count(iter)) {
280 size_t each_count = iov_iter_count(iter);
281 size_t amt_complete;
282
283 /* how much to transfer in this loop iteration */
284 if (each_count > orangefs_bufmap_size_query())
285 each_count = orangefs_bufmap_size_query();
286
287 gossip_debug(GOSSIP_FILE_DEBUG,
288 "%s(%pU): size of each_count(%d)\n",
289 __func__,
290 handle,
291 (int)each_count);
292 gossip_debug(GOSSIP_FILE_DEBUG,
293 "%s(%pU): BEFORE wait_for_io: offset is %d\n",
294 __func__,
295 handle,
296 (int)*offset);
297
298 ret = wait_for_direct_io(type, inode, offset, iter,
299 each_count, 0);
300 gossip_debug(GOSSIP_FILE_DEBUG,
301 "%s(%pU): return from wait_for_io:%d\n",
302 __func__,
303 handle,
304 (int)ret);
305
306 if (ret < 0)
307 goto out;
308
309 *offset += ret;
310 total_count += ret;
311 amt_complete = ret;
312
313 gossip_debug(GOSSIP_FILE_DEBUG,
314 "%s(%pU): AFTER wait_for_io: offset is %d\n",
315 __func__,
316 handle,
317 (int)*offset);
318
319 /*
320 * if we got a short I/O operations,
321 * fall out and return what we got so far
322 */
323 if (amt_complete < each_count)
324 break;
325 } /*end while */
326
327 if (total_count > 0)
328 ret = total_count;
329 out:
330 if (ret > 0) {
331 if (type == ORANGEFS_IO_READ) {
332 file_accessed(file);
333 } else {
334 SetMtimeFlag(orangefs_inode);
335 inode->i_mtime = CURRENT_TIME;
336 mark_inode_dirty_sync(inode);
337 }
338 }
339
340 gossip_debug(GOSSIP_FILE_DEBUG,
341 "%s(%pU): Value(%d) returned.\n",
342 __func__,
343 handle,
344 (int)ret);
345
346 return ret;
347 }
348
349 /*
350 * Read data from a specified offset in a file (referenced by inode).
351 * Data may be placed either in a user or kernel buffer.
352 */
353 ssize_t orangefs_inode_read(struct inode *inode,
354 struct iov_iter *iter,
355 loff_t *offset,
356 loff_t readahead_size)
357 {
358 struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
359 size_t count = iov_iter_count(iter);
360 size_t bufmap_size;
361 ssize_t ret = -EINVAL;
362
363 g_orangefs_stats.reads++;
364
365 bufmap_size = orangefs_bufmap_size_query();
366 if (count > bufmap_size) {
367 gossip_debug(GOSSIP_FILE_DEBUG,
368 "%s: count is too large (%zd/%zd)!\n",
369 __func__, count, bufmap_size);
370 return -EINVAL;
371 }
372
373 gossip_debug(GOSSIP_FILE_DEBUG,
374 "%s(%pU) %zd@%llu\n",
375 __func__,
376 &orangefs_inode->refn.khandle,
377 count,
378 llu(*offset));
379
380 ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter,
381 count, readahead_size);
382 if (ret > 0)
383 *offset += ret;
384
385 gossip_debug(GOSSIP_FILE_DEBUG,
386 "%s(%pU): Value(%zd) returned.\n",
387 __func__,
388 &orangefs_inode->refn.khandle,
389 ret);
390
391 return ret;
392 }
393
394 static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
395 {
396 struct file *file = iocb->ki_filp;
397 loff_t pos = *(&iocb->ki_pos);
398 ssize_t rc = 0;
399
400 BUG_ON(iocb->private);
401
402 gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n");
403
404 g_orangefs_stats.reads++;
405
406 rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter);
407 iocb->ki_pos = pos;
408
409 return rc;
410 }
411
412 static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
413 {
414 struct file *file = iocb->ki_filp;
415 loff_t pos;
416 ssize_t rc;
417
418 BUG_ON(iocb->private);
419
420 gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n");
421
422 mutex_lock(&file->f_mapping->host->i_mutex);
423
424 /* Make sure generic_write_checks sees an up to date inode size. */
425 if (file->f_flags & O_APPEND) {
426 rc = orangefs_inode_getattr(file->f_mapping->host,
427 ORANGEFS_ATTR_SYS_SIZE, 0);
428 if (rc) {
429 gossip_err("%s: orangefs_inode_getattr failed, rc:%zd:.\n",
430 __func__, rc);
431 goto out;
432 }
433 }
434
435 if (file->f_pos > i_size_read(file->f_mapping->host))
436 orangefs_i_size_write(file->f_mapping->host, file->f_pos);
437
438 rc = generic_write_checks(iocb, iter);
439
440 if (rc <= 0) {
441 gossip_err("%s: generic_write_checks failed, rc:%zd:.\n",
442 __func__, rc);
443 goto out;
444 }
445
446 /*
447 * if we are appending, generic_write_checks would have updated
448 * pos to the end of the file, so we will wait till now to set
449 * pos...
450 */
451 pos = *(&iocb->ki_pos);
452
453 rc = do_readv_writev(ORANGEFS_IO_WRITE,
454 file,
455 &pos,
456 iter);
457 if (rc < 0) {
458 gossip_err("%s: do_readv_writev failed, rc:%zd:.\n",
459 __func__, rc);
460 goto out;
461 }
462
463 iocb->ki_pos = pos;
464 g_orangefs_stats.writes++;
465
466 out:
467
468 mutex_unlock(&file->f_mapping->host->i_mutex);
469 return rc;
470 }
471
472 /*
473 * Perform a miscellaneous operation on a file.
474 */
475 static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
476 {
477 int ret = -ENOTTY;
478 __u64 val = 0;
479 unsigned long uval;
480
481 gossip_debug(GOSSIP_FILE_DEBUG,
482 "orangefs_ioctl: called with cmd %d\n",
483 cmd);
484
485 /*
486 * we understand some general ioctls on files, such as the immutable
487 * and append flags
488 */
489 if (cmd == FS_IOC_GETFLAGS) {
490 val = 0;
491 ret = orangefs_inode_getxattr(file_inode(file),
492 ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
493 "user.pvfs2.meta_hint",
494 &val, sizeof(val));
495 if (ret < 0 && ret != -ENODATA)
496 return ret;
497 else if (ret == -ENODATA)
498 val = 0;
499 uval = val;
500 gossip_debug(GOSSIP_FILE_DEBUG,
501 "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n",
502 (unsigned long long)uval);
503 return put_user(uval, (int __user *)arg);
504 } else if (cmd == FS_IOC_SETFLAGS) {
505 ret = 0;
506 if (get_user(uval, (int __user *)arg))
507 return -EFAULT;
508 /*
509 * ORANGEFS_MIRROR_FL is set internally when the mirroring mode
510 * is turned on for a file. The user is not allowed to turn
511 * on this bit, but the bit is present if the user first gets
512 * the flags and then updates the flags with some new
513 * settings. So, we ignore it in the following edit. bligon.
514 */
515 if ((uval & ~ORANGEFS_MIRROR_FL) &
516 (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) {
517 gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
518 return -EINVAL;
519 }
520 val = uval;
521 gossip_debug(GOSSIP_FILE_DEBUG,
522 "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n",
523 (unsigned long long)val);
524 ret = orangefs_inode_setxattr(file_inode(file),
525 ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
526 "user.pvfs2.meta_hint",
527 &val, sizeof(val), 0);
528 }
529
530 return ret;
531 }
532
533 /*
534 * Memory map a region of a file.
535 */
536 static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
537 {
538 gossip_debug(GOSSIP_FILE_DEBUG,
539 "orangefs_file_mmap: called on %s\n",
540 (file ?
541 (char *)file->f_path.dentry->d_name.name :
542 (char *)"Unknown"));
543
544 /* set the sequential readahead hint */
545 vma->vm_flags |= VM_SEQ_READ;
546 vma->vm_flags &= ~VM_RAND_READ;
547
548 /* Use readonly mmap since we cannot support writable maps. */
549 return generic_file_readonly_mmap(file, vma);
550 }
551
552 #define mapping_nrpages(idata) ((idata)->nrpages)
553
554 /*
555 * Called to notify the module that there are no more references to
556 * this file (i.e. no processes have it open).
557 *
558 * \note Not called when each file is closed.
559 */
560 static int orangefs_file_release(struct inode *inode, struct file *file)
561 {
562 gossip_debug(GOSSIP_FILE_DEBUG,
563 "orangefs_file_release: called on %s\n",
564 file->f_path.dentry->d_name.name);
565
566 orangefs_flush_inode(inode);
567
568 /*
569 * remove all associated inode pages from the page cache and mmap
570 * readahead cache (if any); this forces an expensive refresh of
571 * data for the next caller of mmap (or 'get_block' accesses)
572 */
573 if (file->f_path.dentry->d_inode &&
574 file->f_path.dentry->d_inode->i_mapping &&
575 mapping_nrpages(&file->f_path.dentry->d_inode->i_data))
576 truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
577 0);
578 return 0;
579 }
580
581 /*
582 * Push all data for a specific file onto permanent storage.
583 */
584 static int orangefs_fsync(struct file *file,
585 loff_t start,
586 loff_t end,
587 int datasync)
588 {
589 int ret = -EINVAL;
590 struct orangefs_inode_s *orangefs_inode =
591 ORANGEFS_I(file->f_path.dentry->d_inode);
592 struct orangefs_kernel_op_s *new_op = NULL;
593
594 /* required call */
595 filemap_write_and_wait_range(file->f_mapping, start, end);
596
597 new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
598 if (!new_op)
599 return -ENOMEM;
600 new_op->upcall.req.fsync.refn = orangefs_inode->refn;
601
602 ret = service_operation(new_op,
603 "orangefs_fsync",
604 get_interruptible_flag(file->f_path.dentry->d_inode));
605
606 gossip_debug(GOSSIP_FILE_DEBUG,
607 "orangefs_fsync got return value of %d\n",
608 ret);
609
610 op_release(new_op);
611
612 orangefs_flush_inode(file->f_path.dentry->d_inode);
613 return ret;
614 }
615
616 /*
617 * Change the file pointer position for an instance of an open file.
618 *
619 * \note If .llseek is overriden, we must acquire lock as described in
620 * Documentation/filesystems/Locking.
621 *
622 * Future upgrade could support SEEK_DATA and SEEK_HOLE but would
623 * require much changes to the FS
624 */
625 static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
626 {
627 int ret = -EINVAL;
628 struct inode *inode = file->f_path.dentry->d_inode;
629
630 if (!inode) {
631 gossip_err("orangefs_file_llseek: invalid inode (NULL)\n");
632 return ret;
633 }
634
635 if (origin == ORANGEFS_SEEK_END) {
636 /*
637 * revalidate the inode's file size.
638 * NOTE: We are only interested in file size here,
639 * so we set mask accordingly.
640 */
641 ret = orangefs_inode_getattr(inode, ORANGEFS_ATTR_SYS_SIZE, 0);
642 if (ret) {
643 gossip_debug(GOSSIP_FILE_DEBUG,
644 "%s:%s:%d calling make bad inode\n",
645 __FILE__,
646 __func__,
647 __LINE__);
648 orangefs_make_bad_inode(inode);
649 return ret;
650 }
651 }
652
653 gossip_debug(GOSSIP_FILE_DEBUG,
654 "orangefs_file_llseek: offset is %ld | origin is %d"
655 " | inode size is %lu\n",
656 (long)offset,
657 origin,
658 (unsigned long)file->f_path.dentry->d_inode->i_size);
659
660 return generic_file_llseek(file, offset, origin);
661 }
662
663 /*
664 * Support local locks (locks that only this kernel knows about)
665 * if Orangefs was mounted -o local_lock.
666 */
667 static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl)
668 {
669 int rc = -EINVAL;
670
671 if (ORANGEFS_SB(filp->f_inode->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) {
672 if (cmd == F_GETLK) {
673 rc = 0;
674 posix_test_lock(filp, fl);
675 } else {
676 rc = posix_lock_file(filp, fl, NULL);
677 }
678 }
679
680 return rc;
681 }
682
683 /** ORANGEFS implementation of VFS file operations */
684 const struct file_operations orangefs_file_operations = {
685 .llseek = orangefs_file_llseek,
686 .read_iter = orangefs_file_read_iter,
687 .write_iter = orangefs_file_write_iter,
688 .lock = orangefs_lock,
689 .unlocked_ioctl = orangefs_ioctl,
690 .mmap = orangefs_file_mmap,
691 .open = generic_file_open,
692 .release = orangefs_file_release,
693 .fsync = orangefs_fsync,
694 };
This page took 0.072065 seconds and 5 git commands to generate.