NFS: Create a common pgio_alloc and pgio_release function
[deliverable/linux.git] / fs / nfs / read.c
1 /*
2 * linux/fs/nfs/read.c
3 *
4 * Block I/O for NFS
5 *
6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
7 * modified for async RPC by okir@monad.swb.de
8 */
9
10 #include <linux/time.h>
11 #include <linux/kernel.h>
12 #include <linux/errno.h>
13 #include <linux/fcntl.h>
14 #include <linux/stat.h>
15 #include <linux/mm.h>
16 #include <linux/slab.h>
17 #include <linux/pagemap.h>
18 #include <linux/sunrpc/clnt.h>
19 #include <linux/nfs_fs.h>
20 #include <linux/nfs_page.h>
21 #include <linux/module.h>
22
23 #include "nfs4_fs.h"
24 #include "internal.h"
25 #include "iostat.h"
26 #include "fscache.h"
27 #include "pnfs.h"
28
29 #define NFSDBG_FACILITY NFSDBG_PAGECACHE
30
/* Forward declarations; each of these tables is defined further down in this file. */
static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;

/*
 * Slab cache used for struct nfs_rw_header objects; created by
 * nfs_init_readpagecache() and destroyed by nfs_destroy_readpagecache().
 */
static struct kmem_cache *nfs_rdata_cachep;
36
37 struct nfs_rw_header *nfs_readhdr_alloc(void)
38 {
39 struct nfs_rw_header *rhdr;
40
41 rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
42 if (rhdr) {
43 struct nfs_pgio_header *hdr = &rhdr->header;
44
45 INIT_LIST_HEAD(&hdr->pages);
46 INIT_LIST_HEAD(&hdr->rpc_list);
47 spin_lock_init(&hdr->lock);
48 atomic_set(&hdr->refcnt, 0);
49 }
50 return rhdr;
51 }
52 EXPORT_SYMBOL_GPL(nfs_readhdr_alloc);
53
54 void nfs_readhdr_free(struct nfs_pgio_header *hdr)
55 {
56 struct nfs_rw_header *rhdr = container_of(hdr, struct nfs_rw_header, header);
57
58 kmem_cache_free(nfs_rdata_cachep, rhdr);
59 }
60 EXPORT_SYMBOL_GPL(nfs_readhdr_free);
61
62 static
63 int nfs_return_empty_page(struct page *page)
64 {
65 zero_user(page, 0, PAGE_CACHE_SIZE);
66 SetPageUptodate(page);
67 unlock_page(page);
68 return 0;
69 }
70
71 void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
72 struct inode *inode, bool force_mds,
73 const struct nfs_pgio_completion_ops *compl_ops)
74 {
75 struct nfs_server *server = NFS_SERVER(inode);
76 const struct nfs_pageio_ops *pg_ops = &nfs_pageio_read_ops;
77
78 #ifdef CONFIG_NFS_V4_1
79 if (server->pnfs_curr_ld && !force_mds)
80 pg_ops = server->pnfs_curr_ld->pg_read_ops;
81 #endif
82 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, server->rsize, 0);
83 }
84 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
85
86 void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
87 {
88 pgio->pg_ops = &nfs_pageio_read_ops;
89 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
90 }
91 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
92
93 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
94 struct page *page)
95 {
96 struct nfs_page *new;
97 unsigned int len;
98 struct nfs_pageio_descriptor pgio;
99
100 len = nfs_page_length(page);
101 if (len == 0)
102 return nfs_return_empty_page(page);
103 new = nfs_create_request(ctx, inode, page, 0, len);
104 if (IS_ERR(new)) {
105 unlock_page(page);
106 return PTR_ERR(new);
107 }
108 if (len < PAGE_CACHE_SIZE)
109 zero_user_segment(page, len, PAGE_CACHE_SIZE);
110
111 nfs_pageio_init_read(&pgio, inode, false,
112 &nfs_async_read_completion_ops);
113 nfs_pageio_add_request(&pgio, new);
114 nfs_pageio_complete(&pgio);
115 NFS_I(inode)->read_io += pgio.pg_bytes_written;
116 return 0;
117 }
118
119 static void nfs_readpage_release(struct nfs_page *req)
120 {
121 struct inode *d_inode = req->wb_context->dentry->d_inode;
122
123 if (PageUptodate(req->wb_page))
124 nfs_readpage_to_fscache(d_inode, req->wb_page, 0);
125
126 unlock_page(req->wb_page);
127
128 dprintk("NFS: read done (%s/%Lu %d@%Ld)\n",
129 req->wb_context->dentry->d_inode->i_sb->s_id,
130 (unsigned long long)NFS_FILEID(req->wb_context->dentry->d_inode),
131 req->wb_bytes,
132 (long long)req_offset(req));
133 nfs_release_request(req);
134 }
135
136 /* Note io was page aligned */
137 static void nfs_read_completion(struct nfs_pgio_header *hdr)
138 {
139 unsigned long bytes = 0;
140
141 if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
142 goto out;
143 while (!list_empty(&hdr->pages)) {
144 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
145 struct page *page = req->wb_page;
146
147 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
148 if (bytes > hdr->good_bytes)
149 zero_user(page, 0, PAGE_SIZE);
150 else if (hdr->good_bytes - bytes < PAGE_SIZE)
151 zero_user_segment(page,
152 hdr->good_bytes & ~PAGE_MASK,
153 PAGE_SIZE);
154 }
155 bytes += req->wb_bytes;
156 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
157 if (bytes <= hdr->good_bytes)
158 SetPageUptodate(page);
159 } else
160 SetPageUptodate(page);
161 nfs_list_remove_request(req);
162 nfs_readpage_release(req);
163 }
164 out:
165 hdr->release(hdr);
166 }
167
168 int nfs_initiate_read(struct rpc_clnt *clnt,
169 struct nfs_pgio_data *data,
170 const struct rpc_call_ops *call_ops, int flags)
171 {
172 struct inode *inode = data->header->inode;
173 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
174 struct rpc_task *task;
175 struct rpc_message msg = {
176 .rpc_argp = &data->args,
177 .rpc_resp = &data->res,
178 .rpc_cred = data->header->cred,
179 };
180 struct rpc_task_setup task_setup_data = {
181 .task = &data->task,
182 .rpc_client = clnt,
183 .rpc_message = &msg,
184 .callback_ops = call_ops,
185 .callback_data = data,
186 .workqueue = nfsiod_workqueue,
187 .flags = RPC_TASK_ASYNC | swap_flags | flags,
188 };
189
190 /* Set up the initial task struct. */
191 NFS_PROTO(inode)->read_setup(data, &msg);
192
193 dprintk("NFS: %5u initiated read call (req %s/%llu, %u bytes @ "
194 "offset %llu)\n",
195 data->task.tk_pid,
196 inode->i_sb->s_id,
197 (unsigned long long)NFS_FILEID(inode),
198 data->args.count,
199 (unsigned long long)data->args.offset);
200
201 task = rpc_run_task(&task_setup_data);
202 if (IS_ERR(task))
203 return PTR_ERR(task);
204 rpc_put_task(task);
205 return 0;
206 }
207 EXPORT_SYMBOL_GPL(nfs_initiate_read);
208
209 /*
210 * Set up the NFS read request struct
211 */
212 static void nfs_read_rpcsetup(struct nfs_pgio_data *data,
213 unsigned int count, unsigned int offset)
214 {
215 struct nfs_page *req = data->header->req;
216
217 data->args.fh = NFS_FH(data->header->inode);
218 data->args.offset = req_offset(req) + offset;
219 data->args.pgbase = req->wb_pgbase + offset;
220 data->args.pages = data->pages.pagevec;
221 data->args.count = count;
222 data->args.context = get_nfs_open_context(req->wb_context);
223 data->args.lock_context = req->wb_lock_context;
224
225 data->res.fattr = &data->fattr;
226 data->res.count = count;
227 data->res.eof = 0;
228 nfs_fattr_init(&data->fattr);
229 }
230
231 static int nfs_do_read(struct nfs_pgio_data *data,
232 const struct rpc_call_ops *call_ops)
233 {
234 struct inode *inode = data->header->inode;
235
236 return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
237 }
238
239 static int
240 nfs_do_multiple_reads(struct list_head *head,
241 const struct rpc_call_ops *call_ops)
242 {
243 struct nfs_pgio_data *data;
244 int ret = 0;
245
246 while (!list_empty(head)) {
247 int ret2;
248
249 data = list_first_entry(head, struct nfs_pgio_data, list);
250 list_del_init(&data->list);
251
252 ret2 = nfs_do_read(data, call_ops);
253 if (ret == 0)
254 ret = ret2;
255 }
256 return ret;
257 }
258
259 static void
260 nfs_async_read_error(struct list_head *head)
261 {
262 struct nfs_page *req;
263
264 while (!list_empty(head)) {
265 req = nfs_list_entry(head->next);
266 nfs_list_remove_request(req);
267 nfs_readpage_release(req);
268 }
269 }
270
271 static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
272 .error_cleanup = nfs_async_read_error,
273 .completion = nfs_read_completion,
274 };
275
276 static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
277 struct nfs_pgio_header *hdr)
278 {
279 set_bit(NFS_IOHDR_REDO, &hdr->flags);
280 while (!list_empty(&hdr->rpc_list)) {
281 struct nfs_pgio_data *data = list_first_entry(&hdr->rpc_list,
282 struct nfs_pgio_data, list);
283 list_del(&data->list);
284 nfs_pgio_data_release(data);
285 }
286 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
287 }
288
289 /*
290 * Generate multiple requests to fill a single page.
291 *
292 * We optimize to reduce the number of read operations on the wire. If we
293 * detect that we're reading a page, or an area of a page, that is past the
294 * end of file, we do not generate NFS read operations but just clear the
295 * parts of the page that would have come back zero from the server anyway.
296 *
297 * We rely on the cached value of i_size to make this determination; another
298 * client can fill pages on the server past our cached end-of-file, but we
299 * won't see the new data until our attribute cache is updated. This is more
300 * or less conventional NFS client behavior.
301 */
302 static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
303 struct nfs_pgio_header *hdr)
304 {
305 struct nfs_page *req = hdr->req;
306 struct page *page = req->wb_page;
307 struct nfs_pgio_data *data;
308 size_t rsize = desc->pg_bsize, nbytes;
309 unsigned int offset;
310
311 offset = 0;
312 nbytes = desc->pg_count;
313 do {
314 size_t len = min(nbytes,rsize);
315
316 data = nfs_pgio_data_alloc(hdr, 1);
317 if (!data) {
318 nfs_pagein_error(desc, hdr);
319 return -ENOMEM;
320 }
321 data->pages.pagevec[0] = page;
322 nfs_read_rpcsetup(data, len, offset);
323 list_add(&data->list, &hdr->rpc_list);
324 nbytes -= len;
325 offset += len;
326 } while (nbytes != 0);
327
328 nfs_list_remove_request(req);
329 nfs_list_add_request(req, &hdr->pages);
330 desc->pg_rpc_callops = &nfs_read_common_ops;
331 return 0;
332 }
333
334 static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
335 struct nfs_pgio_header *hdr)
336 {
337 struct nfs_page *req;
338 struct page **pages;
339 struct nfs_pgio_data *data;
340 struct list_head *head = &desc->pg_list;
341
342 data = nfs_pgio_data_alloc(hdr, nfs_page_array_len(desc->pg_base,
343 desc->pg_count));
344 if (!data) {
345 nfs_pagein_error(desc, hdr);
346 return -ENOMEM;
347 }
348
349 pages = data->pages.pagevec;
350 while (!list_empty(head)) {
351 req = nfs_list_entry(head->next);
352 nfs_list_remove_request(req);
353 nfs_list_add_request(req, &hdr->pages);
354 *pages++ = req->wb_page;
355 }
356
357 nfs_read_rpcsetup(data, desc->pg_count, 0);
358 list_add(&data->list, &hdr->rpc_list);
359 desc->pg_rpc_callops = &nfs_read_common_ops;
360 return 0;
361 }
362
363 int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
364 struct nfs_pgio_header *hdr)
365 {
366 if (desc->pg_bsize < PAGE_CACHE_SIZE)
367 return nfs_pagein_multi(desc, hdr);
368 return nfs_pagein_one(desc, hdr);
369 }
370 EXPORT_SYMBOL_GPL(nfs_generic_pagein);
371
372 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
373 {
374 struct nfs_rw_header *rhdr;
375 struct nfs_pgio_header *hdr;
376 int ret;
377
378 rhdr = nfs_readhdr_alloc();
379 if (!rhdr) {
380 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
381 return -ENOMEM;
382 }
383 hdr = &rhdr->header;
384 nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
385 atomic_inc(&hdr->refcnt);
386 ret = nfs_generic_pagein(desc, hdr);
387 if (ret == 0)
388 ret = nfs_do_multiple_reads(&hdr->rpc_list,
389 desc->pg_rpc_callops);
390 if (atomic_dec_and_test(&hdr->refcnt))
391 hdr->completion_ops->completion(hdr);
392 return ret;
393 }
394
395 static const struct nfs_pageio_ops nfs_pageio_read_ops = {
396 .pg_test = nfs_generic_pg_test,
397 .pg_doio = nfs_generic_pg_readpages,
398 };
399
400 /*
401 * This is the callback from RPC telling us whether a reply was
402 * received or some error occurred (timeout or socket shutdown).
403 */
404 int nfs_readpage_result(struct rpc_task *task, struct nfs_pgio_data *data)
405 {
406 struct inode *inode = data->header->inode;
407 int status;
408
409 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
410 task->tk_status);
411
412 status = NFS_PROTO(inode)->read_done(task, data);
413 if (status != 0)
414 return status;
415
416 nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);
417
418 if (task->tk_status == -ESTALE) {
419 set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
420 nfs_mark_for_revalidate(inode);
421 }
422 return 0;
423 }
424
425 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_pgio_data *data)
426 {
427 struct nfs_pgio_args *argp = &data->args;
428 struct nfs_pgio_res *resp = &data->res;
429
430 /* This is a short read! */
431 nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
432 /* Has the server at least made some progress? */
433 if (resp->count == 0) {
434 nfs_set_pgio_error(data->header, -EIO, argp->offset);
435 return;
436 }
437 /* Yes, so retry the read at the end of the data */
438 data->mds_offset += resp->count;
439 argp->offset += resp->count;
440 argp->pgbase += resp->count;
441 argp->count -= resp->count;
442 rpc_restart_call_prepare(task);
443 }
444
445 static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
446 {
447 struct nfs_pgio_data *data = calldata;
448 struct nfs_pgio_header *hdr = data->header;
449
450 /* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
451 if (nfs_readpage_result(task, data) != 0)
452 return;
453 if (task->tk_status < 0)
454 nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
455 else if (data->res.eof) {
456 loff_t bound;
457
458 bound = data->args.offset + data->res.count;
459 spin_lock(&hdr->lock);
460 if (bound < hdr->io_start + hdr->good_bytes) {
461 set_bit(NFS_IOHDR_EOF, &hdr->flags);
462 clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
463 hdr->good_bytes = bound - hdr->io_start;
464 }
465 spin_unlock(&hdr->lock);
466 } else if (data->res.count != data->args.count)
467 nfs_readpage_retry(task, data);
468 }
469
/* rpc_release handler for reads: release the nfs_pgio_data passed as calldata. */
static void nfs_readpage_release_common(void *calldata)
{
	struct nfs_pgio_data *rdata = calldata;

	nfs_pgio_data_release(rdata);
}
474
475 void nfs_read_prepare(struct rpc_task *task, void *calldata)
476 {
477 struct nfs_pgio_data *data = calldata;
478 int err;
479 err = NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
480 if (err)
481 rpc_exit(task, err);
482 }
483
484 static const struct rpc_call_ops nfs_read_common_ops = {
485 .rpc_call_prepare = nfs_read_prepare,
486 .rpc_call_done = nfs_readpage_result_common,
487 .rpc_release = nfs_readpage_release_common,
488 };
489
490 /*
491 * Read a page over NFS.
492 * We read the page synchronously in the following case:
493 * - The error flag is set for this page. This happens only when a
494 * previous async read operation failed.
495 */
496 int nfs_readpage(struct file *file, struct page *page)
497 {
498 struct nfs_open_context *ctx;
499 struct inode *inode = page_file_mapping(page)->host;
500 int error;
501
502 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
503 page, PAGE_CACHE_SIZE, page_file_index(page));
504 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
505 nfs_add_stats(inode, NFSIOS_READPAGES, 1);
506
507 /*
508 * Try to flush any pending writes to the file..
509 *
510 * NOTE! Because we own the page lock, there cannot
511 * be any new pending writes generated at this point
512 * for this page (other pages can be written to).
513 */
514 error = nfs_wb_page(inode, page);
515 if (error)
516 goto out_unlock;
517 if (PageUptodate(page))
518 goto out_unlock;
519
520 error = -ESTALE;
521 if (NFS_STALE(inode))
522 goto out_unlock;
523
524 if (file == NULL) {
525 error = -EBADF;
526 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
527 if (ctx == NULL)
528 goto out_unlock;
529 } else
530 ctx = get_nfs_open_context(nfs_file_open_context(file));
531
532 if (!IS_SYNC(inode)) {
533 error = nfs_readpage_from_fscache(ctx, inode, page);
534 if (error == 0)
535 goto out;
536 }
537
538 error = nfs_readpage_async(ctx, inode, page);
539
540 out:
541 put_nfs_open_context(ctx);
542 return error;
543 out_unlock:
544 unlock_page(page);
545 return error;
546 }
547
/*
 * Context handed by nfs_readpages() to readpage_async_filler() through
 * read_cache_pages().
 */
struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;	/* descriptor the requests are added to */
	struct nfs_open_context *ctx;		/* open context used to create requests */
};
552
553 static int
554 readpage_async_filler(void *data, struct page *page)
555 {
556 struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
557 struct inode *inode = page_file_mapping(page)->host;
558 struct nfs_page *new;
559 unsigned int len;
560 int error;
561
562 len = nfs_page_length(page);
563 if (len == 0)
564 return nfs_return_empty_page(page);
565
566 new = nfs_create_request(desc->ctx, inode, page, 0, len);
567 if (IS_ERR(new))
568 goto out_error;
569
570 if (len < PAGE_CACHE_SIZE)
571 zero_user_segment(page, len, PAGE_CACHE_SIZE);
572 if (!nfs_pageio_add_request(desc->pgio, new)) {
573 error = desc->pgio->pg_error;
574 goto out_unlock;
575 }
576 return 0;
577 out_error:
578 error = PTR_ERR(new);
579 out_unlock:
580 unlock_page(page);
581 return error;
582 }
583
584 int nfs_readpages(struct file *filp, struct address_space *mapping,
585 struct list_head *pages, unsigned nr_pages)
586 {
587 struct nfs_pageio_descriptor pgio;
588 struct nfs_readdesc desc = {
589 .pgio = &pgio,
590 };
591 struct inode *inode = mapping->host;
592 unsigned long npages;
593 int ret = -ESTALE;
594
595 dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
596 inode->i_sb->s_id,
597 (unsigned long long)NFS_FILEID(inode),
598 nr_pages);
599 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
600
601 if (NFS_STALE(inode))
602 goto out;
603
604 if (filp == NULL) {
605 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
606 if (desc.ctx == NULL)
607 return -EBADF;
608 } else
609 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
610
611 /* attempt to read as many of the pages as possible from the cache
612 * - this returns -ENOBUFS immediately if the cookie is negative
613 */
614 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
615 pages, &nr_pages);
616 if (ret == 0)
617 goto read_complete; /* all pages were read */
618
619 nfs_pageio_init_read(&pgio, inode, false,
620 &nfs_async_read_completion_ops);
621
622 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
623
624 nfs_pageio_complete(&pgio);
625 NFS_I(inode)->read_io += pgio.pg_bytes_written;
626 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
627 nfs_add_stats(inode, NFSIOS_READPAGES, npages);
628 read_complete:
629 put_nfs_open_context(desc.ctx);
630 out:
631 return ret;
632 }
633
634 int __init nfs_init_readpagecache(void)
635 {
636 nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
637 sizeof(struct nfs_rw_header),
638 0, SLAB_HWCACHE_ALIGN,
639 NULL);
640 if (nfs_rdata_cachep == NULL)
641 return -ENOMEM;
642
643 return 0;
644 }
645
646 void nfs_destroy_readpagecache(void)
647 {
648 kmem_cache_destroy(nfs_rdata_cachep);
649 }
This page took 0.043247 seconds and 5 git commands to generate.