f2fs: remove unneeded readahead in find_fsync_dnodes
[deliverable/linux.git] / fs / f2fs / recovery.c
1 /*
2 * fs/f2fs/recovery.c
3 *
4 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
5 * http://www.samsung.com/
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
10 */
11 #include <linux/fs.h>
12 #include <linux/f2fs_fs.h>
13 #include "f2fs.h"
14 #include "node.h"
15 #include "segment.h"
16
17 /*
18 * Roll forward recovery scenarios.
19 *
20 * [Term] F: fsync_mark, D: dentry_mark
21 *
22 * 1. inode(x) | CP | inode(x) | dnode(F)
23 * -> Update the latest inode(x).
24 *
25 * 2. inode(x) | CP | inode(F) | dnode(F)
26 * -> No problem.
27 *
28 * 3. inode(x) | CP | dnode(F) | inode(x)
29 * -> Recover to the latest dnode(F), and drop the last inode(x)
30 *
31 * 4. inode(x) | CP | dnode(F) | inode(F)
32 * -> No problem.
33 *
34 * 5. CP | inode(x) | dnode(F)
35 * -> The inode(DF) was missing. Should drop this dnode(F).
36 *
37 * 6. CP | inode(DF) | dnode(F)
38 * -> No problem.
39 *
40 * 7. CP | dnode(F) | inode(DF)
41 * -> If f2fs_iget fails, then goto next to find inode(DF).
42 *
43 * 8. CP | dnode(F) | inode(x)
44 * -> If f2fs_iget fails, then goto next to find inode(DF).
45 * But it will fail due to no inode(DF).
46 */
47
/*
 * Slab cache used for struct fsync_inode_entry objects. It is created at the
 * start of recover_fsync_data() and destroyed again before that function
 * returns.
 */
static struct kmem_cache *fsync_entry_slab;
49
50 bool space_for_roll_forward(struct f2fs_sb_info *sbi)
51 {
52 if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
53 > sbi->user_block_count)
54 return false;
55 return true;
56 }
57
58 static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
59 nid_t ino)
60 {
61 struct fsync_inode_entry *entry;
62
63 list_for_each_entry(entry, head, list)
64 if (entry->inode->i_ino == ino)
65 return entry;
66
67 return NULL;
68 }
69
70 static struct fsync_inode_entry *add_fsync_inode(struct list_head *head,
71 struct inode *inode)
72 {
73 struct fsync_inode_entry *entry;
74
75 entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
76 if (!entry)
77 return NULL;
78
79 entry->inode = inode;
80 list_add_tail(&entry->list, head);
81
82 return entry;
83 }
84
85 static void del_fsync_inode(struct fsync_inode_entry *entry)
86 {
87 iput(entry->inode);
88 list_del(&entry->list);
89 kmem_cache_free(fsync_entry_slab, entry);
90 }
91
92 static int recover_dentry(struct inode *inode, struct page *ipage)
93 {
94 struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
95 nid_t pino = le32_to_cpu(raw_inode->i_pino);
96 struct f2fs_dir_entry *de;
97 struct qstr name;
98 struct page *page;
99 struct inode *dir, *einode;
100 int err = 0;
101
102 dir = f2fs_iget(inode->i_sb, pino);
103 if (IS_ERR(dir)) {
104 err = PTR_ERR(dir);
105 goto out;
106 }
107
108 if (file_enc_name(inode)) {
109 iput(dir);
110 return 0;
111 }
112
113 name.len = le32_to_cpu(raw_inode->i_namelen);
114 name.name = raw_inode->i_name;
115
116 if (unlikely(name.len > F2FS_NAME_LEN)) {
117 WARN_ON(1);
118 err = -ENAMETOOLONG;
119 goto out_err;
120 }
121 retry:
122 de = f2fs_find_entry(dir, &name, &page);
123 if (de && inode->i_ino == le32_to_cpu(de->ino))
124 goto out_unmap_put;
125
126 if (de) {
127 einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
128 if (IS_ERR(einode)) {
129 WARN_ON(1);
130 err = PTR_ERR(einode);
131 if (err == -ENOENT)
132 err = -EEXIST;
133 goto out_unmap_put;
134 }
135 err = acquire_orphan_inode(F2FS_I_SB(inode));
136 if (err) {
137 iput(einode);
138 goto out_unmap_put;
139 }
140 f2fs_delete_entry(de, page, dir, einode);
141 iput(einode);
142 goto retry;
143 }
144 err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
145 if (err)
146 goto out_err;
147
148 if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
149 iput(dir);
150 } else {
151 add_dirty_dir_inode(dir);
152 set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
153 }
154
155 goto out;
156
157 out_unmap_put:
158 f2fs_dentry_kunmap(dir, page);
159 f2fs_put_page(page, 0);
160 out_err:
161 iput(dir);
162 out:
163 f2fs_msg(inode->i_sb, KERN_NOTICE,
164 "%s: ino = %x, name = %s, dir = %lx, err = %d",
165 __func__, ino_of_node(ipage), raw_inode->i_name,
166 IS_ERR(dir) ? 0 : dir->i_ino, err);
167 return err;
168 }
169
170 static void recover_inode(struct inode *inode, struct page *page)
171 {
172 struct f2fs_inode *raw = F2FS_INODE(page);
173 char *name;
174
175 inode->i_mode = le16_to_cpu(raw->i_mode);
176 i_size_write(inode, le64_to_cpu(raw->i_size));
177 inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
178 inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
179 inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
180 inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
181 inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
182 inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
183
184 if (file_enc_name(inode))
185 name = "<encrypted>";
186 else
187 name = F2FS_INODE(page)->i_name;
188
189 f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
190 ino_of_node(page), name);
191 }
192
193 static bool is_same_inode(struct inode *inode, struct page *ipage)
194 {
195 struct f2fs_inode *ri = F2FS_INODE(ipage);
196 struct timespec disk;
197
198 if (!IS_INODE(ipage))
199 return true;
200
201 disk.tv_sec = le64_to_cpu(ri->i_ctime);
202 disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
203 if (timespec_compare(&inode->i_ctime, &disk) > 0)
204 return false;
205
206 disk.tv_sec = le64_to_cpu(ri->i_atime);
207 disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
208 if (timespec_compare(&inode->i_atime, &disk) > 0)
209 return false;
210
211 disk.tv_sec = le64_to_cpu(ri->i_mtime);
212 disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
213 if (timespec_compare(&inode->i_mtime, &disk) > 0)
214 return false;
215
216 return true;
217 }
218
219 static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
220 {
221 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
222 struct curseg_info *curseg;
223 struct inode *inode;
224 struct page *page = NULL;
225 block_t blkaddr;
226 int err = 0;
227
228 /* get node pages in the current segment */
229 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
230 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
231
232 while (1) {
233 struct fsync_inode_entry *entry;
234
235 if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
236 return 0;
237
238 page = get_tmp_page(sbi, blkaddr);
239
240 if (cp_ver != cpver_of_node(page))
241 break;
242
243 if (!is_fsync_dnode(page))
244 goto next;
245
246 entry = get_fsync_inode(head, ino_of_node(page));
247 if (entry) {
248 if (!is_same_inode(entry->inode, page))
249 goto next;
250 } else {
251 if (IS_INODE(page) && is_dent_dnode(page)) {
252 err = recover_inode_page(sbi, page);
253 if (err)
254 break;
255 }
256
257 /*
258 * CP | dnode(F) | inode(DF)
259 * For this case, we should not give up now.
260 */
261 inode = f2fs_iget(sbi->sb, ino_of_node(page));
262 if (IS_ERR(inode)) {
263 err = PTR_ERR(inode);
264 if (err == -ENOENT) {
265 err = 0;
266 goto next;
267 }
268 break;
269 }
270
271 /* add this fsync inode to the list */
272 entry = add_fsync_inode(head, inode);
273 if (!entry) {
274 err = -ENOMEM;
275 iput(inode);
276 break;
277 }
278 }
279 entry->blkaddr = blkaddr;
280
281 if (IS_INODE(page) && is_dent_dnode(page))
282 entry->last_dentry = blkaddr;
283 next:
284 /* check next segment */
285 blkaddr = next_blkaddr_of_node(page);
286 f2fs_put_page(page, 1);
287
288 ra_meta_pages_cond(sbi, blkaddr);
289 }
290 f2fs_put_page(page, 1);
291 return err;
292 }
293
294 static void destroy_fsync_dnodes(struct list_head *head)
295 {
296 struct fsync_inode_entry *entry, *tmp;
297
298 list_for_each_entry_safe(entry, tmp, head, list)
299 del_fsync_inode(entry);
300 }
301
302 static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
303 block_t blkaddr, struct dnode_of_data *dn)
304 {
305 struct seg_entry *sentry;
306 unsigned int segno = GET_SEGNO(sbi, blkaddr);
307 unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
308 struct f2fs_summary_block *sum_node;
309 struct f2fs_summary sum;
310 struct page *sum_page, *node_page;
311 struct dnode_of_data tdn = *dn;
312 nid_t ino, nid;
313 struct inode *inode;
314 unsigned int offset;
315 block_t bidx;
316 int i;
317
318 sentry = get_seg_entry(sbi, segno);
319 if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
320 return 0;
321
322 /* Get the previous summary */
323 for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
324 struct curseg_info *curseg = CURSEG_I(sbi, i);
325 if (curseg->segno == segno) {
326 sum = curseg->sum_blk->entries[blkoff];
327 goto got_it;
328 }
329 }
330
331 sum_page = get_sum_page(sbi, segno);
332 sum_node = (struct f2fs_summary_block *)page_address(sum_page);
333 sum = sum_node->entries[blkoff];
334 f2fs_put_page(sum_page, 1);
335 got_it:
336 /* Use the locked dnode page and inode */
337 nid = le32_to_cpu(sum.nid);
338 if (dn->inode->i_ino == nid) {
339 tdn.nid = nid;
340 if (!dn->inode_page_locked)
341 lock_page(dn->inode_page);
342 tdn.node_page = dn->inode_page;
343 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
344 goto truncate_out;
345 } else if (dn->nid == nid) {
346 tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
347 goto truncate_out;
348 }
349
350 /* Get the node page */
351 node_page = get_node_page(sbi, nid);
352 if (IS_ERR(node_page))
353 return PTR_ERR(node_page);
354
355 offset = ofs_of_node(node_page);
356 ino = ino_of_node(node_page);
357 f2fs_put_page(node_page, 1);
358
359 if (ino != dn->inode->i_ino) {
360 /* Deallocate previous index in the node page */
361 inode = f2fs_iget(sbi->sb, ino);
362 if (IS_ERR(inode))
363 return PTR_ERR(inode);
364 } else {
365 inode = dn->inode;
366 }
367
368 bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
369
370 /*
371 * if inode page is locked, unlock temporarily, but its reference
372 * count keeps alive.
373 */
374 if (ino == dn->inode->i_ino && dn->inode_page_locked)
375 unlock_page(dn->inode_page);
376
377 set_new_dnode(&tdn, inode, NULL, NULL, 0);
378 if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
379 goto out;
380
381 if (tdn.data_blkaddr == blkaddr)
382 truncate_data_blocks_range(&tdn, 1);
383
384 f2fs_put_dnode(&tdn);
385 out:
386 if (ino != dn->inode->i_ino)
387 iput(inode);
388 else if (dn->inode_page_locked)
389 lock_page(dn->inode_page);
390 return 0;
391
392 truncate_out:
393 if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
394 truncate_data_blocks_range(&tdn, 1);
395 if (dn->inode->i_ino == nid && !dn->inode_page_locked)
396 unlock_page(dn->inode_page);
397 return 0;
398 }
399
400 static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
401 struct page *page, block_t blkaddr)
402 {
403 struct dnode_of_data dn;
404 struct node_info ni;
405 unsigned int start, end;
406 int err = 0, recovered = 0;
407
408 /* step 1: recover xattr */
409 if (IS_INODE(page)) {
410 recover_inline_xattr(inode, page);
411 } else if (f2fs_has_xattr_block(ofs_of_node(page))) {
412 /*
413 * Deprecated; xattr blocks should be found from cold log.
414 * But, we should remain this for backward compatibility.
415 */
416 recover_xattr_data(inode, page, blkaddr);
417 goto out;
418 }
419
420 /* step 2: recover inline data */
421 if (recover_inline_data(inode, page))
422 goto out;
423
424 /* step 3: recover data indices */
425 start = start_bidx_of_node(ofs_of_node(page), inode);
426 end = start + ADDRS_PER_PAGE(page, inode);
427
428 set_new_dnode(&dn, inode, NULL, NULL, 0);
429
430 err = get_dnode_of_data(&dn, start, ALLOC_NODE);
431 if (err)
432 goto out;
433
434 f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
435
436 get_node_info(sbi, dn.nid, &ni);
437 f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
438 f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
439
440 for (; start < end; start++, dn.ofs_in_node++) {
441 block_t src, dest;
442
443 src = datablock_addr(dn.node_page, dn.ofs_in_node);
444 dest = datablock_addr(page, dn.ofs_in_node);
445
446 /* skip recovering if dest is the same as src */
447 if (src == dest)
448 continue;
449
450 /* dest is invalid, just invalidate src block */
451 if (dest == NULL_ADDR) {
452 truncate_data_blocks_range(&dn, 1);
453 continue;
454 }
455
456 /*
457 * dest is reserved block, invalidate src block
458 * and then reserve one new block in dnode page.
459 */
460 if (dest == NEW_ADDR) {
461 truncate_data_blocks_range(&dn, 1);
462 err = reserve_new_block(&dn);
463 f2fs_bug_on(sbi, err);
464 continue;
465 }
466
467 /* dest is valid block, try to recover from src to dest */
468 if (is_valid_blkaddr(sbi, dest, META_POR)) {
469
470 if (src == NULL_ADDR) {
471 err = reserve_new_block(&dn);
472 /* We should not get -ENOSPC */
473 f2fs_bug_on(sbi, err);
474 }
475
476 /* Check the previous node page having this index */
477 err = check_index_in_prev_nodes(sbi, dest, &dn);
478 if (err)
479 goto err;
480
481 /* write dummy data page */
482 f2fs_replace_block(sbi, &dn, src, dest,
483 ni.version, false, false);
484 recovered++;
485 }
486 }
487
488 if (IS_INODE(dn.node_page))
489 sync_inode_page(&dn);
490
491 copy_node_footer(dn.node_page, page);
492 fill_node_footer(dn.node_page, dn.nid, ni.ino,
493 ofs_of_node(page), false);
494 set_page_dirty(dn.node_page);
495 err:
496 f2fs_put_dnode(&dn);
497 out:
498 f2fs_msg(sbi->sb, KERN_NOTICE,
499 "recover_data: ino = %lx, recovered = %d blocks, err = %d",
500 inode->i_ino, recovered, err);
501 return err;
502 }
503
504 static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
505 {
506 unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
507 struct curseg_info *curseg;
508 struct page *page = NULL;
509 int err = 0;
510 block_t blkaddr;
511
512 /* get node pages in the current segment */
513 curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
514 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
515
516 while (1) {
517 struct fsync_inode_entry *entry;
518
519 if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
520 break;
521
522 ra_meta_pages_cond(sbi, blkaddr);
523
524 page = get_tmp_page(sbi, blkaddr);
525
526 if (cp_ver != cpver_of_node(page)) {
527 f2fs_put_page(page, 1);
528 break;
529 }
530
531 entry = get_fsync_inode(head, ino_of_node(page));
532 if (!entry)
533 goto next;
534 /*
535 * inode(x) | CP | inode(x) | dnode(F)
536 * In this case, we can lose the latest inode(x).
537 * So, call recover_inode for the inode update.
538 */
539 if (IS_INODE(page))
540 recover_inode(entry->inode, page);
541 if (entry->last_dentry == blkaddr) {
542 err = recover_dentry(entry->inode, page);
543 if (err) {
544 f2fs_put_page(page, 1);
545 break;
546 }
547 }
548 err = do_recover_data(sbi, entry->inode, page, blkaddr);
549 if (err) {
550 f2fs_put_page(page, 1);
551 break;
552 }
553
554 if (entry->blkaddr == blkaddr)
555 del_fsync_inode(entry);
556 next:
557 /* check next segment */
558 blkaddr = next_blkaddr_of_node(page);
559 f2fs_put_page(page, 1);
560 }
561 if (!err)
562 allocate_new_segments(sbi);
563 return err;
564 }
565
566 int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
567 {
568 struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
569 struct list_head inode_list;
570 block_t blkaddr;
571 int err;
572 int ret = 0;
573 bool need_writecp = false;
574
575 fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
576 sizeof(struct fsync_inode_entry));
577 if (!fsync_entry_slab)
578 return -ENOMEM;
579
580 INIT_LIST_HEAD(&inode_list);
581
582 /* prevent checkpoint */
583 mutex_lock(&sbi->cp_mutex);
584
585 blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
586
587 /* step #1: find fsynced inode numbers */
588 err = find_fsync_dnodes(sbi, &inode_list);
589 if (err || list_empty(&inode_list))
590 goto out;
591
592 if (check_only) {
593 ret = 1;
594 goto out;
595 }
596
597 need_writecp = true;
598
599 /* step #2: recover data */
600 err = recover_data(sbi, &inode_list);
601 if (!err)
602 f2fs_bug_on(sbi, !list_empty(&inode_list));
603 out:
604 destroy_fsync_dnodes(&inode_list);
605 kmem_cache_destroy(fsync_entry_slab);
606
607 /* truncate meta pages to be used by the recovery */
608 truncate_inode_pages_range(META_MAPPING(sbi),
609 (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
610
611 if (err) {
612 truncate_inode_pages_final(NODE_MAPPING(sbi));
613 truncate_inode_pages_final(META_MAPPING(sbi));
614 }
615
616 clear_sbi_flag(sbi, SBI_POR_DOING);
617 if (err) {
618 bool invalidate = false;
619
620 if (discard_next_dnode(sbi, blkaddr))
621 invalidate = true;
622
623 /* Flush all the NAT/SIT pages */
624 while (get_pages(sbi, F2FS_DIRTY_META))
625 sync_meta_pages(sbi, META, LONG_MAX);
626
627 /* invalidate temporary meta page */
628 if (invalidate)
629 invalidate_mapping_pages(META_MAPPING(sbi),
630 blkaddr, blkaddr);
631
632 set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
633 mutex_unlock(&sbi->cp_mutex);
634 } else if (need_writecp) {
635 struct cp_control cpc = {
636 .reason = CP_RECOVERY,
637 };
638 mutex_unlock(&sbi->cp_mutex);
639 err = write_checkpoint(sbi, &cpc);
640 } else {
641 mutex_unlock(&sbi->cp_mutex);
642 }
643 return ret ? ret: err;
644 }
This page took 0.057344 seconds and 5 git commands to generate.