recovery.c source code [linux/fs/f2fs/recovery.c]

1	// SPDX-License-Identifier: GPL-2.0
2	/*
3	* fs/f2fs/recovery.c
4	*
5	* Copyright (c) 2012 Samsung Electronics Co., Ltd.
6	* http://www.samsung.com/
7	*/
8	#include <linux/unaligned.h>
9	#include <linux/fs.h>
10	#include <linux/f2fs_fs.h>
11	#include <linux/sched/mm.h>
12	#include "f2fs.h"
13	#include "node.h"
14	#include "segment.h"
15
/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */
46
/* Slab cache from which struct fsync_inode_entry objects are allocated. */
static struct kmem_cache *fsync_entry_slab;
48
49	bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
50	{
51	s64 nalloc = percpu_counter_sum_positive(fbc: &sbi->alloc_valid_block_count);
52
53	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
54	return false;
55	if (NM_I(sbi)->max_rf_node_blocks &&
56	percpu_counter_sum_positive(fbc: &sbi->rf_node_block_count) >=
57	NM_I(sbi)->max_rf_node_blocks)
58	return false;
59	return true;
60	}
61
62	static struct fsync_inode_entry get_fsync_inode(struct* list_head *head,
63	nid_t ino)
64	{
65	struct fsync_inode_entry *entry;
66
67	list_for_each_entry(entry, head, list)
68	if (entry->inode->i_ino == ino)
69	return entry;
70
71	return NULL;
72	}
73
74	static struct fsync_inode_entry add_fsync_inode(struct* f2fs_sb_info *sbi,
75	struct list_head *head, nid_t ino, bool quota_inode)
76	{
77	struct inode *inode;
78	struct fsync_inode_entry *entry;
79	int err;
80
81	inode = f2fs_iget_retry(sb: sbi->sb, ino);
82	if (IS_ERR(ptr: inode))
83	return ERR_CAST(ptr: inode);
84
85	err = f2fs_dquot_initialize(inode);
86	if (err)
87	goto err_out;
88
89	if (quota_inode) {
90	err = dquot_alloc_inode(inode);
91	if (err)
92	goto err_out;
93	}
94
95	entry = f2fs_kmem_cache_alloc(cachep: fsync_entry_slab,
96	GFP_F2FS_ZERO, nofail: true, NULL);
97	entry->inode = inode;
98	list_add_tail(new: &entry->list, head);
99
100	return entry;
101	err_out:
102	iput(inode);
103	return ERR_PTR(error: err);
104	}
105
106	static void del_fsync_inode(struct fsync_inode_entry entry, int* drop)
107	{
108	if (drop) {
109	/ inode should not be recovered, drop it /
110	f2fs_inode_synced(inode: entry->inode);
111	}
112	iput(entry->inode);
113	list_del(entry: &entry->list);
114	kmem_cache_free(s: fsync_entry_slab, objp: entry);
115	}
116
117	static int init_recovered_filename(const struct inode *dir,
118	struct f2fs_inode *raw_inode,
119	struct f2fs_filename *fname,
120	struct qstr *usr_fname)
121	{
122	int err;
123
124	memset(fname, `0`, sizeof(*fname));
125	fname->disk_name.len = le32_to_cpu(raw_inode->i_namelen);
126	fname->disk_name.name = raw_inode->i_name;
127
128	if (WARN_ON(fname->disk_name.len > F2FS_NAME_LEN))
129	return -ENAMETOOLONG;
130
131	if (!IS_ENCRYPTED(dir)) {
132	usr_fname->name = fname->disk_name.name;
133	usr_fname->len = fname->disk_name.len;
134	fname->usr_fname = usr_fname;
135	}
136
137	/ Compute the hash of the filename /
138	if (IS_ENCRYPTED(dir) && IS_CASEFOLDED(dir)) {
139	/*
140	* In this case the hash isn't computable without the key, so it
141	* was saved on-disk.
142	*/
143	if (fname->disk_name.len + sizeof(f2fs_hash_t) > F2FS_NAME_LEN)
144	return -EINVAL;
145	fname->hash = get_unaligned((f2fs_hash_t *)
146	&raw_inode->i_name[fname->disk_name.len]);
147	} else if (IS_CASEFOLDED(dir)) {
148	err = f2fs_init_casefolded_name(dir, fname);
149	if (err)
150	return err;
151	f2fs_hash_filename(dir, fname);
152	/ Case-sensitive match is fine for recovery /
153	f2fs_free_casefolded_name(fname);
154	} else {
155	f2fs_hash_filename(dir, fname);
156	}
157	return `0`;
158	}
159
160	static int recover_dentry(struct inode inode, struct* folio *ifolio,
161	struct list_head *dir_list)
162	{
163	struct f2fs_inode *raw_inode = F2FS_INODE(folio: ifolio);
164	nid_t pino = le32_to_cpu(raw_inode->i_pino);
165	struct f2fs_dir_entry *de;
166	struct f2fs_filename fname;
167	struct qstr usr_fname;
168	struct folio *folio;
169	struct inode dir, einode;
170	struct fsync_inode_entry *entry;
171	int err = `0`;
172	char *name;
173
174	entry = get_fsync_inode(head: dir_list, ino: pino);
175	if (!entry) {
176	entry = add_fsync_inode(sbi: F2FS_I_SB(inode), head: dir_list,
177	ino: pino, quota_inode: false);
178	if (IS_ERR(ptr: entry)) {
179	dir = ERR_CAST(ptr: entry);
180	err = PTR_ERR(ptr: entry);
181	goto out;
182	}
183	}
184
185	dir = entry->inode;
186	err = init_recovered_filename(dir, raw_inode, fname: &fname, usr_fname: &usr_fname);
187	if (err)
188	goto out;
189	retry:
190	de = __f2fs_find_entry(dir, fname: &fname, res_folio: &folio);
191	if (de && inode->i_ino == le32_to_cpu(de->ino))
192	goto out_put;
193
194	if (de) {
195	einode = f2fs_iget_retry(sb: inode->i_sb, le32_to_cpu(de->ino));
196	if (IS_ERR(ptr: einode)) {
197	WARN_ON(`1`);
198	err = PTR_ERR(ptr: einode);
199	if (err == -ENOENT)
200	err = -EEXIST;
201	goto out_put;
202	}
203
204	err = f2fs_dquot_initialize(inode: einode);
205	if (err) {
206	iput(einode);
207	goto out_put;
208	}
209
210	err = f2fs_acquire_orphan_inode(sbi: F2FS_I_SB(inode));
211	if (err) {
212	iput(einode);
213	goto out_put;
214	}
215	f2fs_delete_entry(dentry: de, folio, dir, inode: einode);
216	iput(einode);
217	goto retry;
218	} else if (IS_ERR(ptr: folio)) {
219	err = PTR_ERR(ptr: folio);
220	} else {
221	err = f2fs_add_dentry(dir, fname: &fname, inode,
222	ino: inode->i_ino, mode: inode->i_mode);
223	}
224	if (err == -ENOMEM)
225	goto retry;
226	goto out;
227
228	out_put:
229	f2fs_folio_put(folio, unlock: false);
230	out:
231	if (file_enc_name(inode))
232	name = "<encrypted>";
233	else
234	name = raw_inode->i_name;
235	f2fs_notice(F2FS_I_SB(inode), "%s: ino = %x, name = %s, dir = %lx, err = %d",
236	__func__, ino_of_node(ifolio), name,
237	IS_ERR(dir) ? `0` : dir->i_ino, err);
238	return err;
239	}
240
241	static int recover_quota_data(struct inode inode, struct* folio *folio)
242	{
243	struct f2fs_inode *raw = F2FS_INODE(folio);
244	struct iattr attr;
245	uid_t i_uid = le32_to_cpu(raw->i_uid);
246	gid_t i_gid = le32_to_cpu(raw->i_gid);
247	int err;
248
249	memset(&attr, `0`, sizeof(attr));
250
251	attr.ia_vfsuid = VFSUIDT_INIT(make_kuid(inode->i_sb->s_user_ns, i_uid));
252	attr.ia_vfsgid = VFSGIDT_INIT(make_kgid(inode->i_sb->s_user_ns, i_gid));
253
254	if (!vfsuid_eq(left: attr.ia_vfsuid, right: i_uid_into_vfsuid(idmap: &nop_mnt_idmap, inode)))
255	attr.ia_valid \|= ATTR_UID;
256	if (!vfsgid_eq(left: attr.ia_vfsgid, right: i_gid_into_vfsgid(idmap: &nop_mnt_idmap, inode)))
257	attr.ia_valid \|= ATTR_GID;
258
259	if (!attr.ia_valid)
260	return `0`;
261
262	err = dquot_transfer(idmap: &nop_mnt_idmap, inode, iattr: &attr);
263	if (err)
264	set_sbi_flag(sbi: F2FS_I_SB(inode), type: SBI_QUOTA_NEED_REPAIR);
265	return err;
266	}
267
268	static void recover_inline_flags(struct inode inode, struct* f2fs_inode *ri)
269	{
270	if (ri->i_inline & F2FS_PIN_FILE)
271	set_inode_flag(inode, flag: FI_PIN_FILE);
272	else
273	clear_inode_flag(inode, flag: FI_PIN_FILE);
274	if (ri->i_inline & F2FS_DATA_EXIST)
275	set_inode_flag(inode, flag: FI_DATA_EXIST);
276	else
277	clear_inode_flag(inode, flag: FI_DATA_EXIST);
278	}
279
280	static int recover_inode(struct inode inode, struct* folio *folio)
281	{
282	struct f2fs_inode *raw = F2FS_INODE(folio);
283	struct f2fs_inode_info *fi = F2FS_I(inode);
284	char *name;
285	int err;
286
287	inode->i_mode = le16_to_cpu(raw->i_mode);
288
289	err = recover_quota_data(inode, folio);
290	if (err)
291	return err;
292
293	i_uid_write(inode, le32_to_cpu(raw->i_uid));
294	i_gid_write(inode, le32_to_cpu(raw->i_gid));
295
296	if (raw->i_inline & F2FS_EXTRA_ATTR) {
297	if (f2fs_sb_has_project_quota(sbi: F2FS_I_SB(inode)) &&
298	F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
299	i_projid)) {
300	projid_t i_projid;
301	kprojid_t kprojid;
302
303	i_projid = (projid_t)le32_to_cpu(raw->i_projid);
304	kprojid = make_kprojid(from: &init_user_ns, projid: i_projid);
305
306	if (!projid_eq(left: kprojid, right: fi->i_projid)) {
307	err = f2fs_transfer_project_quota(inode,
308	kprojid);
309	if (err)
310	return err;
311	fi->i_projid = kprojid;
312	}
313	}
314	}
315
316	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
317	inode_set_atime(inode, le64_to_cpu(raw->i_atime),
318	le32_to_cpu(raw->i_atime_nsec));
319	inode_set_ctime(inode, le64_to_cpu(raw->i_ctime),
320	le32_to_cpu(raw->i_ctime_nsec));
321	inode_set_mtime(inode, le64_to_cpu(raw->i_mtime),
322	le32_to_cpu(raw->i_mtime_nsec));
323
324	fi->i_advise = raw->i_advise;
325	fi->i_flags = le32_to_cpu(raw->i_flags);
326	f2fs_set_inode_flags(inode);
327	fi->i_gc_failures = le16_to_cpu(raw->i_gc_failures);
328
329	recover_inline_flags(inode, ri: raw);
330
331	f2fs_mark_inode_dirty_sync(inode, sync: true);
332
333	if (file_enc_name(inode))
334	name = "<encrypted>";
335	else
336	name = F2FS_INODE(folio)->i_name;
337
338	f2fs_notice(F2FS_I_SB(inode), "recover_inode: ino = %x, name = %s, inline = %x",
339	ino_of_node(folio), name, raw->i_inline);
340	return `0`;
341	}
342
343	static unsigned int adjust_por_ra_blocks(struct f2fs_sb_info *sbi,
344	unsigned int ra_blocks, unsigned int blkaddr,
345	unsigned int next_blkaddr)
346	{
347	if (blkaddr + `1` == next_blkaddr)
348	ra_blocks = min_t(unsigned int, RECOVERY_MAX_RA_BLOCKS,
349	ra_blocks * `2`);
350	else if (next_blkaddr % BLKS_PER_SEG(sbi))
351	ra_blocks = max_t(unsigned int, RECOVERY_MIN_RA_BLOCKS,
352	ra_blocks / `2`);
353	return ra_blocks;
354	}
355
356	/ Detect looped node chain with Floyd's cycle detection algorithm. /
357	static int sanity_check_node_chain(struct f2fs_sb_info *sbi, block_t blkaddr,
358	block_t blkaddr_fast, bool is_detecting)
359	{
360	unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
361	int i;
362
363	if (!*is_detecting)
364	return `0`;
365
366	for (i = `0`; i < `2`; i++) {
367	struct folio *folio;
368
369	if (!f2fs_is_valid_blkaddr(sbi, blkaddr: *blkaddr_fast, type: META_POR)) {
370	*is_detecting = false;
371	return `0`;
372	}
373
374	folio = f2fs_get_tmp_folio(sbi, index: *blkaddr_fast);
375	if (IS_ERR(ptr: folio))
376	return PTR_ERR(ptr: folio);
377
378	if (!is_recoverable_dnode(folio)) {
379	f2fs_folio_put(folio, unlock: true);
380	*is_detecting = false;
381	return `0`;
382	}
383
384	ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr: *blkaddr_fast,
385	next_blkaddr: next_blkaddr_of_node(node_folio: folio));
386
387	*blkaddr_fast = next_blkaddr_of_node(node_folio: folio);
388	f2fs_folio_put(folio, unlock: true);
389
390	f2fs_ra_meta_pages_cond(sbi, index: *blkaddr_fast, ra_blocks);
391	}
392
393	if (*blkaddr_fast == blkaddr) {
394	f2fs_notice(sbi, "%s: Detect looped node chain on blkaddr:%u."
395	" Run fsck to fix it.", __func__, blkaddr);
396	return -EINVAL;
397	}
398	return `0`;
399	}
400
401	static int find_fsync_dnodes(struct f2fs_sb_info sbi, struct* list_head *head,
402	bool check_only, bool *new_inode)
403	{
404	struct curseg_info *curseg;
405	block_t blkaddr, blkaddr_fast;
406	bool is_detecting = true;
407	int err = `0`;
408
409	/ get node pages in the current segment /
410	curseg = CURSEG_I(sbi, type: CURSEG_WARM_NODE);
411	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
412	blkaddr_fast = blkaddr;
413
414	while (`1`) {
415	struct fsync_inode_entry *entry;
416	struct folio *folio;
417
418	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type: META_POR))
419	return `0`;
420
421	folio = f2fs_get_tmp_folio(sbi, index: blkaddr);
422	if (IS_ERR(ptr: folio)) {
423	err = PTR_ERR(ptr: folio);
424	break;
425	}
426
427	if (!is_recoverable_dnode(folio)) {
428	f2fs_folio_put(folio, unlock: true);
429	break;
430	}
431
432	if (!is_fsync_dnode(folio))
433	goto next;
434
435	entry = get_fsync_inode(head, ino: ino_of_node(node_folio: folio));
436	if (!entry) {
437	bool quota_inode = false;
438
439	if (!check_only &&
440	IS_INODE(folio) &&
441	is_dent_dnode(folio)) {
442	err = f2fs_recover_inode_page(sbi, folio);
443	if (err) {
444	f2fs_folio_put(folio, unlock: true);
445	break;
446	}
447	quota_inode = true;
448	}
449
450	entry = add_fsync_inode(sbi, head, ino: ino_of_node(node_folio: folio),
451	quota_inode);
452	if (IS_ERR(ptr: entry)) {
453	err = PTR_ERR(ptr: entry);
454	/*
455	* CP \| dnode(F) \| inode(DF)
456	* For this case, we should not give up now.
457	*/
458	if (err == -ENOENT) {
459	if (check_only)
460	*new_inode = true;
461	goto next;
462	}
463	f2fs_folio_put(folio, unlock: true);
464	break;
465	}
466	}
467	entry->blkaddr = blkaddr;
468
469	if (IS_INODE(folio) && is_dent_dnode(folio))
470	entry->last_dentry = blkaddr;
471	next:
472	/ check next segment /
473	blkaddr = next_blkaddr_of_node(node_folio: folio);
474	f2fs_folio_put(folio, unlock: true);
475
476	err = sanity_check_node_chain(sbi, blkaddr, blkaddr_fast: &blkaddr_fast,
477	is_detecting: &is_detecting);
478	if (err)
479	break;
480	}
481	return err;
482	}
483
484	static void destroy_fsync_dnodes(struct list_head head, int* drop)
485	{
486	struct fsync_inode_entry entry, tmp;
487
488	list_for_each_entry_safe(entry, tmp, head, list)
489	del_fsync_inode(entry, drop);
490	}
491
492	static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
493	block_t blkaddr, struct dnode_of_data *dn)
494	{
495	struct seg_entry *sentry;
496	unsigned int segno = GET_SEGNO(sbi, blkaddr);
497	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
498	struct f2fs_summary_block *sum_node;
499	struct f2fs_summary sum;
500	struct folio sum_folio, node_folio;
501	struct dnode_of_data tdn = *dn;
502	nid_t ino, nid;
503	struct inode *inode;
504	unsigned int offset, ofs_in_node, max_addrs;
505	block_t bidx;
506	int i;
507
508	sentry = get_seg_entry(sbi, segno);
509	if (!f2fs_test_bit(nr: blkoff, addr: sentry->cur_valid_map))
510	return `0`;
511
512	/ Get the previous summary /
513	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
514	struct curseg_info *curseg = CURSEG_I(sbi, type: i);
515
516	if (curseg->segno == segno) {
517	sum = curseg->sum_blk->entries[blkoff];
518	goto got_it;
519	}
520	}
521
522	sum_folio = f2fs_get_sum_folio(sbi, segno);
523	if (IS_ERR(ptr: sum_folio))
524	return PTR_ERR(ptr: sum_folio);
525	sum_node = SUM_BLK_PAGE_ADDR(sum_folio, segno);
526	sum = sum_node->entries[blkoff];
527	f2fs_folio_put(folio: sum_folio, unlock: true);
528	got_it:
529	/ Use the locked dnode page and inode /
530	nid = le32_to_cpu(sum.nid);
531	ofs_in_node = le16_to_cpu(sum.ofs_in_node);
532
533	max_addrs = ADDRS_PER_PAGE(dn->node_folio, dn->inode);
534	if (ofs_in_node >= max_addrs) {
535	f2fs_err(sbi, "Inconsistent ofs_in_node:%u in summary, ino:%lu, nid:%u, max:%u",
536	ofs_in_node, dn->inode->i_ino, nid, max_addrs);
537	f2fs_handle_error(sbi, error: ERROR_INCONSISTENT_SUMMARY);
538	return -EFSCORRUPTED;
539	}
540
541	if (dn->inode->i_ino == nid) {
542	tdn.nid = nid;
543	if (!dn->inode_folio_locked)
544	folio_lock(folio: dn->inode_folio);
545	tdn.node_folio = dn->inode_folio;
546	tdn.ofs_in_node = ofs_in_node;
547	goto truncate_out;
548	} else if (dn->nid == nid) {
549	tdn.ofs_in_node = ofs_in_node;
550	goto truncate_out;
551	}
552
553	/ Get the node page /
554	node_folio = f2fs_get_node_folio(sbi, nid, node_type: NODE_TYPE_REGULAR);
555	if (IS_ERR(ptr: node_folio))
556	return PTR_ERR(ptr: node_folio);
557
558	offset = ofs_of_node(node_folio);
559	ino = ino_of_node(node_folio);
560	f2fs_folio_put(folio: node_folio, unlock: true);
561
562	if (ino != dn->inode->i_ino) {
563	int ret;
564
565	/ Deallocate previous index in the node page /
566	inode = f2fs_iget_retry(sb: sbi->sb, ino);
567	if (IS_ERR(ptr: inode))
568	return PTR_ERR(ptr: inode);
569
570	ret = f2fs_dquot_initialize(inode);
571	if (ret) {
572	iput(inode);
573	return ret;
574	}
575	} else {
576	inode = dn->inode;
577	}
578
579	bidx = f2fs_start_bidx_of_node(node_ofs: offset, inode) +
580	le16_to_cpu(sum.ofs_in_node);
581
582	/*
583	* if inode page is locked, unlock temporarily, but its reference
584	* count keeps alive.
585	*/
586	if (ino == dn->inode->i_ino && dn->inode_folio_locked)
587	folio_unlock(folio: dn->inode_folio);
588
589	set_new_dnode(dn: &tdn, inode, NULL, NULL, nid: `0`);
590	if (f2fs_get_dnode_of_data(dn: &tdn, index: bidx, mode: LOOKUP_NODE))
591	goto out;
592
593	if (tdn.data_blkaddr == blkaddr)
594	f2fs_truncate_data_blocks_range(dn: &tdn, count: `1`);
595
596	f2fs_put_dnode(dn: &tdn);
597	out:
598	if (ino != dn->inode->i_ino)
599	iput(inode);
600	else if (dn->inode_folio_locked)
601	folio_lock(folio: dn->inode_folio);
602	return `0`;
603
604	truncate_out:
605	if (f2fs_data_blkaddr(dn: &tdn) == blkaddr)
606	f2fs_truncate_data_blocks_range(dn: &tdn, count: `1`);
607	if (dn->inode->i_ino == nid && !dn->inode_folio_locked)
608	folio_unlock(folio: dn->inode_folio);
609	return `0`;
610	}
611
612	static int f2fs_reserve_new_block_retry(struct dnode_of_data *dn)
613	{
614	int i, err = `0`;
615
616	for (i = DEFAULT_FAILURE_RETRY_COUNT; i > `0`; i--) {
617	err = f2fs_reserve_new_block(dn);
618	if (!err)
619	break;
620	}
621
622	return err;
623	}
624
625	static int do_recover_data(struct f2fs_sb_info sbi, struct* inode *inode,
626	struct folio *folio)
627	{
628	struct dnode_of_data dn;
629	struct node_info ni;
630	unsigned int start = `0`, end = `0`, index;
631	int err = `0`, recovered = `0`;
632
633	/ step 1: recover xattr /
634	if (IS_INODE(folio)) {
635	err = f2fs_recover_inline_xattr(inode, folio);
636	if (err)
637	goto out;
638	} else if (f2fs_has_xattr_block(ofs: ofs_of_node(node_folio: folio))) {
639	err = f2fs_recover_xattr_data(inode, folio);
640	if (!err)
641	recovered++;
642	goto out;
643	}
644
645	/ step 2: recover inline data /
646	err = f2fs_recover_inline_data(inode, nfolio: folio);
647	if (err) {
648	if (err == `1`)
649	err = `0`;
650	goto out;
651	}
652
653	/ step 3: recover data indices /
654	start = f2fs_start_bidx_of_node(node_ofs: ofs_of_node(node_folio: folio), inode);
655	end = start + ADDRS_PER_PAGE(folio, inode);
656
657	set_new_dnode(dn: &dn, inode, NULL, NULL, nid: `0`);
658	retry_dn:
659	err = f2fs_get_dnode_of_data(dn: &dn, index: start, mode: ALLOC_NODE);
660	if (err) {
661	if (err == -ENOMEM) {
662	memalloc_retry_wait(GFP_NOFS);
663	goto retry_dn;
664	}
665	goto out;
666	}
667
668	f2fs_folio_wait_writeback(folio: dn.node_folio, type: NODE, ordered: true, locked: true);
669
670	err = f2fs_get_node_info(sbi, nid: dn.nid, ni: &ni, checkpoint_context: false);
671	if (err)
672	goto err;
673
674	f2fs_bug_on(sbi, ni.ino != ino_of_node(folio));
675
676	if (ofs_of_node(node_folio: dn.node_folio) != ofs_of_node(node_folio: folio)) {
677	f2fs_warn(sbi, "Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
678	inode->i_ino, ofs_of_node(dn.node_folio),
679	ofs_of_node(folio));
680	err = -EFSCORRUPTED;
681	f2fs_handle_error(sbi, error: ERROR_INCONSISTENT_FOOTER);
682	goto err;
683	}
684
685	for (index = start; index < end; index++, dn.ofs_in_node++) {
686	block_t src, dest;
687
688	src = f2fs_data_blkaddr(dn: &dn);
689	dest = data_blkaddr(inode: dn.inode, node_folio: folio, offset: dn.ofs_in_node);
690
691	if (__is_valid_data_blkaddr(blkaddr: src) &&
692	!f2fs_is_valid_blkaddr(sbi, blkaddr: src, type: META_POR)) {
693	err = -EFSCORRUPTED;
694	goto err;
695	}
696
697	if (__is_valid_data_blkaddr(blkaddr: dest) &&
698	!f2fs_is_valid_blkaddr(sbi, blkaddr: dest, type: META_POR)) {
699	err = -EFSCORRUPTED;
700	goto err;
701	}
702
703	/ skip recovering if dest is the same as src /
704	if (src == dest)
705	continue;
706
707	/ dest is invalid, just invalidate src block /
708	if (dest == NULL_ADDR) {
709	f2fs_truncate_data_blocks_range(dn: &dn, count: `1`);
710	continue;
711	}
712
713	if (!file_keep_isize(inode) &&
714	(i_size_read(inode) <= ((loff_t)index << PAGE_SHIFT)))
715	f2fs_i_size_write(inode,
716	i_size: (loff_t)(index + `1`) << PAGE_SHIFT);
717
718	/*
719	* dest is reserved block, invalidate src block
720	* and then reserve one new block in dnode page.
721	*/
722	if (dest == NEW_ADDR) {
723	f2fs_truncate_data_blocks_range(dn: &dn, count: `1`);
724
725	err = f2fs_reserve_new_block_retry(dn: &dn);
726	if (err)
727	goto err;
728	continue;
729	}
730
731	/ dest is valid block, try to recover from src to dest /
732	if (f2fs_is_valid_blkaddr(sbi, blkaddr: dest, type: META_POR)) {
733	if (src == NULL_ADDR) {
734	err = f2fs_reserve_new_block_retry(dn: &dn);
735	if (err)
736	goto err;
737	}
738	retry_prev:
739	/ Check the previous node page having this index /
740	err = check_index_in_prev_nodes(sbi, blkaddr: dest, dn: &dn);
741	if (err) {
742	if (err == -ENOMEM) {
743	memalloc_retry_wait(GFP_NOFS);
744	goto retry_prev;
745	}
746	goto err;
747	}
748
749	if (f2fs_is_valid_blkaddr(sbi, blkaddr: dest,
750	type: DATA_GENERIC_ENHANCE_UPDATE)) {
751	f2fs_err(sbi, "Inconsistent dest blkaddr:%u, ino:%lu, ofs:%u",
752	dest, inode->i_ino, dn.ofs_in_node);
753	err = -EFSCORRUPTED;
754	goto err;
755	}
756
757	/ write dummy data page /
758	f2fs_replace_block(sbi, dn: &dn, old_addr: src, new_addr: dest,
759	version: ni.version, recover_curseg: false, recover_newaddr: false);
760	recovered++;
761	}
762	}
763
764	copy_node_footer(dst: dn.node_folio, src: folio);
765	fill_node_footer(folio: dn.node_folio, nid: dn.nid, ino: ni.ino,
766	ofs: ofs_of_node(node_folio: folio), reset: false);
767	folio_mark_dirty(folio: dn.node_folio);
768	err:
769	f2fs_put_dnode(dn: &dn);
770	out:
771	f2fs_notice(sbi, "recover_data: ino = %lx, nid = %x (i_size: %s), "
772	"range (%u, %u), recovered = %d, err = %d",
773	inode->i_ino, nid_of_node(folio),
774	file_keep_isize(inode) ? "keep" : "recover",
775	start, end, recovered, err);
776	return err;
777	}
778
779	static int recover_data(struct f2fs_sb_info sbi, struct* list_head *inode_list,
780	struct list_head tmp_inode_list, struct* list_head *dir_list)
781	{
782	struct curseg_info *curseg;
783	int err = `0`;
784	block_t blkaddr;
785	unsigned int ra_blocks = RECOVERY_MAX_RA_BLOCKS;
786	unsigned int recoverable_dnode = `0`;
787	unsigned int fsynced_dnode = `0`;
788	unsigned int total_dnode = `0`;
789	unsigned int recovered_inode = `0`;
790	unsigned int recovered_dentry = `0`;
791	unsigned int recovered_dnode = `0`;
792
793	f2fs_notice(sbi, "do_recover_data: start to recover dnode");
794
795	/ get node pages in the current segment /
796	curseg = CURSEG_I(sbi, type: CURSEG_WARM_NODE);
797	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
798
799	while (`1`) {
800	struct fsync_inode_entry *entry;
801	struct folio *folio;
802
803	if (!f2fs_is_valid_blkaddr(sbi, blkaddr, type: META_POR))
804	break;
805
806	folio = f2fs_get_tmp_folio(sbi, index: blkaddr);
807	if (IS_ERR(ptr: folio)) {
808	err = PTR_ERR(ptr: folio);
809	break;
810	}
811
812	if (!is_recoverable_dnode(folio)) {
813	f2fs_folio_put(folio, unlock: true);
814	break;
815	}
816	recoverable_dnode++;
817
818	entry = get_fsync_inode(head: inode_list, ino: ino_of_node(node_folio: folio));
819	if (!entry)
820	goto next;
821	fsynced_dnode++;
822	/*
823	* inode(x) \| CP \| inode(x) \| dnode(F)
824	* In this case, we can lose the latest inode(x).
825	* So, call recover_inode for the inode update.
826	*/
827	if (IS_INODE(folio)) {
828	err = recover_inode(inode: entry->inode, folio);
829	if (err) {
830	f2fs_folio_put(folio, unlock: true);
831	break;
832	}
833	recovered_inode++;
834	}
835	if (entry->last_dentry == blkaddr) {
836	err = recover_dentry(inode: entry->inode, ifolio: folio, dir_list);
837	if (err) {
838	f2fs_folio_put(folio, unlock: true);
839	break;
840	}
841	recovered_dentry++;
842	}
843	err = do_recover_data(sbi, inode: entry->inode, folio);
844	if (err) {
845	f2fs_folio_put(folio, unlock: true);
846	break;
847	}
848	recovered_dnode++;
849
850	if (entry->blkaddr == blkaddr)
851	list_move_tail(list: &entry->list, head: tmp_inode_list);
852	next:
853	ra_blocks = adjust_por_ra_blocks(sbi, ra_blocks, blkaddr,
854	next_blkaddr: next_blkaddr_of_node(node_folio: folio));
855
856	/ check next segment /
857	blkaddr = next_blkaddr_of_node(node_folio: folio);
858	f2fs_folio_put(folio, unlock: true);
859
860	f2fs_ra_meta_pages_cond(sbi, index: blkaddr, ra_blocks);
861	total_dnode++;
862	}
863	if (!err)
864	err = f2fs_allocate_new_segments(sbi);
865
866	f2fs_notice(sbi, "do_recover_data: dnode: (recoverable: %u, fsynced: %u, "
867	"total: %u), recovered: (inode: %u, dentry: %u, dnode: %u), err: %d",
868	recoverable_dnode, fsynced_dnode, total_dnode, recovered_inode,
869	recovered_dentry, recovered_dnode, err);
870	return err;
871	}
872
873	int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
874	{
875	LIST_HEAD(inode_list);
876	LIST_HEAD(tmp_inode_list);
877	LIST_HEAD(dir_list);
878	int err;
879	int ret = `0`;
880	unsigned long s_flags = sbi->sb->s_flags;
881	bool need_writecp = false;
882	bool new_inode = false;
883
884	f2fs_notice(sbi, "f2fs_recover_fsync_data: recovery fsync data, "
885	"check_only: %d", check_only);
886
887	if (is_sbi_flag_set(sbi, type: SBI_IS_WRITABLE))
888	f2fs_info(sbi, "recover fsync data on readonly fs");
889
890	/ prevent checkpoint /
891	f2fs_down_write(sem: &sbi->cp_global_sem);
892
893	/ step #1: find fsynced inode numbers /
894	err = find_fsync_dnodes(sbi, head: &inode_list, check_only, new_inode: &new_inode);
895	if (err < `0` \|\| (list_empty(head: &inode_list) && (!check_only \|\| !new_inode)))
896	goto skip;
897
898	if (check_only) {
899	ret = `1`;
900	goto skip;
901	}
902
903	need_writecp = true;
904
905	/ step #2: recover data /
906	err = recover_data(sbi, inode_list: &inode_list, tmp_inode_list: &tmp_inode_list, dir_list: &dir_list);
907	if (!err)
908	f2fs_bug_on(sbi, !list_empty(&inode_list));
909	else
910	f2fs_bug_on(sbi, sbi->sb->s_flags & SB_ACTIVE);
911	skip:
912	destroy_fsync_dnodes(head: &inode_list, drop: err);
913	destroy_fsync_dnodes(head: &tmp_inode_list, drop: err);
914
915	/ truncate meta pages to be used by the recovery /
916	truncate_inode_pages_range(mapping: META_MAPPING(sbi),
917	lstart: (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, lend: -`1`);
918
919	if (err) {
920	truncate_inode_pages_final(mapping: NODE_MAPPING(sbi));
921	truncate_inode_pages_final(mapping: META_MAPPING(sbi));
922	}
923
924	/*
925	* If fsync data succeeds or there is no fsync data to recover,
926	* and the f2fs is not read only, check and fix zoned block devices'
927	* write pointer consistency.
928	*/
929	if (!err)
930	err = f2fs_check_and_fix_write_pointer(sbi);
931
932	if (!err)
933	clear_sbi_flag(sbi, type: SBI_POR_DOING);
934
935	f2fs_up_write(sem: &sbi->cp_global_sem);
936
937	/ let's drop all the directory inodes for clean checkpoint /
938	destroy_fsync_dnodes(head: &dir_list, drop: err);
939
940	if (need_writecp) {
941	set_sbi_flag(sbi, type: SBI_IS_RECOVERED);
942
943	if (!err) {
944	struct cp_control cpc = {
945	.reason = CP_RECOVERY,
946	};
947	stat_inc_cp_call_count(sbi, TOTAL_CALL);
948	err = f2fs_write_checkpoint(sbi, cpc: &cpc);
949	}
950	}
951
952	sbi->sb->s_flags = s_flags; / Restore SB_RDONLY status /
953
954	return ret ? ret : err;
955	}
956
957	int __init f2fs_create_recovery_cache(void)
958	{
959	fsync_entry_slab = f2fs_kmem_cache_create(name: "f2fs_fsync_inode_entry",
960	size: sizeof(struct fsync_inode_entry));
961	return fsync_entry_slab ? `0` : -ENOMEM;
962	}
963
964	void f2fs_destroy_recovery_cache(void)
965	{
966	kmem_cache_destroy(s: fsync_entry_slab);
967	}
968

source code of linux/fs/f2fs/recovery.c