// SPDX-License-Identifier: GPL-2.0-only
/******************************************************************************
*******************************************************************************
**
**  Copyright (C) Sistina Software, Inc.  1997-2003  All rights reserved.
**  Copyright (C) 2004-2011 Red Hat, Inc.  All rights reserved.
**
**
*******************************************************************************
******************************************************************************/

#include "dlm_internal.h"
#include "lockspace.h"
#include "member.h"
#include "dir.h"
#include "ast.h"
#include "recover.h"
#include "lowcomms.h"
#include "lock.h"
#include "requestqueue.h"
#include "recoverd.h"

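/* Build ls_masters_list: a snapshot of the active rsbs that this node
 * masters (res_nodeid == 0), each held with an extra reference so the
 * list stays valid while other nodes read it during recovery.
 */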
static int dlm_create_masters_list(struct dlm_ls *ls)
{
	struct dlm_rsb *r;
	int error = 0;

	write_lock_bh(&ls->ls_masters_lock);
	if (!list_empty(&ls->ls_masters_list)) {
		log_error(ls, "root list not empty");
		error = -EINVAL;
		goto out;
	}

	read_lock_bh(&ls->ls_rsbtbl_lock);
	list_for_each_entry(r, &ls->ls_slow_active, res_slow_list) {
		if (r->res_nodeid)
			continue;

		list_add(&r->res_masters_list, &ls->ls_masters_list);
		dlm_hold_rsb(r);
	}
	read_unlock_bh(&ls->ls_rsbtbl_lock);
 out:
	write_unlock_bh(&ls->ls_masters_lock);
	return error;
}

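/* Undo dlm_create_masters_list(): empty ls_masters_list and drop the
 * rsb references taken when the list was built.
 */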
static void dlm_release_masters_list(struct dlm_ls *ls)
{
	struct dlm_rsb *r, *safe;

	write_lock_bh(&ls->ls_masters_lock);
	list_for_each_entry_safe(r, safe, &ls->ls_masters_list, res_masters_list) {
		list_del_init(&r->res_masters_list);
		dlm_put_rsb(r);
	}
	write_unlock_bh(&ls->ls_masters_lock);
}

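/* Collect every active rsb onto root_list, holding a reference on each.
 * This list is the working set for most of the recovery routines below.
 */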
static void dlm_create_root_list(struct dlm_ls *ls, struct list_head *root_list)
{
	struct dlm_rsb *r;

	read_lock_bh(&ls->ls_rsbtbl_lock);
	list_for_each_entry(r, &ls->ls_slow_active, res_slow_list) {
		list_add(&r->res_root_list, root_list);
		dlm_hold_rsb(r);
	}

	WARN_ON_ONCE(!list_empty(&ls->ls_slow_inactive));
	read_unlock_bh(&ls->ls_rsbtbl_lock);
}

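/* Drop the root_list entries and the rsb references taken by
 * dlm_create_root_list().
 */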
static void dlm_release_root_list(struct list_head *root_list)
{
	struct dlm_rsb *r, *safe;

	list_for_each_entry_safe(r, safe, root_list, res_root_list) {
		list_del_init(&r->res_root_list);
		dlm_put_rsb(r);
	}
}

/* If the start for which we're re-enabling locking (seq) has been superseded
   by a newer stop (ls_recover_seq), we need to leave locking disabled.

   We suspend dlm_recv threads here to avoid the race where dlm_recv a) sees
   locking stopped and b) adds a message to the requestqueue, but dlm_recoverd
   enables locking and clears the requestqueue between a and b. */

static int enable_locking(struct dlm_ls *ls, uint64_t seq)
{
	int error = -EINTR;

	write_lock_bh(&ls->ls_recv_active);

	spin_lock_bh(&ls->ls_recover_lock);
	if (ls->ls_recover_seq == seq) {
		set_bit(LSFL_RUNNING, &ls->ls_flags);
		/* Schedule the next timer if recovery put something on the
		 * inactive list.
		 *
		 * The scan of rsbs queued on the inactive list during
		 * recovery hasn't started yet because LSFL_RUNNING was only
		 * set just now, and everything else in recovery hasn't
		 * started either because ls_in_recovery is still held.  So
		 * we should not run into the case where resume_scan_timer()
		 * queues a timer that ends up being a no-op.
		 */
		resume_scan_timer(ls);
		/* unblocks processes waiting to enter the dlm */
		up_write(&ls->ls_in_recovery);
		clear_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
		error = 0;
	}
	spin_unlock_bh(&ls->ls_recover_lock);

	write_unlock_bh(&ls->ls_recv_active);
	return error;
}

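/* Run one recovery cycle for the lockspace: update membership, rebuild the
 * resource directory, remaster and resend locks as needed, then re-enable
 * locking and replay queued requests.  Returns 0 on success, -EINTR if the
 * cycle was interrupted by a newer recovery event, or an error from one of
 * the recovery steps.
 */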
static int ls_recover(struct dlm_ls *ls, struct dlm_recover *rv)
{
	LIST_HEAD(root_list);
	unsigned long start;
	int error, neg = 0;

	log_rinfo(ls, "dlm_recover %llu", (unsigned long long)rv->seq);

	mutex_lock(&ls->ls_recoverd_active);

	dlm_callback_suspend(ls);

	dlm_clear_inactive(ls);

	/*
	 * This list of root rsb's will be the basis of most of the recovery
	 * routines.
	 */

	dlm_create_root_list(ls, &root_list);

	/*
	 * Add or remove nodes from the lockspace's ls_nodes list.
	 *
	 * Because we must report all membership changes to the lsops and
	 * midcomms layers, ls_recover() must not be aborted until this is
	 * done.
	 */

	error = dlm_recover_members(ls, rv, &neg);
	if (error) {
		log_rinfo(ls, "dlm_recover_members error %d", error);
		goto fail_root_list;
	}

	dlm_recover_dir_nodeid(ls, &root_list);

	/* Create a snapshot of all active rsbs of which we are the master.
	 * During the barrier between dlm_recover_members_wait() and
	 * dlm_recover_directory(), other nodes can collect the rsb names
	 * they need for their directory (r->res_dir_nodeid == nodeid)
	 * through the rcom communication handled by dlm_copy_master_names().
	 *
	 * TODO: keep a per-lockspace list of the rsbs that we master.
	 * Instead of building this list during recovery, track those rsbs
	 * during normal lock handling so recovery can use the list when
	 * necessary.
	 */
	error = dlm_create_masters_list(ls);
	if (error) {
		log_rinfo(ls, "dlm_create_masters_list error %d", error);
		goto fail_root_list;
	}

	ls->ls_recover_locks_in = 0;

	dlm_set_recover_status(ls, DLM_RS_NODES);

	error = dlm_recover_members_wait(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "dlm_recover_members_wait error %d", error);
		dlm_release_masters_list(ls);
		goto fail_root_list;
	}

	start = jiffies;

	/*
	 * Rebuild our own share of the directory by collecting from all other
	 * nodes their master rsb names that hash to us.
	 */

	error = dlm_recover_directory(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory error %d", error);
		dlm_release_masters_list(ls);
		goto fail_root_list;
	}

	dlm_set_recover_status(ls, DLM_RS_DIR);

	error = dlm_recover_directory_wait(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "dlm_recover_directory_wait error %d", error);
		dlm_release_masters_list(ls);
		goto fail_root_list;
	}

	dlm_release_masters_list(ls);

	/*
	 * We may have outstanding operations that are waiting for a reply from
	 * a failed node.  Mark these to be resent after recovery.  Unlock and
	 * cancel ops can just be completed.
	 */

	dlm_recover_waiters_pre(ls);

	if (dlm_recovery_stopped(ls)) {
		error = -EINTR;
		goto fail_root_list;
	}

	if (neg || dlm_no_directory(ls)) {
		/*
		 * Clear lkb's for departed nodes.
		 */

		dlm_recover_purge(ls, &root_list);

		/*
		 * Get new master nodeid's for rsb's that were mastered on
		 * departed nodes.
		 */

		error = dlm_recover_masters(ls, rv->seq, &root_list);
		if (error) {
			log_rinfo(ls, "dlm_recover_masters error %d", error);
			goto fail_root_list;
		}

		/*
		 * Send our locks on remastered rsb's to the new masters.
		 */

		error = dlm_recover_locks(ls, rv->seq, &root_list);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks error %d", error);
			goto fail_root_list;
		}

		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls, rv->seq);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail_root_list;
		}

		log_rinfo(ls, "dlm_recover_locks %u in",
			  ls->ls_recover_locks_in);

		/*
		 * Finalize state in master rsb's now that all locks can be
		 * checked.  This includes conversion resolution and lvb
		 * settings.
		 */

		dlm_recover_rsbs(ls, &root_list);
	} else {
		/*
		 * Other lockspace members may be going through the "neg" steps
		 * while also adding us to the lockspace, in which case they'll
		 * be doing the recover_locks (RS_LOCKS) barrier.
		 */
		dlm_set_recover_status(ls, DLM_RS_LOCKS);

		error = dlm_recover_locks_wait(ls, rv->seq);
		if (error) {
			log_rinfo(ls, "dlm_recover_locks_wait error %d", error);
			goto fail_root_list;
		}
	}

	dlm_release_root_list(&root_list);

	/*
	 * Purge directory-related requests that are saved in requestqueue.
	 * All dir requests from before recovery are invalid now due to the dir
	 * rebuild and will be resent by the requesting nodes.
	 */

	dlm_purge_requestqueue(ls);

	dlm_set_recover_status(ls, DLM_RS_DONE);

	error = dlm_recover_done_wait(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "dlm_recover_done_wait error %d", error);
		goto fail;
	}

	dlm_clear_members_gone(ls);

	dlm_callback_resume(ls);

	error = enable_locking(ls, rv->seq);
	if (error) {
		log_rinfo(ls, "enable_locking error %d", error);
		goto fail;
	}

	error = dlm_process_requestqueue(ls);
	if (error) {
		log_rinfo(ls, "dlm_process_requestqueue error %d", error);
		goto fail;
	}

	error = dlm_recover_waiters_post(ls);
	if (error) {
		log_rinfo(ls, "dlm_recover_waiters_post error %d", error);
		goto fail;
	}

	dlm_recover_grant(ls);

	log_rinfo(ls, "dlm_recover %llu generation %u done: %u ms",
		  (unsigned long long)rv->seq, ls->ls_generation,
		  jiffies_to_msecs(jiffies - start));
	mutex_unlock(&ls->ls_recoverd_active);

	return 0;

 fail_root_list:
	dlm_release_root_list(&root_list);
 fail:
	mutex_unlock(&ls->ls_recoverd_active);

	return error;
}

/* The dlm_ls_start() that created the rv we take here may already have been
   stopped via dlm_ls_stop(); in that case we need to leave the RECOVERY_STOP
   flag set. */

static void do_ls_recovery(struct dlm_ls *ls)
{
	struct dlm_recover *rv = NULL;
	int error;

	spin_lock_bh(&ls->ls_recover_lock);
	rv = ls->ls_recover_args;
	ls->ls_recover_args = NULL;
	if (rv && ls->ls_recover_seq == rv->seq)
		clear_bit(LSFL_RECOVER_STOP, &ls->ls_flags);
	spin_unlock_bh(&ls->ls_recover_lock);

	if (rv) {
		error = ls_recover(ls, rv);
		switch (error) {
		case 0:
			ls->ls_recovery_result = 0;
			complete(&ls->ls_recovery_done);

			dlm_lsop_recover_done(ls);
			break;
		case -EINTR:
			/* If recovery was interrupted with -EINTR, wait for
			 * the next ls_recover() iteration, which will
			 * hopefully succeed.
			 */
			log_rinfo(ls, "%s %llu interrupted and should be queued to run again",
				  __func__, (unsigned long long)rv->seq);
			break;
		default:
			log_rinfo(ls, "%s %llu error %d", __func__,
				  (unsigned long long)rv->seq, error);

			/* let new_lockspace() know about the critical error */
			ls->ls_recovery_result = error;
			complete(&ls->ls_recovery_done);
			break;
		}

		kfree(rv->nodes);
		kfree(rv);
	}
}

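/* Recovery daemon: one kernel thread per lockspace.  It sleeps until
 * LSFL_RECOVER_WORK or LSFL_RECOVER_DOWN is set, takes ls_in_recovery to
 * block normal locking when the lockspace goes down, and runs
 * do_ls_recovery() for each queued recovery event.
 */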
static int dlm_recoverd(void *arg)
{
	struct dlm_ls *ls;

	ls = dlm_find_lockspace_local(arg);
	if (!ls) {
		log_print("dlm_recoverd: no lockspace %p", arg);
		return -1;
	}

	down_write(&ls->ls_in_recovery);
	set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
	wake_up(&ls->ls_recover_lock_wait);

	while (1) {
		/*
		 * Call kthread_should_stop() after set_current_state() so
		 * that a kthread_stop() issued just before
		 * set_current_state() is not missed.
		 */
		set_current_state(TASK_INTERRUPTIBLE);
		if (kthread_should_stop()) {
			set_current_state(TASK_RUNNING);
			break;
		}
		if (!test_bit(LSFL_RECOVER_WORK, &ls->ls_flags) &&
		    !test_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
			if (kthread_should_stop())
				break;
			schedule();
		}
		set_current_state(TASK_RUNNING);

		if (test_and_clear_bit(LSFL_RECOVER_DOWN, &ls->ls_flags)) {
			down_write(&ls->ls_in_recovery);
			set_bit(LSFL_RECOVER_LOCK, &ls->ls_flags);
			wake_up(&ls->ls_recover_lock_wait);
		}

		if (test_and_clear_bit(LSFL_RECOVER_WORK, &ls->ls_flags))
			do_ls_recovery(ls);
	}

	if (test_bit(LSFL_RECOVER_LOCK, &ls->ls_flags))
		up_write(&ls->ls_in_recovery);

	dlm_put_lockspace(ls);
	return 0;
}

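/* Start the per-lockspace recovery thread.  The task is saved in
 * ls_recoverd_task so dlm_recoverd_stop() can stop it later.
 */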
int dlm_recoverd_start(struct dlm_ls *ls)
{
	struct task_struct *p;
	int error = 0;

	p = kthread_run(dlm_recoverd, ls, "dlm_recoverd");
	if (IS_ERR(p))
		error = PTR_ERR(p);
	else
		ls->ls_recoverd_task = p;
	return error;
}

void dlm_recoverd_stop(struct dlm_ls *ls)
{
	kthread_stop(ls->ls_recoverd_task);
}

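/* Suspend and resume bracket sections that must not race with an active
 * recovery cycle; the wake_up on ls_wait_general kicks any recovery step
 * waiting there so taking ls_recoverd_active does not block behind it.
 */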
void dlm_recoverd_suspend(struct dlm_ls *ls)
{
	wake_up(&ls->ls_wait_general);
	mutex_lock(&ls->ls_recoverd_active);
}

void dlm_recoverd_resume(struct dlm_ls *ls)
{
	mutex_unlock(&ls->ls_recoverd_active);
}