2 * Copyright (C) 2015, SUSE
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2, or (at your option)
12 #include <linux/module.h>
13 #include <linux/dlm.h>
14 #include <linux/sched.h>
16 #include "md-cluster.h"
/*
 * Bookkeeping for one DLM lock resource used by md-cluster.  The members
 * visible here hold the lock name, the flags handed to dlm_lock(), the
 * completion that dlm_lock_sync() waits on, the blocking-AST pointer that
 * dlm_lock_sync() passes to dlm_lock(), and a back-pointer to the mddev.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip).
 * Code below also uses res->ls and res->lksb, which are not declared in
 * the visible lines, and the closing brace is not visible either --
 * confirm against the complete file.
 */
20 struct dlm_lock_resource
{
23 char *name
; /* lock name. */
24 uint32_t flags
; /* flags to pass to dlm_lock() */
25 struct completion completion
; /* completion for synchronized locking */
26 void (*bast
)(void *arg
, int mode
); /* blocking AST function pointer*/
27 struct mddev
*mddev
; /* pointing back to mddev. */
/*
 * NOTE(review): the embedded numbering jumps from 27 to 34 here, so the
 * list member below may belong to a different structure whose header is
 * not visible (gather_all_resync_info() links a struct suspend_info
 * through s->list) -- confirm.
 */
34 struct list_head list
;
/*
 * Per-array cluster state, reached through mddev->cluster_info.
 *
 * NOTE(review): cinfo->slot_number is read and written elsewhere in this
 * file but is not declared in the visible lines (embedded number 45 is
 * skipped), and the closing brace is not visible -- confirm.
 */
42 struct md_cluster_info
{
43 /* dlm lock space and resources for clustered raid. */
44 dlm_lockspace_t
*lockspace
;
/* Signalled by recover_done(); join() waits on it. */
46 struct completion completion
;
/* Lock resource created in join() under the name "cmd-super". */
47 struct dlm_lock_resource
*sb_lock
;
/* Initialised in join(); its users are not visible in this listing. */
48 struct mutex sb_mutex
;
/* This node's "bitmapNNNN" lock resource, taken in PW mode by join(). */
49 struct dlm_lock_resource
*bitmap_lockres
;
/* suspend_info entries added by gather_all_resync_info(). */
50 struct list_head suspend_list
;
/* Held around the list_add() onto suspend_list. */
51 spinlock_t suspend_lock
;
/*
 * AST callback handed to dlm_lock() by dlm_lock_sync().  @arg is the
 * struct dlm_lock_resource of the request; completing res->completion
 * releases the waiter in dlm_lock_sync().
 */
54 static void sync_ast(void *arg
)
56 struct dlm_lock_resource
*res
;
58 res
= (struct dlm_lock_resource
*) arg
;
59 complete(&res
->completion
);
/*
 * Request @res in @mode through dlm_lock() and block until sync_ast()
 * signals res->completion.
 *
 * @res:  lock resource; its ls, lksb, flags, name and bast members are
 *        all passed to dlm_lock().
 * @mode: DLM lock mode to request (callers here use DLM_LOCK_NL,
 *        DLM_LOCK_CR and DLM_LOCK_PW).
 *
 * Returns res->lksb.sb_status after the completion has fired.
 *
 * NOTE(review): the declaration of ret and any check of dlm_lock()'s
 * return value are not visible in this listing (embedded numbers 63-65
 * and 70-71 are skipped) -- confirm how a dlm_lock() failure is handled
 * before the wait.
 */
62 static int dlm_lock_sync(struct dlm_lock_resource
*res
, int mode
)
/* Re-arm the completion for this request; sync_ast() completes it. */
66 init_completion(&res
->completion
);
67 ret
= dlm_lock(res
->ls
, mode
, &res
->lksb
,
68 res
->flags
, res
->name
, strlen(res
->name
),
69 0, sync_ast
, res
, res
->bast
);
72 wait_for_completion(&res
->completion
);
73 return res
->lksb
.sb_status
;
/*
 * Synchronously request DLM_LOCK_NL mode on @res via dlm_lock_sync().
 * Despite the name this does not call dlm_unlock(); that is done in
 * lockres_free().  Returns whatever dlm_lock_sync() returns.
 */
76 static int dlm_unlock_sync(struct dlm_lock_resource
*res
)
78 return dlm_lock_sync(res
, DLM_LOCK_NL
);
/*
 * Allocate a lock resource named @name in the lockspace of @mddev and
 * take an initial DLM_LOCK_NL lock on it.
 *
 * @mddev:    array whose cluster_info supplies the lockspace.
 * @name:     lock name; a private copy is stored in res->name.
 * @bastfn:   blocking-AST callback.  NOTE(review): the line storing it
 *            into the resource is not visible here.
 * @with_lvb: presumably selects whether an LVB buffer is allocated; the
 *            condition itself is not visible -- confirm.
 *
 * NOTE(review): NULL checks after the allocations, the error labels and
 * the return statements are missing from this listing (the embedded line
 * numbers skip); only the statements below can be vouched for.
 */
81 static struct dlm_lock_resource
*lockres_init(struct mddev
*mddev
,
82 char *name
, void (*bastfn
)(void *arg
, int mode
), int with_lvb
)
84 struct dlm_lock_resource
*res
= NULL
;
86 struct md_cluster_info
*cinfo
= mddev
->cluster_info
;
88 res
= kzalloc(sizeof(struct dlm_lock_resource
), GFP_KERNEL
);
91 res
->ls
= cinfo
->lockspace
;
/* Keep a NUL-terminated private copy of the caller's name. */
93 namelen
= strlen(name
);
94 res
->name
= kzalloc(namelen
+ 1, GFP_KERNEL
);
96 pr_err("md-cluster: Unable to allocate resource name for resource %s\n", name
);
99 strlcpy(res
->name
, name
, namelen
+ 1);
/* LVB_SIZE-byte value block attached to the lock status block. */
101 res
->lksb
.sb_lvbptr
= kzalloc(LVB_SIZE
, GFP_KERNEL
);
102 if (!res
->lksb
.sb_lvbptr
) {
103 pr_err("md-cluster: Unable to allocate LVB for resource %s\n", name
);
/* Ask dlm_lock() to use the value block allocated above. */
106 res
->flags
= DLM_LKF_VALBLK
;
/* EXPEDITE is set only for the initial NL request below. */
112 res
->flags
|= DLM_LKF_EXPEDITE
;
114 ret
= dlm_lock_sync(res
, DLM_LOCK_NL
);
116 pr_err("md-cluster: Unable to lock NL on new lock resource %s\n", name
);
/*
 * Later dlm_lock_sync() calls on this resource pass res->flags again,
 * so from here on they carry DLM_LKF_CONVERT and no longer EXPEDITE.
 */
119 res
->flags
&= ~DLM_LKF_EXPEDITE
;
120 res
->flags
|= DLM_LKF_CONVERT
;
/* NOTE(review): presumably the error-unwind path; its label is not visible. */
124 kfree(res
->lksb
.sb_lvbptr
);
/*
 * Release the DLM lock held through @res and free its LVB buffer.
 *
 * The completion is re-armed, dlm_unlock() is issued for the lock id in
 * res->lksb with @res as the AST argument, and the caller blocks until
 * the completion fires (presumably from sync_ast() -- confirm).
 *
 * NOTE(review): any NULL check on @res and the freeing of res->name and
 * of @res itself are not visible in this listing -- confirm.
 */
130 static void lockres_free(struct dlm_lock_resource
*res
)
135 init_completion(&res
->completion
);
136 dlm_unlock(res
->ls
, res
->lksb
.sb_lkid
, 0, &res
->lksb
, res
);
137 wait_for_completion(&res
->completion
);
140 kfree(res
->lksb
.sb_lvbptr
);
/*
 * Format the first 16 bytes of @src into @dest as lowercase hex with a
 * '-' inserted before bytes 4, 6, 8 and 10 (8-4-4-4-12 grouping).
 *
 * The loop emits 32 hex digits plus 4 dashes, and sprintf() appends a
 * terminating NUL, so @dest needs room for at least 37 bytes.
 *
 * NOTE(review): the declarations of i and len and the return statement
 * are not visible in this listing.
 */
144 static char *pretty_uuid(char *dest
, char *src
)
148 for (i
= 0; i
< 16; i
++) {
149 if (i
== 4 || i
== 6 || i
== 8 || i
== 10)
150 len
+= sprintf(dest
+ len
, "-");
/* Cast to __u8 so a negative char is not sign-extended past two digits. */
151 len
+= sprintf(dest
+ len
, "%02x", (__u8
)src
[i
]);
/*
 * Write the range [@lo, @hi] into the LVB buffer of @lockres, treating
 * that buffer as a struct resync_info with little-endian 64-bit fields.
 * Only the buffer is updated here; resync_info_update() follows this
 * with a lock request.  @mddev is not used in the visible lines.
 */
156 static void add_resync_info(struct mddev
*mddev
, struct dlm_lock_resource
*lockres
,
157 sector_t lo
, sector_t hi
)
159 struct resync_info
*ri
;
161 ri
= (struct resync_info
*)lockres
->lksb
.sb_lvbptr
;
162 ri
->lo
= cpu_to_le64(lo
);
163 ri
->hi
= cpu_to_le64(hi
);
/*
 * Read the resync range stored in the LVB of @lockres.
 *
 * Takes @lockres in DLM_LOCK_CR mode, copies the LVB into a local
 * struct resync_info, converts the little-endian fields to CPU order,
 * and fills a kzalloc()ed struct suspend_info before dropping the lock
 * back to NL with dlm_unlock_sync().
 *
 * NOTE(review): the condition guarding the allocation, the NULL check,
 * the assignment of s->hi and the return statement are not visible in
 * this listing; presumably s is returned and may be NULL -- confirm.
 * The return value of dlm_lock_sync() is not used here.
 */
166 static struct suspend_info
*read_resync_info(struct mddev
*mddev
, struct dlm_lock_resource
*lockres
)
168 struct resync_info ri
;
169 struct suspend_info
*s
= NULL
;
172 dlm_lock_sync(lockres
, DLM_LOCK_CR
);
173 memcpy(&ri
, lockres
->lksb
.sb_lvbptr
, sizeof(struct resync_info
));
174 hi
= le64_to_cpu(ri
.hi
);
176 s
= kzalloc(sizeof(struct suspend_info
), GFP_KERNEL
);
180 s
->lo
= le64_to_cpu(ri
.lo
);
182 dlm_unlock_sync(lockres
);
/*
 * Lockspace recover_prep callback, installed through md_ls_ops.
 * NOTE(review): the body is not visible in this listing.
 */
187 static void recover_prep(void *arg
)
/*
 * Lockspace recover_slot callback, installed through md_ls_ops.
 * @arg is the mddev that join() passed to dlm_new_lockspace(); @slot
 * describes the node that went down.  The visible code only logs the
 * cluster name and the node id and slot of the failed node.
 *
 * NOTE(review): the final pr_info() argument (for "My slot") and the
 * end of the function are not visible in this listing.
 */
191 static void recover_slot(void *arg
, struct dlm_slot
*slot
)
193 struct mddev
*mddev
= arg
;
194 struct md_cluster_info
*cinfo
= mddev
->cluster_info
;
196 pr_info("md-cluster: %s Node %d/%d down. My slot: %d. Initiating recovery.\n",
197 mddev
->bitmap_info
.cluster_name
,
198 slot
->nodeid
, slot
->slot
,
/*
 * Lockspace recover_done callback, installed through md_ls_ops.
 * Records @our_slot in cinfo->slot_number and completes
 * cinfo->completion, which join() is waiting on.  @slots and @num_slots
 * are not used in the visible lines.
 *
 * NOTE(review): the rest of the parameter list is not visible here
 * (embedded numbers 204-205 are skipped).
 */
202 static void recover_done(void *arg
, struct dlm_slot
*slots
,
203 int num_slots
, int our_slot
,
206 struct mddev
*mddev
= arg
;
207 struct md_cluster_info
*cinfo
= mddev
->cluster_info
;
209 cinfo
->slot_number
= our_slot
;
210 complete(&cinfo
->completion
);
/*
 * Recovery callback table that join() passes to dlm_new_lockspace().
 * NOTE(review): the closing brace is not visible in this listing.
 */
213 static const struct dlm_lockspace_ops md_ls_ops
= {
214 .recover_prep
= recover_prep
,
215 .recover_slot
= recover_slot
,
216 .recover_done
= recover_done
,
/*
 * Walk the slots 0 .. @total_slots - 1 and collect resync ranges.
 *
 * For each slot a "bitmapNNNN" lock resource is created and a
 * DLM_LOCK_PW request is made with DLM_LKF_NOQUEUE.  If that request
 * returns -EAGAIN (presumably because another node holds the lock --
 * confirm against the DLM documentation), the range is read through
 * read_resync_info() and the resulting suspend_info is added to
 * cinfo->suspend_list under cinfo->suspend_lock.
 *
 * NOTE(review): the declarations of i, ret and str, the NULL checks,
 * the statements that follow the own-slot test and the lock results,
 * and the return are not visible in this listing (the embedded line
 * numbers skip) -- confirm the control flow against the complete file.
 */
219 static int gather_all_resync_info(struct mddev
*mddev
, int total_slots
)
221 struct md_cluster_info
*cinfo
= mddev
->cluster_info
;
223 struct dlm_lock_resource
*bm_lockres
;
224 struct suspend_info
*s
;
228 for (i
= 0; i
< total_slots
; i
++) {
229 memset(str
, '\0', 64);
230 snprintf(str
, 64, "bitmap%04d", i
);
231 bm_lockres
= lockres_init(mddev
, str
, NULL
, 1);
/*
 * slot_number counts from 1, so this matches our own slot.
 * NOTE(review): the statement this condition controls is not visible.
 */
234 if (i
== (cinfo
->slot_number
- 1))
/* NOQUEUE is added so the PW request below is made without queueing. */
237 bm_lockres
->flags
|= DLM_LKF_NOQUEUE
;
238 ret
= dlm_lock_sync(bm_lockres
, DLM_LOCK_PW
);
239 if (ret
== -EAGAIN
) {
/* Clear the local LVB copy before read_resync_info() fetches it. */
240 memset(bm_lockres
->lksb
.sb_lvbptr
, '\0', LVB_SIZE
);
241 s
= read_resync_info(mddev
, bm_lockres
);
243 pr_info("%s:%d Resync[%llu..%llu] in progress on %d\n",
245 (unsigned long long) s
->lo
,
246 (unsigned long long) s
->hi
, i
);
247 spin_lock_irq(&cinfo
->suspend_lock
);
249 list_add(&s
->list
, &cinfo
->suspend_list
);
250 spin_unlock_irq(&cinfo
->suspend_lock
);
253 lockres_free(bm_lockres
);
258 /* TODO: Read the disk bitmap sb and check if it needs recovery */
259 dlm_unlock_sync(bm_lockres
);
260 lockres_free(bm_lockres
);
/*
 * Join the cluster for @mddev, which is configured for @nodes slots.
 *
 * Visible sequence: pin the module, allocate and attach the
 * md_cluster_info, create the DLM lockspace named after the formatted
 * array UUID, wait for recover_done() to report our slot, create the
 * "cmd-super" and own "bitmapNNNN" lock resources, take the bitmap lock
 * in DLM_LOCK_PW mode, initialise the suspend list and its lock, and
 * gather resync information from the other slots.
 *
 * NOTE(review): the declarations of ret, ops_rv and str, most error
 * checks, the goto/label structure and the return statements are not
 * visible in this listing (the embedded line numbers skip) -- confirm
 * against the complete file.
 */
266 static int join(struct mddev
*mddev
, int nodes
)
268 struct md_cluster_info
*cinfo
;
/* Hold a module reference; the visible module_put() below drops it. */
272 if (!try_module_get(THIS_MODULE
))
275 cinfo
= kzalloc(sizeof(struct md_cluster_info
), GFP_KERNEL
);
279 init_completion(&cinfo
->completion
);
281 mutex_init(&cinfo
->sb_mutex
);
282 mddev
->cluster_info
= cinfo
;
/* The lockspace is named after the formatted array UUID. */
285 pretty_uuid(str
, mddev
->uuid
);
286 ret
= dlm_new_lockspace(str
, mddev
->bitmap_info
.cluster_name
,
287 DLM_LSFL_FS
, LVB_SIZE
,
288 &md_ls_ops
, mddev
, &ops_rv
, &cinfo
->lockspace
);
/* recover_done() stores our slot number and then completes this. */
291 wait_for_completion(&cinfo
->completion
);
/*
 * NOTE(review): slot_number counts from 1 (slot_number() returns
 * slot_number - 1), so "nodes <= slot_number" also rejects
 * slot_number == nodes, i.e. md slot nodes - 1 -- confirm whether "<"
 * was intended.  The message below also has no trailing newline.
 */
292 if (nodes
<= cinfo
->slot_number
) {
293 pr_err("md-cluster: Slot allotted(%d) greater than available slots(%d)", cinfo
->slot_number
- 1,
298 cinfo
->sb_lock
= lockres_init(mddev
, "cmd-super",
300 if (!cinfo
->sb_lock
) {
305 pr_info("md-cluster: Joined cluster %s slot %d\n", str
, cinfo
->slot_number
);
/* Our own bitmap lock is named after the zero-based slot index. */
306 snprintf(str
, 64, "bitmap%04d", cinfo
->slot_number
- 1);
307 cinfo
->bitmap_lockres
= lockres_init(mddev
, str
, NULL
, 1);
308 if (!cinfo
->bitmap_lockres
)
310 if (dlm_lock_sync(cinfo
->bitmap_lockres
, DLM_LOCK_PW
)) {
311 pr_err("Failed to get bitmap lock\n");
316 INIT_LIST_HEAD(&cinfo
->suspend_list
);
317 spin_lock_init(&cinfo
->suspend_lock
);
319 ret
= gather_all_resync_info(mddev
, nodes
);
/*
 * NOTE(review): presumably the error-unwind path (its label is not
 * visible): free both lock resources, release the lockspace if one was
 * created, detach cluster_info and drop the module reference.
 */
325 lockres_free(cinfo
->bitmap_lockres
);
326 lockres_free(cinfo
->sb_lock
);
327 if (cinfo
->lockspace
)
328 dlm_release_lockspace(cinfo
->lockspace
, 2);
329 mddev
->cluster_info
= NULL
;
331 module_put(THIS_MODULE
);
/*
 * Leave the cluster for @mddev: free the "cmd-super" and bitmap lock
 * resources and release the DLM lockspace (force argument 2).
 *
 * NOTE(review): any NULL check on cinfo and the return statement are
 * not visible in this listing -- confirm.
 */
335 static int leave(struct mddev
*mddev
)
337 struct md_cluster_info
*cinfo
= mddev
->cluster_info
;
341 lockres_free(cinfo
->sb_lock
);
342 lockres_free(cinfo
->bitmap_lockres
);
343 dlm_release_lockspace(cinfo
->lockspace
, 2);
347 /* slot_number(): Returns the MD slot number to use
348 * DLM starts the slot numbers from 1, whereas cluster-md
349 * wants the number to be from zero, so we deduct one */
/*
 * Return the zero-based MD slot index for @mddev, i.e. the slot number
 * stored in cinfo->slot_number by recover_done() minus one.
 */
351 static int slot_number(struct mddev
*mddev
)
353 struct md_cluster_info
*cinfo
= mddev
->cluster_info
;
355 return cinfo
->slot_number
- 1;
/*
 * Record the resync range [@lo, @hi] for this node: write it into the
 * LVB buffer of our bitmap lock resource with add_resync_info(), then
 * request DLM_LOCK_PW on that resource again.  The return value of
 * dlm_lock_sync() is not used here.
 */
358 static void resync_info_update(struct mddev
*mddev
, sector_t lo
, sector_t hi
)
360 struct md_cluster_info
*cinfo
= mddev
->cluster_info
;
362 add_resync_info(mddev
, cinfo
->bitmap_lockres
, lo
, hi
);
363 /* Re-acquire the lock to refresh LVB */
364 dlm_lock_sync(cinfo
->bitmap_lockres
, DLM_LOCK_PW
);
/*
 * Operations table registered with the MD core by cluster_init().
 *
 * NOTE(review): embedded numbers 368-369 are skipped, so further
 * entries (presumably for join() and leave()) and the closing brace are
 * not visible in this listing -- confirm.
 */
367 static struct md_cluster_operations cluster_ops
= {
370 .slot_number
= slot_number
,
371 .resync_info_update
= resync_info_update
,
/*
 * Module init: log an "experimental" warning and register cluster_ops
 * with the MD core.
 * NOTE(review): the return statement is not visible in this listing.
 */
374 static int __init
cluster_init(void)
376 pr_warn("md-cluster: EXPERIMENTAL. Use with caution\n");
377 pr_info("Registering Cluster MD functions\n");
378 register_md_cluster_operations(&cluster_ops
, THIS_MODULE
);
/* Module exit: unregister the cluster operations from the MD core. */
382 static void cluster_exit(void)
384 unregister_md_cluster_operations();
/* Module entry/exit hooks and metadata. */
387 module_init(cluster_init
);
388 module_exit(cluster_exit
);
389 MODULE_LICENSE("GPL");
390 MODULE_DESCRIPTION("Clustering support for MD");