4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
46 #define PRINT_CMD CDEBUG
48 #include "../include/obd.h"
49 #include "../include/linux/lustre_compat25.h"
50 #include "../include/obd_class.h"
51 #include "../include/lustre/lustre_user.h"
52 #include "../include/lustre_log.h"
53 #include "../include/lustre_disk.h"
54 #include "../include/lustre_param.h"
56 static int (*client_fill_super
)(struct super_block
*sb
,
57 struct vfsmount
*mnt
);
59 static void (*kill_super_cb
)(struct super_block
*sb
);
61 /**************** config llog ********************/
63 /** Get a config log from the MGS and process it.
64 * This func is called for both clients and servers.
65 * Continue to process new statements appended to the logs
66 * (whenever the config lock is revoked) until lustre_end_log
68 * @param sb The superblock is used by the MGC to write to the local copy of
70 * @param logname The name of the llog to replicate from the MGS
71 * @param cfg Since the same mgc may be used to follow multiple config logs
72 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
73 * this log, and is added to the mgc's list of logs to follow.
75 int lustre_process_log(struct super_block
*sb
, char *logname
,
76 struct config_llog_instance
*cfg
)
78 struct lustre_cfg
*lcfg
;
79 struct lustre_cfg_bufs
*bufs
;
80 struct lustre_sb_info
*lsi
= s2lsi(sb
);
81 struct obd_device
*mgc
= lsi
->lsi_mgc
;
87 bufs
= kzalloc(sizeof(*bufs
), GFP_NOFS
);
91 /* mgc_process_config */
92 lustre_cfg_bufs_reset(bufs
, mgc
->obd_name
);
93 lustre_cfg_bufs_set_string(bufs
, 1, logname
);
94 lustre_cfg_bufs_set(bufs
, 2, cfg
, sizeof(*cfg
));
95 lustre_cfg_bufs_set(bufs
, 3, &sb
, sizeof(sb
));
96 lcfg
= lustre_cfg_new(LCFG_LOG_START
, bufs
);
97 rc
= obd_process_config(mgc
, sizeof(*lcfg
), lcfg
);
98 lustre_cfg_free(lcfg
);
103 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
104 mgc
->obd_name
, logname
, rc
);
107 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
108 mgc
->obd_name
, logname
,
111 /* class_obd_list(); */
114 EXPORT_SYMBOL(lustre_process_log
);
116 /* Stop watching this config log for updates */
117 int lustre_end_log(struct super_block
*sb
, char *logname
,
118 struct config_llog_instance
*cfg
)
120 struct lustre_cfg
*lcfg
;
121 struct lustre_cfg_bufs bufs
;
122 struct lustre_sb_info
*lsi
= s2lsi(sb
);
123 struct obd_device
*mgc
= lsi
->lsi_mgc
;
129 /* mgc_process_config */
130 lustre_cfg_bufs_reset(&bufs
, mgc
->obd_name
);
131 lustre_cfg_bufs_set_string(&bufs
, 1, logname
);
133 lustre_cfg_bufs_set(&bufs
, 2, cfg
, sizeof(*cfg
));
134 lcfg
= lustre_cfg_new(LCFG_LOG_END
, &bufs
);
135 rc
= obd_process_config(mgc
, sizeof(*lcfg
), lcfg
);
136 lustre_cfg_free(lcfg
);
139 EXPORT_SYMBOL(lustre_end_log
);
141 /**************** obd start *******************/
143 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
144 * lctl (and do for echo cli/srv.
146 int do_lcfg(char *cfgname
, lnet_nid_t nid
, int cmd
,
147 char *s1
, char *s2
, char *s3
, char *s4
)
149 struct lustre_cfg_bufs bufs
;
150 struct lustre_cfg
*lcfg
= NULL
;
153 CDEBUG(D_TRACE
, "lcfg %s %#x %s %s %s %s\n", cfgname
,
154 cmd
, s1
, s2
, s3
, s4
);
156 lustre_cfg_bufs_reset(&bufs
, cfgname
);
158 lustre_cfg_bufs_set_string(&bufs
, 1, s1
);
160 lustre_cfg_bufs_set_string(&bufs
, 2, s2
);
162 lustre_cfg_bufs_set_string(&bufs
, 3, s3
);
164 lustre_cfg_bufs_set_string(&bufs
, 4, s4
);
166 lcfg
= lustre_cfg_new(cmd
, &bufs
);
167 lcfg
->lcfg_nid
= nid
;
168 rc
= class_process_config(lcfg
);
169 lustre_cfg_free(lcfg
);
172 EXPORT_SYMBOL(do_lcfg
);
174 /** Call class_attach and class_setup. These methods in turn call
175 * obd type-specific methods.
177 int lustre_start_simple(char *obdname
, char *type
, char *uuid
,
178 char *s1
, char *s2
, char *s3
, char *s4
)
181 CDEBUG(D_MOUNT
, "Starting obd %s (typ=%s)\n", obdname
, type
);
183 rc
= do_lcfg(obdname
, 0, LCFG_ATTACH
, type
, uuid
, NULL
, NULL
);
185 CERROR("%s attach error %d\n", obdname
, rc
);
188 rc
= do_lcfg(obdname
, 0, LCFG_SETUP
, s1
, s2
, s3
, s4
);
190 CERROR("%s setup error %d\n", obdname
, rc
);
191 do_lcfg(obdname
, 0, LCFG_DETACH
, NULL
, NULL
, NULL
, NULL
);
196 DEFINE_MUTEX(mgc_start_lock
);
198 /** Set up a mgc obd to process startup logs
200 * \param sb [in] super block of the mgc obd
202 * \retval 0 success, otherwise error code
204 int lustre_start_mgc(struct super_block
*sb
)
206 struct obd_connect_data
*data
= NULL
;
207 struct lustre_sb_info
*lsi
= s2lsi(sb
);
208 struct obd_device
*obd
;
209 struct obd_export
*exp
;
210 struct obd_uuid
*uuid
;
213 char *mgcname
= NULL
, *niduuid
= NULL
, *mgssec
= NULL
;
215 int rc
= 0, i
= 0, j
, len
;
217 LASSERT(lsi
->lsi_lmd
);
219 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
220 ptr
= lsi
->lsi_lmd
->lmd_dev
;
221 if (class_parse_nid(ptr
, &nid
, &ptr
) == 0)
224 CERROR("No valid MGS nids found.\n");
228 mutex_lock(&mgc_start_lock
);
230 len
= strlen(LUSTRE_MGC_OBDNAME
) + strlen(libcfs_nid2str(nid
)) + 1;
231 mgcname
= kasprintf(GFP_NOFS
,
232 "%s%s", LUSTRE_MGC_OBDNAME
, libcfs_nid2str(nid
));
233 niduuid
= kasprintf(GFP_NOFS
, "%s_%x", mgcname
, i
);
234 if (!mgcname
|| !niduuid
) {
239 mgssec
= lsi
->lsi_lmd
->lmd_mgssec
? lsi
->lsi_lmd
->lmd_mgssec
: "";
241 data
= kzalloc(sizeof(*data
), GFP_NOFS
);
247 obd
= class_name2obd(mgcname
);
248 if (obd
&& !obd
->obd_stopping
) {
251 rc
= obd_set_info_async(NULL
, obd
->obd_self_export
,
252 strlen(KEY_MGSSEC
), KEY_MGSSEC
,
253 strlen(mgssec
), mgssec
, NULL
);
257 /* Re-using an existing MGC */
258 atomic_inc(&obd
->u
.cli
.cl_mgc_refcount
);
260 /* IR compatibility check, only for clients */
261 if (lmd_is_client(lsi
->lsi_lmd
)) {
263 int vallen
= sizeof(*data
);
264 __u32
*flags
= &lsi
->lsi_lmd
->lmd_flags
;
266 rc
= obd_get_info(NULL
, obd
->obd_self_export
,
267 strlen(KEY_CONN_DATA
), KEY_CONN_DATA
,
268 &vallen
, data
, NULL
);
270 has_ir
= OCD_HAS_FLAG(data
, IMP_RECOV
);
271 if (has_ir
^ !(*flags
& LMD_FLG_NOIR
)) {
272 /* LMD_FLG_NOIR is for test purpose only */
274 "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
275 has_ir
? "enabled" : "disabled");
277 *flags
&= ~LMD_FLG_NOIR
;
279 *flags
|= LMD_FLG_NOIR
;
285 /* Try all connections, but only once (again).
286 We don't want to block another target from starting
287 (using its local copy of the log), but we do want to connect
288 if at all possible. */
290 CDEBUG(D_MOUNT
, "%s: Set MGC reconnect %d\n", mgcname
,
292 rc
= obd_set_info_async(NULL
, obd
->obd_self_export
,
293 sizeof(KEY_INIT_RECOV_BACKUP
),
294 KEY_INIT_RECOV_BACKUP
,
295 sizeof(recov_bk
), &recov_bk
, NULL
);
300 CDEBUG(D_MOUNT
, "Start MGC '%s'\n", mgcname
);
302 /* Add the primary nids for the MGS */
304 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
305 ptr
= lsi
->lsi_lmd
->lmd_dev
;
306 while (class_parse_nid(ptr
, &nid
, &ptr
) == 0) {
307 rc
= do_lcfg(mgcname
, nid
,
308 LCFG_ADD_UUID
, niduuid
, NULL
, NULL
, NULL
);
310 /* Stop at the first failover nid */
315 CERROR("No valid MGS nids found.\n");
319 lsi
->lsi_lmd
->lmd_mgs_failnodes
= 1;
321 /* Random uuid for MGC allows easier reconnects */
322 uuid
= kzalloc(sizeof(*uuid
), GFP_NOFS
);
328 ll_generate_random_uuid(uuidc
);
329 class_uuid_unparse(uuidc
, uuid
);
332 rc
= lustre_start_simple(mgcname
, LUSTRE_MGC_NAME
,
333 (char *)uuid
->uuid
, LUSTRE_MGS_OBDNAME
,
334 niduuid
, NULL
, NULL
);
339 /* Add any failover MGS nids */
341 while (ptr
&& ((*ptr
== ':' ||
342 class_find_param(ptr
, PARAM_MGSNODE
, &ptr
) == 0))) {
343 /* New failover node */
344 sprintf(niduuid
, "%s_%x", mgcname
, i
);
346 while (class_parse_nid_quiet(ptr
, &nid
, &ptr
) == 0) {
348 rc
= do_lcfg(mgcname
, nid
,
349 LCFG_ADD_UUID
, niduuid
, NULL
, NULL
, NULL
);
354 rc
= do_lcfg(mgcname
, 0, LCFG_ADD_CONN
,
355 niduuid
, NULL
, NULL
, NULL
);
362 lsi
->lsi_lmd
->lmd_mgs_failnodes
= i
;
364 obd
= class_name2obd(mgcname
);
366 CERROR("Can't find mgcobd %s\n", mgcname
);
371 rc
= obd_set_info_async(NULL
, obd
->obd_self_export
,
372 strlen(KEY_MGSSEC
), KEY_MGSSEC
,
373 strlen(mgssec
), mgssec
, NULL
);
377 /* Keep a refcount of servers/clients who started with "mount",
378 so we know when we can get rid of the mgc. */
379 atomic_set(&obd
->u
.cli
.cl_mgc_refcount
, 1);
381 /* We connect to the MGS at setup, and don't disconnect until cleanup */
382 data
->ocd_connect_flags
= OBD_CONNECT_VERSION
| OBD_CONNECT_AT
|
383 OBD_CONNECT_FULL20
| OBD_CONNECT_IMP_RECOV
|
384 OBD_CONNECT_LVB_TYPE
;
386 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
387 data
->ocd_connect_flags
|= OBD_CONNECT_MNE_SWAB
;
389 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
392 if (lmd_is_client(lsi
->lsi_lmd
) &&
393 lsi
->lsi_lmd
->lmd_flags
& LMD_FLG_NOIR
)
394 data
->ocd_connect_flags
&= ~OBD_CONNECT_IMP_RECOV
;
395 data
->ocd_version
= LUSTRE_VERSION_CODE
;
396 rc
= obd_connect(NULL
, &exp
, obd
, &(obd
->obd_uuid
), data
, NULL
);
398 CERROR("connect failed %d\n", rc
);
402 obd
->u
.cli
.cl_mgc_mgsexp
= exp
;
405 /* Keep the mgc info in the sb. Note that many lsi's can point
409 mutex_unlock(&mgc_start_lock
);
417 static int lustre_stop_mgc(struct super_block
*sb
)
419 struct lustre_sb_info
*lsi
= s2lsi(sb
);
420 struct obd_device
*obd
;
421 char *niduuid
= NULL
, *ptr
= NULL
;
422 int i
, rc
= 0, len
= 0;
431 mutex_lock(&mgc_start_lock
);
432 LASSERT(atomic_read(&obd
->u
.cli
.cl_mgc_refcount
) > 0);
433 if (!atomic_dec_and_test(&obd
->u
.cli
.cl_mgc_refcount
)) {
434 /* This is not fatal, every client that stops
435 will call in here. */
436 CDEBUG(D_MOUNT
, "mgc still has %d references.\n",
437 atomic_read(&obd
->u
.cli
.cl_mgc_refcount
));
442 /* The MGC has no recoverable data in any case.
443 * force shutdown set in umount_begin */
444 obd
->obd_no_recov
= 1;
446 if (obd
->u
.cli
.cl_mgc_mgsexp
) {
447 /* An error is not fatal, if we are unable to send the
448 disconnect mgs ping evictor cleans up the export */
449 rc
= obd_disconnect(obd
->u
.cli
.cl_mgc_mgsexp
);
451 CDEBUG(D_MOUNT
, "disconnect failed %d\n", rc
);
454 /* Save the obdname for cleaning the nid uuids, which are
456 len
= strlen(obd
->obd_name
) + 6;
457 niduuid
= kzalloc(len
, GFP_NOFS
);
459 strcpy(niduuid
, obd
->obd_name
);
460 ptr
= niduuid
+ strlen(niduuid
);
463 rc
= class_manual_cleanup(obd
);
467 /* Clean the nid uuids */
473 for (i
= 0; i
< lsi
->lsi_lmd
->lmd_mgs_failnodes
; i
++) {
474 sprintf(ptr
, "_%x", i
);
475 rc
= do_lcfg(LUSTRE_MGC_OBDNAME
, 0, LCFG_DEL_UUID
,
476 niduuid
, NULL
, NULL
, NULL
);
478 CERROR("del MDC UUID %s failed: rc = %d\n",
484 /* class_import_put will get rid of the additional connections */
485 mutex_unlock(&mgc_start_lock
);
489 /***************** lustre superblock **************/
491 struct lustre_sb_info
*lustre_init_lsi(struct super_block
*sb
)
493 struct lustre_sb_info
*lsi
;
495 lsi
= kzalloc(sizeof(*lsi
), GFP_NOFS
);
498 lsi
->lsi_lmd
= kzalloc(sizeof(*lsi
->lsi_lmd
), GFP_NOFS
);
504 lsi
->lsi_lmd
->lmd_exclude_count
= 0;
505 lsi
->lsi_lmd
->lmd_recovery_time_soft
= 0;
506 lsi
->lsi_lmd
->lmd_recovery_time_hard
= 0;
507 s2lsi_nocast(sb
) = lsi
;
508 /* we take 1 extra ref for our setup */
509 atomic_set(&lsi
->lsi_mounts
, 1);
511 /* Default umount style */
512 lsi
->lsi_flags
= LSI_UMOUNT_FAILOVER
;
517 static int lustre_free_lsi(struct super_block
*sb
)
519 struct lustre_sb_info
*lsi
= s2lsi(sb
);
521 LASSERT(lsi
!= NULL
);
522 CDEBUG(D_MOUNT
, "Freeing lsi %p\n", lsi
);
524 /* someone didn't call server_put_mount. */
525 LASSERT(atomic_read(&lsi
->lsi_mounts
) == 0);
527 if (lsi
->lsi_lmd
!= NULL
) {
528 kfree(lsi
->lsi_lmd
->lmd_dev
);
529 kfree(lsi
->lsi_lmd
->lmd_profile
);
530 kfree(lsi
->lsi_lmd
->lmd_mgssec
);
531 kfree(lsi
->lsi_lmd
->lmd_opts
);
532 if (lsi
->lsi_lmd
->lmd_exclude_count
)
533 kfree(lsi
->lsi_lmd
->lmd_exclude
);
534 kfree(lsi
->lsi_lmd
->lmd_mgs
);
535 kfree(lsi
->lsi_lmd
->lmd_osd_type
);
536 kfree(lsi
->lsi_lmd
->lmd_params
);
541 LASSERT(lsi
->lsi_llsbi
== NULL
);
543 s2lsi_nocast(sb
) = NULL
;
548 /* The lsi has one reference for every server that is using the disk -
549 e.g. MDT, MGS, and potentially MGC */
550 int lustre_put_lsi(struct super_block
*sb
)
552 struct lustre_sb_info
*lsi
= s2lsi(sb
);
554 LASSERT(lsi
!= NULL
);
556 CDEBUG(D_MOUNT
, "put %p %d\n", sb
, atomic_read(&lsi
->lsi_mounts
));
557 if (atomic_dec_and_test(&lsi
->lsi_mounts
)) {
565 * <FSNAME><SEPARATOR><TYPE><INDEX>
566 * FSNAME is between 1 and 8 characters (inclusive).
567 * Excluded characters are '/' and ':'
568 * SEPARATOR is either ':' or '-'
569 * TYPE: "OST", "MDT", etc.
570 * INDEX: Hex representation of the index
573 /** Get the fsname ("lustre") from the server name ("lustre-OST003F").
574 * @param [in] svname server name including type and index
575 * @param [out] fsname Buffer to copy filesystem name prefix into.
576 * Must have at least 'strlen(fsname) + 1' chars.
577 * @param [out] endptr if endptr isn't NULL it is set to end of fsname
580 int server_name2fsname(const char *svname
, char *fsname
, const char **endptr
)
584 dash
= svname
+ strnlen(svname
, 8); /* max fsname length is 8 */
585 for (; dash
> svname
&& *dash
!= '-' && *dash
!= ':'; dash
--)
590 if (fsname
!= NULL
) {
591 strncpy(fsname
, svname
, dash
- svname
);
592 fsname
[dash
- svname
] = '\0';
600 EXPORT_SYMBOL(server_name2fsname
);
603 * Get service name (svname) from string
605 * if endptr isn't NULL it is set to end of fsname *
607 int server_name2svname(const char *label
, char *svname
, const char **endptr
,
613 /* We use server_name2fsname() just for parsing */
614 rc
= server_name2fsname(label
, NULL
, &dash
);
621 if (strlcpy(svname
, dash
+ 1, svsize
) >= svsize
)
626 EXPORT_SYMBOL(server_name2svname
);
629 /* Get the index from the obd name.
632 if endptr isn't NULL it is set to end of name */
633 int server_name2index(const char *svname
, __u32
*idx
, const char **endptr
)
639 /* We use server_name2fsname() just for parsing */
640 rc
= server_name2fsname(svname
, NULL
, &dash
);
646 if (strncmp(dash
, "MDT", 3) == 0)
647 rc
= LDD_F_SV_TYPE_MDT
;
648 else if (strncmp(dash
, "OST", 3) == 0)
649 rc
= LDD_F_SV_TYPE_OST
;
655 if (strncmp(dash
, "all", 3) == 0) {
658 return rc
| LDD_F_SV_ALL
;
661 index
= simple_strtoul(dash
, (char **)endptr
, 16);
665 /* Account for -mdc after index that is possible when specifying mdt */
666 if (endptr
!= NULL
&& strncmp(LUSTRE_MDC_NAME
, *endptr
+ 1,
667 sizeof(LUSTRE_MDC_NAME
)-1) == 0)
668 *endptr
+= sizeof(LUSTRE_MDC_NAME
);
672 EXPORT_SYMBOL(server_name2index
);
674 /*************** mount common between server and client ***************/
677 int lustre_common_put_super(struct super_block
*sb
)
681 CDEBUG(D_MOUNT
, "dropping sb %p\n", sb
);
683 /* Drop a ref to the MGC */
684 rc
= lustre_stop_mgc(sb
);
685 if (rc
&& (rc
!= -ENOENT
)) {
687 CERROR("Can't stop MGC: %d\n", rc
);
690 /* BUSY just means that there's some other obd that
691 needs the mgc. Let him clean it up. */
692 CDEBUG(D_MOUNT
, "MGC still in use\n");
694 /* Drop a ref to the mounted disk */
699 EXPORT_SYMBOL(lustre_common_put_super
);
701 static void lmd_print(struct lustre_mount_data
*lmd
)
705 PRINT_CMD(D_MOUNT
, " mount data:\n");
706 if (lmd_is_client(lmd
))
707 PRINT_CMD(D_MOUNT
, "profile: %s\n", lmd
->lmd_profile
);
708 PRINT_CMD(D_MOUNT
, "device: %s\n", lmd
->lmd_dev
);
709 PRINT_CMD(D_MOUNT
, "flags: %x\n", lmd
->lmd_flags
);
712 PRINT_CMD(D_MOUNT
, "options: %s\n", lmd
->lmd_opts
);
714 if (lmd
->lmd_recovery_time_soft
)
715 PRINT_CMD(D_MOUNT
, "recovery time soft: %d\n",
716 lmd
->lmd_recovery_time_soft
);
718 if (lmd
->lmd_recovery_time_hard
)
719 PRINT_CMD(D_MOUNT
, "recovery time hard: %d\n",
720 lmd
->lmd_recovery_time_hard
);
722 for (i
= 0; i
< lmd
->lmd_exclude_count
; i
++) {
723 PRINT_CMD(D_MOUNT
, "exclude %d: OST%04x\n", i
,
724 lmd
->lmd_exclude
[i
]);
728 /* Is this server on the exclusion list */
729 int lustre_check_exclusion(struct super_block
*sb
, char *svname
)
731 struct lustre_sb_info
*lsi
= s2lsi(sb
);
732 struct lustre_mount_data
*lmd
= lsi
->lsi_lmd
;
736 rc
= server_name2index(svname
, &index
, NULL
);
737 if (rc
!= LDD_F_SV_TYPE_OST
)
738 /* Only exclude OSTs */
741 CDEBUG(D_MOUNT
, "Check exclusion %s (%d) in %d of %s\n", svname
,
742 index
, lmd
->lmd_exclude_count
, lmd
->lmd_dev
);
744 for (i
= 0; i
< lmd
->lmd_exclude_count
; i
++) {
745 if (index
== lmd
->lmd_exclude
[i
]) {
746 CWARN("Excluding %s (on exclusion list)\n", svname
);
753 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
754 static int lmd_make_exclusion(struct lustre_mount_data
*lmd
, const char *ptr
)
756 const char *s1
= ptr
, *s2
;
757 __u32 index
, *exclude_list
;
760 /* The shortest an ost name can be is 8 chars: -OST0000.
761 We don't actually know the fsname at this time, so in fact
762 a user could specify any fsname. */
763 devmax
= strlen(ptr
) / 8 + 1;
765 /* temp storage until we figure out how many we have */
766 exclude_list
= kcalloc(devmax
, sizeof(index
), GFP_NOFS
);
770 /* we enter this fn pointing at the '=' */
771 while (*s1
&& *s1
!= ' ' && *s1
!= ',') {
773 rc
= server_name2index(s1
, &index
, &s2
);
775 CERROR("Can't parse server name '%s': rc = %d\n",
779 if (rc
== LDD_F_SV_TYPE_OST
)
780 exclude_list
[lmd
->lmd_exclude_count
++] = index
;
782 CDEBUG(D_MOUNT
, "ignoring exclude %.*s: type = %#x\n",
783 (uint
)(s2
-s1
), s1
, rc
);
785 /* now we are pointing at ':' (next exclude)
786 or ',' (end of excludes) */
787 if (lmd
->lmd_exclude_count
>= devmax
)
790 if (rc
>= 0) /* non-err */
793 if (lmd
->lmd_exclude_count
) {
794 /* permanent, freed in lustre_free_lsi */
795 lmd
->lmd_exclude
= kcalloc(lmd
->lmd_exclude_count
,
796 sizeof(index
), GFP_NOFS
);
797 if (lmd
->lmd_exclude
) {
798 memcpy(lmd
->lmd_exclude
, exclude_list
,
799 sizeof(index
) * lmd
->lmd_exclude_count
);
802 lmd
->lmd_exclude_count
= 0;
809 static int lmd_parse_mgssec(struct lustre_mount_data
*lmd
, char *ptr
)
814 kfree(lmd
->lmd_mgssec
);
815 lmd
->lmd_mgssec
= NULL
;
817 tail
= strchr(ptr
, ',');
819 length
= strlen(ptr
);
823 lmd
->lmd_mgssec
= kzalloc(length
+ 1, GFP_NOFS
);
824 if (!lmd
->lmd_mgssec
)
827 memcpy(lmd
->lmd_mgssec
, ptr
, length
);
828 lmd
->lmd_mgssec
[length
] = '\0';
832 static int lmd_parse_string(char **handle
, char *ptr
)
837 if ((handle
== NULL
) || (ptr
== NULL
))
843 tail
= strchr(ptr
, ',');
845 length
= strlen(ptr
);
849 *handle
= kzalloc(length
+ 1, GFP_NOFS
);
853 memcpy(*handle
, ptr
, length
);
854 (*handle
)[length
] = '\0';
859 /* Collect multiple values for mgsnid specifiers */
860 static int lmd_parse_mgs(struct lustre_mount_data
*lmd
, char **ptr
)
868 /* Find end of nidlist */
869 while (class_parse_nid_quiet(tail
, &nid
, &tail
) == 0)
871 length
= tail
- *ptr
;
873 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr
);
877 if (lmd
->lmd_mgs
!= NULL
)
878 oldlen
= strlen(lmd
->lmd_mgs
) + 1;
880 mgsnid
= kzalloc(oldlen
+ length
+ 1, GFP_NOFS
);
884 if (lmd
->lmd_mgs
!= NULL
) {
885 /* Multiple mgsnid= are taken to mean failover locations */
886 memcpy(mgsnid
, lmd
->lmd_mgs
, oldlen
);
887 mgsnid
[oldlen
- 1] = ':';
890 memcpy(mgsnid
+ oldlen
, *ptr
, length
);
891 mgsnid
[oldlen
+ length
] = '\0';
892 lmd
->lmd_mgs
= mgsnid
;
898 /** Parse mount line options
899 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
900 * dev is passed as device=uml1:/lustre by mount.lustre
902 static int lmd_parse(char *options
, struct lustre_mount_data
*lmd
)
904 char *s1
, *s2
, *devname
= NULL
;
905 struct lustre_mount_data
*raw
= (struct lustre_mount_data
*)options
;
910 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
914 /* Options should be a string - try to detect old lmd data */
915 if ((raw
->lmd_magic
& 0xffffff00) == (LMD_MAGIC
& 0xffffff00)) {
916 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre. Please install version %s\n",
917 LUSTRE_VERSION_STRING
);
920 lmd
->lmd_magic
= LMD_MAGIC
;
922 lmd
->lmd_params
= kzalloc(4096, GFP_NOFS
);
923 if (!lmd
->lmd_params
)
925 lmd
->lmd_params
[0] = '\0';
927 /* Set default flags here */
932 int time_min
= OBD_RECOVERY_TIME_MIN
;
934 /* Skip whitespace and extra commas */
935 while (*s1
== ' ' || *s1
== ',')
938 /* Client options are parsed in ll_options: eg. flock,
941 /* Parse non-ldiskfs options here. Rather than modifying
942 ldiskfs, we just zero these out here */
943 if (strncmp(s1
, "abort_recov", 11) == 0) {
944 lmd
->lmd_flags
|= LMD_FLG_ABORT_RECOV
;
946 } else if (strncmp(s1
, "recovery_time_soft=", 19) == 0) {
947 lmd
->lmd_recovery_time_soft
= max_t(int,
948 simple_strtoul(s1
+ 19, NULL
, 10), time_min
);
950 } else if (strncmp(s1
, "recovery_time_hard=", 19) == 0) {
951 lmd
->lmd_recovery_time_hard
= max_t(int,
952 simple_strtoul(s1
+ 19, NULL
, 10), time_min
);
954 } else if (strncmp(s1
, "noir", 4) == 0) {
955 lmd
->lmd_flags
|= LMD_FLG_NOIR
; /* test purpose only. */
957 } else if (strncmp(s1
, "nosvc", 5) == 0) {
958 lmd
->lmd_flags
|= LMD_FLG_NOSVC
;
960 } else if (strncmp(s1
, "nomgs", 5) == 0) {
961 lmd
->lmd_flags
|= LMD_FLG_NOMGS
;
963 } else if (strncmp(s1
, "noscrub", 7) == 0) {
964 lmd
->lmd_flags
|= LMD_FLG_NOSCRUB
;
966 } else if (strncmp(s1
, PARAM_MGSNODE
,
967 sizeof(PARAM_MGSNODE
) - 1) == 0) {
968 s2
= s1
+ sizeof(PARAM_MGSNODE
) - 1;
969 /* Assume the next mount opt is the first
970 invalid nid we get to. */
971 rc
= lmd_parse_mgs(lmd
, &s2
);
975 } else if (strncmp(s1
, "writeconf", 9) == 0) {
976 lmd
->lmd_flags
|= LMD_FLG_WRITECONF
;
978 } else if (strncmp(s1
, "update", 6) == 0) {
979 lmd
->lmd_flags
|= LMD_FLG_UPDATE
;
981 } else if (strncmp(s1
, "virgin", 6) == 0) {
982 lmd
->lmd_flags
|= LMD_FLG_VIRGIN
;
984 } else if (strncmp(s1
, "noprimnode", 10) == 0) {
985 lmd
->lmd_flags
|= LMD_FLG_NO_PRIMNODE
;
987 } else if (strncmp(s1
, "mgssec=", 7) == 0) {
988 rc
= lmd_parse_mgssec(lmd
, s1
+ 7);
992 /* ost exclusion list */
993 } else if (strncmp(s1
, "exclude=", 8) == 0) {
994 rc
= lmd_make_exclusion(lmd
, s1
+ 7);
998 } else if (strncmp(s1
, "mgs", 3) == 0) {
1000 lmd
->lmd_flags
|= LMD_FLG_MGS
;
1002 } else if (strncmp(s1
, "svname=", 7) == 0) {
1003 rc
= lmd_parse_string(&lmd
->lmd_profile
, s1
+ 7);
1007 } else if (strncmp(s1
, "param=", 6) == 0) {
1009 char *tail
= strchr(s1
+ 6, ',');
1011 length
= strlen(s1
);
1015 strncat(lmd
->lmd_params
, s1
+ 6, length
);
1016 strcat(lmd
->lmd_params
, " ");
1018 } else if (strncmp(s1
, "osd=", 4) == 0) {
1019 rc
= lmd_parse_string(&lmd
->lmd_osd_type
, s1
+ 4);
1024 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1025 end of the options. */
1026 else if (strncmp(s1
, "device=", 7) == 0) {
1028 /* terminate options right before device. device
1029 must be the last one. */
1035 s2
= strchr(s1
, ',');
1043 memmove(s1
, s2
, strlen(s2
) + 1);
1049 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
1053 s1
= strstr(devname
, ":/");
1056 lmd
->lmd_flags
|= LMD_FLG_CLIENT
;
1057 /* Remove leading /s from fsname */
1058 while (*++s1
== '/')
1060 /* Freed in lustre_free_lsi */
1061 lmd
->lmd_profile
= kasprintf(GFP_NOFS
, "%s-client", s1
);
1062 if (!lmd
->lmd_profile
)
1066 /* Freed in lustre_free_lsi */
1067 lmd
->lmd_dev
= kzalloc(strlen(devname
) + 1, GFP_NOFS
);
1070 strcpy(lmd
->lmd_dev
, devname
);
1072 /* Save mount options */
1073 s1
= options
+ strlen(options
) - 1;
1074 while (s1
>= options
&& (*s1
== ',' || *s1
== ' '))
1076 if (*options
!= 0) {
1077 /* Freed in lustre_free_lsi */
1078 lmd
->lmd_opts
= kzalloc(strlen(options
) + 1, GFP_NOFS
);
1081 strcpy(lmd
->lmd_opts
, options
);
1085 lmd
->lmd_magic
= LMD_MAGIC
;
1090 CERROR("Bad mount options %s\n", options
);
1094 struct lustre_mount_data2
{
1096 struct vfsmount
*lmd2_mnt
;
1099 /** This is the entry point for the mount call into Lustre.
1100 * This is called when a server or client is mounted,
1101 * and this is where we start setting things up.
1102 * @param data Mount options (e.g. -o flock,abort_recov)
1104 int lustre_fill_super(struct super_block
*sb
, void *data
, int silent
)
1106 struct lustre_mount_data
*lmd
;
1107 struct lustre_mount_data2
*lmd2
= data
;
1108 struct lustre_sb_info
*lsi
;
1111 CDEBUG(D_MOUNT
|D_VFSTRACE
, "VFS Op: sb %p\n", sb
);
1113 lsi
= lustre_init_lsi(sb
);
1119 * Disable lockdep during mount, because mount locking patterns are
1125 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1127 obd_zombie_barrier();
1129 /* Figure out the lmd from the mount options */
1130 if (lmd_parse((char *)(lmd2
->lmd2_data
), lmd
)) {
1136 if (lmd_is_client(lmd
)) {
1137 CDEBUG(D_MOUNT
, "Mounting client %s\n", lmd
->lmd_profile
);
1138 if (client_fill_super
== NULL
)
1139 request_module("lustre");
1140 if (client_fill_super
== NULL
) {
1141 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
1145 rc
= lustre_start_mgc(sb
);
1150 /* Connect and start */
1151 /* (should always be ll_fill_super) */
1152 rc
= (*client_fill_super
)(sb
, lmd2
->lmd2_mnt
);
1153 /* c_f_s will call lustre_common_put_super on failure */
1156 CERROR("This is client-side-only module, cannot handle server mount.\n");
1160 /* If error happens in fill_super() call, @lsi will be killed there.
1161 * This is why we do not put it here. */
1165 CERROR("Unable to mount %s (%d)\n",
1166 s2lsi(sb
) ? lmd
->lmd_dev
: "", rc
);
1168 CDEBUG(D_SUPER
, "Mount %s complete\n",
1176 /* We can't call ll_fill_super by name because it lives in a module that
1177 must be loaded after this one. */
1178 void lustre_register_client_fill_super(int (*cfs
)(struct super_block
*sb
,
1179 struct vfsmount
*mnt
))
1181 client_fill_super
= cfs
;
1183 EXPORT_SYMBOL(lustre_register_client_fill_super
);
1185 void lustre_register_kill_super_cb(void (*cfs
)(struct super_block
*sb
))
1187 kill_super_cb
= cfs
;
1189 EXPORT_SYMBOL(lustre_register_kill_super_cb
);
1191 /***************** FS registration ******************/
1192 struct dentry
*lustre_mount(struct file_system_type
*fs_type
, int flags
,
1193 const char *devname
, void *data
)
1195 struct lustre_mount_data2 lmd2
= {
1200 return mount_nodev(fs_type
, flags
, &lmd2
, lustre_fill_super
);
1203 static void lustre_kill_super(struct super_block
*sb
)
1205 struct lustre_sb_info
*lsi
= s2lsi(sb
);
1207 if (kill_super_cb
&& lsi
)
1208 (*kill_super_cb
)(sb
);
1210 kill_anon_super(sb
);
1213 /** Register the "lustre" fs type
1215 struct file_system_type lustre_fs_type
= {
1216 .owner
= THIS_MODULE
,
1218 .mount
= lustre_mount
,
1219 .kill_sb
= lustre_kill_super
,
1220 .fs_flags
= FS_BINARY_MOUNTDATA
| FS_REQUIRES_DEV
|
1221 FS_RENAME_DOES_D_MOVE
,
1223 MODULE_ALIAS_FS("lustre");
1225 int lustre_register_fs(void)
1227 return register_filesystem(&lustre_fs_type
);
1230 int lustre_unregister_fs(void)
1232 return unregister_filesystem(&lustre_fs_type
);