4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License version 2 only,
8 * as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License version 2 for more details (a copy is included
14 * in the LICENSE file that accompanied this code).
16 * You should have received a copy of the GNU General Public License
17 * version 2 along with this program; If not, see
18 * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf
20 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
21 * CA 95054 USA or visit www.sun.com if you need additional information or
27 * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
28 * Use is subject to license terms.
30 * Copyright (c) 2011, 2012, Intel Corporation.
33 * This file is part of Lustre, http://www.lustre.org/
34 * Lustre is a trademark of Sun Microsystems, Inc.
36 * lustre/obdclass/obd_mount.c
38 * Client mount routines
40 * Author: Nathan Rutman <nathan@clusterfs.com>
44 #define DEBUG_SUBSYSTEM S_CLASS
45 #define D_MOUNT (D_SUPER|D_CONFIG/*|D_WARNING */)
46 #define PRINT_CMD CDEBUG
48 #include "../include/obd.h"
49 #include "../include/linux/lustre_compat25.h"
50 #include "../include/obd_class.h"
51 #include "../include/lustre/lustre_user.h"
52 #include "../include/lustre_log.h"
53 #include "../include/lustre_disk.h"
54 #include "../include/lustre_param.h"
/*
 * Client mount entry point.  It is installed with
 * lustre_register_client_fill_super() and stays NULL until then;
 * lustre_fill_super() checks for NULL before calling through it.
 */
static int (*client_fill_super)(struct super_block *sb,
				struct vfsmount *mnt);

/*
 * Optional teardown hook installed with lustre_register_kill_super_cb().
 * lustre_kill_super() invokes it only when it is non-NULL.
 */
static void (*kill_super_cb)(struct super_block *sb);
61 /**************** config llog ********************/
63 /** Get a config log from the MGS and process it.
64 * This func is called for both clients and servers.
65 * Continue to process new statements appended to the logs
66 * (whenever the config lock is revoked) until lustre_end_log
68 * @param sb The superblock is used by the MGC to write to the local copy of
70 * @param logname The name of the llog to replicate from the MGS
71 * @param cfg Since the same mgc may be used to follow multiple config logs
72 * (e.g. ost1, ost2, client), the config_llog_instance keeps the state for
73 * this log, and is added to the mgc's list of logs to follow.
75 int lustre_process_log(struct super_block
*sb
, char *logname
,
76 struct config_llog_instance
*cfg
)
78 struct lustre_cfg
*lcfg
;
79 struct lustre_cfg_bufs
*bufs
;
80 struct lustre_sb_info
*lsi
= s2lsi(sb
);
81 struct obd_device
*mgc
= lsi
->lsi_mgc
;
87 bufs
= kzalloc(sizeof(*bufs
), GFP_NOFS
);
91 /* mgc_process_config */
92 lustre_cfg_bufs_reset(bufs
, mgc
->obd_name
);
93 lustre_cfg_bufs_set_string(bufs
, 1, logname
);
94 lustre_cfg_bufs_set(bufs
, 2, cfg
, sizeof(*cfg
));
95 lustre_cfg_bufs_set(bufs
, 3, &sb
, sizeof(sb
));
96 lcfg
= lustre_cfg_new(LCFG_LOG_START
, bufs
);
97 rc
= obd_process_config(mgc
, sizeof(*lcfg
), lcfg
);
98 lustre_cfg_free(lcfg
);
103 LCONSOLE_ERROR_MSG(0x15b, "%s: The configuration from log '%s' failed from the MGS (%d). Make sure this client and the MGS are running compatible versions of Lustre.\n",
104 mgc
->obd_name
, logname
, rc
);
107 LCONSOLE_ERROR_MSG(0x15c, "%s: The configuration from log '%s' failed (%d). This may be the result of communication errors between this node and the MGS, a bad configuration, or other errors. See the syslog for more information.\n",
108 mgc
->obd_name
, logname
,
111 /* class_obd_list(); */
114 EXPORT_SYMBOL(lustre_process_log
);
116 /* Stop watching this config log for updates */
117 int lustre_end_log(struct super_block
*sb
, char *logname
,
118 struct config_llog_instance
*cfg
)
120 struct lustre_cfg
*lcfg
;
121 struct lustre_cfg_bufs bufs
;
122 struct lustre_sb_info
*lsi
= s2lsi(sb
);
123 struct obd_device
*mgc
= lsi
->lsi_mgc
;
129 /* mgc_process_config */
130 lustre_cfg_bufs_reset(&bufs
, mgc
->obd_name
);
131 lustre_cfg_bufs_set_string(&bufs
, 1, logname
);
133 lustre_cfg_bufs_set(&bufs
, 2, cfg
, sizeof(*cfg
));
134 lcfg
= lustre_cfg_new(LCFG_LOG_END
, &bufs
);
135 rc
= obd_process_config(mgc
, sizeof(*lcfg
), lcfg
);
136 lustre_cfg_free(lcfg
);
139 EXPORT_SYMBOL(lustre_end_log
);
141 /**************** obd start *******************/
143 /** lustre_cfg_bufs are a holdover from 1.4; we can still set these up from
144 * lctl (and do for echo cli/srv.
146 int do_lcfg(char *cfgname
, lnet_nid_t nid
, int cmd
,
147 char *s1
, char *s2
, char *s3
, char *s4
)
149 struct lustre_cfg_bufs bufs
;
150 struct lustre_cfg
*lcfg
= NULL
;
153 CDEBUG(D_TRACE
, "lcfg %s %#x %s %s %s %s\n", cfgname
,
154 cmd
, s1
, s2
, s3
, s4
);
156 lustre_cfg_bufs_reset(&bufs
, cfgname
);
158 lustre_cfg_bufs_set_string(&bufs
, 1, s1
);
160 lustre_cfg_bufs_set_string(&bufs
, 2, s2
);
162 lustre_cfg_bufs_set_string(&bufs
, 3, s3
);
164 lustre_cfg_bufs_set_string(&bufs
, 4, s4
);
166 lcfg
= lustre_cfg_new(cmd
, &bufs
);
167 lcfg
->lcfg_nid
= nid
;
168 rc
= class_process_config(lcfg
);
169 lustre_cfg_free(lcfg
);
172 EXPORT_SYMBOL(do_lcfg
);
174 /** Call class_attach and class_setup. These methods in turn call
175 * obd type-specific methods.
177 int lustre_start_simple(char *obdname
, char *type
, char *uuid
,
178 char *s1
, char *s2
, char *s3
, char *s4
)
181 CDEBUG(D_MOUNT
, "Starting obd %s (typ=%s)\n", obdname
, type
);
183 rc
= do_lcfg(obdname
, 0, LCFG_ATTACH
, type
, uuid
, NULL
, NULL
);
185 CERROR("%s attach error %d\n", obdname
, rc
);
188 rc
= do_lcfg(obdname
, 0, LCFG_SETUP
, s1
, s2
, s3
, s4
);
190 CERROR("%s setup error %d\n", obdname
, rc
);
191 do_lcfg(obdname
, 0, LCFG_DETACH
, NULL
, NULL
, NULL
, NULL
);
/* Held by lustre_start_mgc() and lustre_stop_mgc() around MGC setup/teardown */
DEFINE_MUTEX(mgc_start_lock);
198 /** Set up a mgc obd to process startup logs
200 * \param sb [in] super block of the mgc obd
202 * \retval 0 success, otherwise error code
204 int lustre_start_mgc(struct super_block
*sb
)
206 struct obd_connect_data
*data
= NULL
;
207 struct lustre_sb_info
*lsi
= s2lsi(sb
);
208 struct obd_device
*obd
;
209 struct obd_export
*exp
;
210 struct obd_uuid
*uuid
;
213 char *mgcname
= NULL
, *niduuid
= NULL
, *mgssec
= NULL
;
215 int rc
= 0, i
= 0, j
, len
;
217 LASSERT(lsi
->lsi_lmd
);
219 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
220 ptr
= lsi
->lsi_lmd
->lmd_dev
;
221 if (class_parse_nid(ptr
, &nid
, &ptr
) == 0)
224 CERROR("No valid MGS nids found.\n");
228 mutex_lock(&mgc_start_lock
);
230 len
= strlen(LUSTRE_MGC_OBDNAME
) + strlen(libcfs_nid2str(nid
)) + 1;
231 mgcname
= kasprintf(GFP_NOFS
,
232 "%s%s", LUSTRE_MGC_OBDNAME
, libcfs_nid2str(nid
));
233 niduuid
= kasprintf(GFP_NOFS
, "%s_%x", mgcname
, i
);
234 if (!mgcname
|| !niduuid
) {
239 mgssec
= lsi
->lsi_lmd
->lmd_mgssec
? lsi
->lsi_lmd
->lmd_mgssec
: "";
241 data
= kzalloc(sizeof(*data
), GFP_NOFS
);
247 obd
= class_name2obd(mgcname
);
248 if (obd
&& !obd
->obd_stopping
) {
251 rc
= obd_set_info_async(NULL
, obd
->obd_self_export
,
252 strlen(KEY_MGSSEC
), KEY_MGSSEC
,
253 strlen(mgssec
), mgssec
, NULL
);
257 /* Re-using an existing MGC */
258 atomic_inc(&obd
->u
.cli
.cl_mgc_refcount
);
260 /* IR compatibility check, only for clients */
261 if (lmd_is_client(lsi
->lsi_lmd
)) {
263 int vallen
= sizeof(*data
);
264 __u32
*flags
= &lsi
->lsi_lmd
->lmd_flags
;
266 rc
= obd_get_info(NULL
, obd
->obd_self_export
,
267 strlen(KEY_CONN_DATA
), KEY_CONN_DATA
,
268 &vallen
, data
, NULL
);
270 has_ir
= OCD_HAS_FLAG(data
, IMP_RECOV
);
271 if (has_ir
^ !(*flags
& LMD_FLG_NOIR
)) {
272 /* LMD_FLG_NOIR is for test purpose only */
274 "Trying to mount a client with IR setting not compatible with current mgc. Force to use current mgc setting that is IR %s.\n",
275 has_ir
? "enabled" : "disabled");
277 *flags
&= ~LMD_FLG_NOIR
;
279 *flags
|= LMD_FLG_NOIR
;
284 /* If we are restarting the MGS, don't try to keep the MGC's
285 old connection, or registration will fail. */
287 CDEBUG(D_MOUNT
, "New MGS with live MGC\n");
291 /* Try all connections, but only once (again).
292 We don't want to block another target from starting
293 (using its local copy of the log), but we do want to connect
294 if at all possible. */
296 CDEBUG(D_MOUNT
, "%s: Set MGC reconnect %d\n", mgcname
,
298 rc
= obd_set_info_async(NULL
, obd
->obd_self_export
,
299 sizeof(KEY_INIT_RECOV_BACKUP
),
300 KEY_INIT_RECOV_BACKUP
,
301 sizeof(recov_bk
), &recov_bk
, NULL
);
306 CDEBUG(D_MOUNT
, "Start MGC '%s'\n", mgcname
);
308 /* Add the primary nids for the MGS */
310 /* Use nids from mount line: uml1,1@elan:uml2,2@elan:/lustre */
311 ptr
= lsi
->lsi_lmd
->lmd_dev
;
312 while (class_parse_nid(ptr
, &nid
, &ptr
) == 0) {
313 rc
= do_lcfg(mgcname
, nid
,
314 LCFG_ADD_UUID
, niduuid
, NULL
, NULL
, NULL
);
316 /* Stop at the first failover nid */
321 CERROR("No valid MGS nids found.\n");
325 lsi
->lsi_lmd
->lmd_mgs_failnodes
= 1;
327 /* Random uuid for MGC allows easier reconnects */
328 uuid
= kzalloc(sizeof(*uuid
), GFP_NOFS
);
334 ll_generate_random_uuid(uuidc
);
335 class_uuid_unparse(uuidc
, uuid
);
338 rc
= lustre_start_simple(mgcname
, LUSTRE_MGC_NAME
,
339 (char *)uuid
->uuid
, LUSTRE_MGS_OBDNAME
,
340 niduuid
, NULL
, NULL
);
345 /* Add any failover MGS nids */
347 while (ptr
&& ((*ptr
== ':' ||
348 class_find_param(ptr
, PARAM_MGSNODE
, &ptr
) == 0))) {
349 /* New failover node */
350 sprintf(niduuid
, "%s_%x", mgcname
, i
);
352 while (class_parse_nid_quiet(ptr
, &nid
, &ptr
) == 0) {
354 rc
= do_lcfg(mgcname
, nid
,
355 LCFG_ADD_UUID
, niduuid
, NULL
, NULL
, NULL
);
360 rc
= do_lcfg(mgcname
, 0, LCFG_ADD_CONN
,
361 niduuid
, NULL
, NULL
, NULL
);
368 lsi
->lsi_lmd
->lmd_mgs_failnodes
= i
;
370 obd
= class_name2obd(mgcname
);
372 CERROR("Can't find mgcobd %s\n", mgcname
);
377 rc
= obd_set_info_async(NULL
, obd
->obd_self_export
,
378 strlen(KEY_MGSSEC
), KEY_MGSSEC
,
379 strlen(mgssec
), mgssec
, NULL
);
383 /* Keep a refcount of servers/clients who started with "mount",
384 so we know when we can get rid of the mgc. */
385 atomic_set(&obd
->u
.cli
.cl_mgc_refcount
, 1);
387 /* We connect to the MGS at setup, and don't disconnect until cleanup */
388 data
->ocd_connect_flags
= OBD_CONNECT_VERSION
| OBD_CONNECT_AT
|
389 OBD_CONNECT_FULL20
| OBD_CONNECT_IMP_RECOV
|
390 OBD_CONNECT_LVB_TYPE
;
392 #if LUSTRE_VERSION_CODE < OBD_OCD_VERSION(3, 2, 50, 0)
393 data
->ocd_connect_flags
|= OBD_CONNECT_MNE_SWAB
;
395 #warning "LU-1644: Remove old OBD_CONNECT_MNE_SWAB fixup and imp_need_mne_swab"
398 if (lmd_is_client(lsi
->lsi_lmd
) &&
399 lsi
->lsi_lmd
->lmd_flags
& LMD_FLG_NOIR
)
400 data
->ocd_connect_flags
&= ~OBD_CONNECT_IMP_RECOV
;
401 data
->ocd_version
= LUSTRE_VERSION_CODE
;
402 rc
= obd_connect(NULL
, &exp
, obd
, &(obd
->obd_uuid
), data
, NULL
);
404 CERROR("connect failed %d\n", rc
);
408 obd
->u
.cli
.cl_mgc_mgsexp
= exp
;
411 /* Keep the mgc info in the sb. Note that many lsi's can point
415 mutex_unlock(&mgc_start_lock
);
423 static int lustre_stop_mgc(struct super_block
*sb
)
425 struct lustre_sb_info
*lsi
= s2lsi(sb
);
426 struct obd_device
*obd
;
427 char *niduuid
= NULL
, *ptr
= NULL
;
428 int i
, rc
= 0, len
= 0;
437 mutex_lock(&mgc_start_lock
);
438 LASSERT(atomic_read(&obd
->u
.cli
.cl_mgc_refcount
) > 0);
439 if (!atomic_dec_and_test(&obd
->u
.cli
.cl_mgc_refcount
)) {
440 /* This is not fatal, every client that stops
441 will call in here. */
442 CDEBUG(D_MOUNT
, "mgc still has %d references.\n",
443 atomic_read(&obd
->u
.cli
.cl_mgc_refcount
));
448 /* The MGC has no recoverable data in any case.
449 * force shutdown set in umount_begin */
450 obd
->obd_no_recov
= 1;
452 if (obd
->u
.cli
.cl_mgc_mgsexp
) {
453 /* An error is not fatal, if we are unable to send the
454 disconnect mgs ping evictor cleans up the export */
455 rc
= obd_disconnect(obd
->u
.cli
.cl_mgc_mgsexp
);
457 CDEBUG(D_MOUNT
, "disconnect failed %d\n", rc
);
460 /* Save the obdname for cleaning the nid uuids, which are
462 len
= strlen(obd
->obd_name
) + 6;
463 niduuid
= kzalloc(len
, GFP_NOFS
);
465 strcpy(niduuid
, obd
->obd_name
);
466 ptr
= niduuid
+ strlen(niduuid
);
469 rc
= class_manual_cleanup(obd
);
473 /* Clean the nid uuids */
479 for (i
= 0; i
< lsi
->lsi_lmd
->lmd_mgs_failnodes
; i
++) {
480 sprintf(ptr
, "_%x", i
);
481 rc
= do_lcfg(LUSTRE_MGC_OBDNAME
, 0, LCFG_DEL_UUID
,
482 niduuid
, NULL
, NULL
, NULL
);
484 CERROR("del MDC UUID %s failed: rc = %d\n",
490 /* class_import_put will get rid of the additional connections */
491 mutex_unlock(&mgc_start_lock
);
495 /***************** lustre superblock **************/
497 struct lustre_sb_info
*lustre_init_lsi(struct super_block
*sb
)
499 struct lustre_sb_info
*lsi
;
501 lsi
= kzalloc(sizeof(*lsi
), GFP_NOFS
);
504 lsi
->lsi_lmd
= kzalloc(sizeof(*lsi
->lsi_lmd
), GFP_NOFS
);
510 lsi
->lsi_lmd
->lmd_exclude_count
= 0;
511 lsi
->lsi_lmd
->lmd_recovery_time_soft
= 0;
512 lsi
->lsi_lmd
->lmd_recovery_time_hard
= 0;
513 s2lsi_nocast(sb
) = lsi
;
514 /* we take 1 extra ref for our setup */
515 atomic_set(&lsi
->lsi_mounts
, 1);
517 /* Default umount style */
518 lsi
->lsi_flags
= LSI_UMOUNT_FAILOVER
;
523 static int lustre_free_lsi(struct super_block
*sb
)
525 struct lustre_sb_info
*lsi
= s2lsi(sb
);
527 LASSERT(lsi
!= NULL
);
528 CDEBUG(D_MOUNT
, "Freeing lsi %p\n", lsi
);
530 /* someone didn't call server_put_mount. */
531 LASSERT(atomic_read(&lsi
->lsi_mounts
) == 0);
533 if (lsi
->lsi_lmd
!= NULL
) {
534 kfree(lsi
->lsi_lmd
->lmd_dev
);
535 kfree(lsi
->lsi_lmd
->lmd_profile
);
536 kfree(lsi
->lsi_lmd
->lmd_mgssec
);
537 kfree(lsi
->lsi_lmd
->lmd_opts
);
538 if (lsi
->lsi_lmd
->lmd_exclude_count
)
539 kfree(lsi
->lsi_lmd
->lmd_exclude
);
540 kfree(lsi
->lsi_lmd
->lmd_mgs
);
541 kfree(lsi
->lsi_lmd
->lmd_osd_type
);
542 kfree(lsi
->lsi_lmd
->lmd_params
);
547 LASSERT(lsi
->lsi_llsbi
== NULL
);
549 s2lsi_nocast(sb
) = NULL
;
554 /* The lsi has one reference for every server that is using the disk -
555 e.g. MDT, MGS, and potentially MGC */
556 int lustre_put_lsi(struct super_block
*sb
)
558 struct lustre_sb_info
*lsi
= s2lsi(sb
);
560 LASSERT(lsi
!= NULL
);
562 CDEBUG(D_MOUNT
, "put %p %d\n", sb
, atomic_read(&lsi
->lsi_mounts
));
563 if (atomic_dec_and_test(&lsi
->lsi_mounts
)) {
/*** SERVER NAME ***
 * <FSNAME><SEPARATOR><TYPE><INDEX>
 * FSNAME is between 1 and 8 characters (inclusive).
 *	Excluded characters are '/' and ':'
 * SEPARATOR is either ':' or '-'
 * TYPE: "OST", "MDT", etc.
 * INDEX: Hex representation of the index
 */

/** Get the fsname ("lustre") from the server name ("lustre-OST003F").
 * @param [in] svname server name including type and index
 * @param [out] fsname Buffer to copy filesystem name prefix into.
 *  Must have at least 'strlen(fsname) + 1' chars.  May be NULL when the
 *  caller only wants @endptr.
 * @param [out] endptr if endptr isn't NULL it is set to end of fsname
 *  (i.e. it points at the separator)
 * @return 0 on success, -EINVAL if no separator is found within the first
 *  nine characters or the fsname would be empty
 */
int server_name2fsname(const char *svname, char *fsname, const char **endptr)
{
	const char *dash;

	/*
	 * Start at most 8 characters in (max fsname length is 8) and walk
	 * backwards to the last separator at or before that position.
	 */
	dash = svname + strnlen(svname, 8);
	for (; dash > svname && *dash != '-' && *dash != ':'; dash--)
		;
	if (dash == svname)
		return -EINVAL;

	if (fsname != NULL) {
		/* [svname, dash) holds no NUL, so a plain copy suffices */
		memcpy(fsname, svname, dash - svname);
		fsname[dash - svname] = '\0';
	}

	if (endptr != NULL)
		*endptr = dash;

	return 0;
}
EXPORT_SYMBOL(server_name2fsname);
609 * Get service name (svname) from string
611 * if endptr isn't NULL it is set to end of fsname *
613 int server_name2svname(const char *label
, char *svname
, const char **endptr
,
619 /* We use server_name2fsname() just for parsing */
620 rc
= server_name2fsname(label
, NULL
, &dash
);
627 if (strlcpy(svname
, dash
+ 1, svsize
) >= svsize
)
632 EXPORT_SYMBOL(server_name2svname
);
635 /* Get the index from the obd name.
638 if endptr isn't NULL it is set to end of name */
639 int server_name2index(const char *svname
, __u32
*idx
, const char **endptr
)
645 /* We use server_name2fsname() just for parsing */
646 rc
= server_name2fsname(svname
, NULL
, &dash
);
652 if (strncmp(dash
, "MDT", 3) == 0)
653 rc
= LDD_F_SV_TYPE_MDT
;
654 else if (strncmp(dash
, "OST", 3) == 0)
655 rc
= LDD_F_SV_TYPE_OST
;
661 if (strncmp(dash
, "all", 3) == 0) {
664 return rc
| LDD_F_SV_ALL
;
667 index
= simple_strtoul(dash
, (char **)endptr
, 16);
671 /* Account for -mdc after index that is possible when specifying mdt */
672 if (endptr
!= NULL
&& strncmp(LUSTRE_MDC_NAME
, *endptr
+ 1,
673 sizeof(LUSTRE_MDC_NAME
)-1) == 0)
674 *endptr
+= sizeof(LUSTRE_MDC_NAME
);
678 EXPORT_SYMBOL(server_name2index
);
680 /*************** mount common between server and client ***************/
683 int lustre_common_put_super(struct super_block
*sb
)
687 CDEBUG(D_MOUNT
, "dropping sb %p\n", sb
);
689 /* Drop a ref to the MGC */
690 rc
= lustre_stop_mgc(sb
);
691 if (rc
&& (rc
!= -ENOENT
)) {
693 CERROR("Can't stop MGC: %d\n", rc
);
696 /* BUSY just means that there's some other obd that
697 needs the mgc. Let him clean it up. */
698 CDEBUG(D_MOUNT
, "MGC still in use\n");
700 /* Drop a ref to the mounted disk */
705 EXPORT_SYMBOL(lustre_common_put_super
);
707 static void lmd_print(struct lustre_mount_data
*lmd
)
711 PRINT_CMD(D_MOUNT
, " mount data:\n");
712 if (lmd_is_client(lmd
))
713 PRINT_CMD(D_MOUNT
, "profile: %s\n", lmd
->lmd_profile
);
714 PRINT_CMD(D_MOUNT
, "device: %s\n", lmd
->lmd_dev
);
715 PRINT_CMD(D_MOUNT
, "flags: %x\n", lmd
->lmd_flags
);
718 PRINT_CMD(D_MOUNT
, "options: %s\n", lmd
->lmd_opts
);
720 if (lmd
->lmd_recovery_time_soft
)
721 PRINT_CMD(D_MOUNT
, "recovery time soft: %d\n",
722 lmd
->lmd_recovery_time_soft
);
724 if (lmd
->lmd_recovery_time_hard
)
725 PRINT_CMD(D_MOUNT
, "recovery time hard: %d\n",
726 lmd
->lmd_recovery_time_hard
);
728 for (i
= 0; i
< lmd
->lmd_exclude_count
; i
++) {
729 PRINT_CMD(D_MOUNT
, "exclude %d: OST%04x\n", i
,
730 lmd
->lmd_exclude
[i
]);
734 /* Is this server on the exclusion list */
735 int lustre_check_exclusion(struct super_block
*sb
, char *svname
)
737 struct lustre_sb_info
*lsi
= s2lsi(sb
);
738 struct lustre_mount_data
*lmd
= lsi
->lsi_lmd
;
742 rc
= server_name2index(svname
, &index
, NULL
);
743 if (rc
!= LDD_F_SV_TYPE_OST
)
744 /* Only exclude OSTs */
747 CDEBUG(D_MOUNT
, "Check exclusion %s (%d) in %d of %s\n", svname
,
748 index
, lmd
->lmd_exclude_count
, lmd
->lmd_dev
);
750 for (i
= 0; i
< lmd
->lmd_exclude_count
; i
++) {
751 if (index
== lmd
->lmd_exclude
[i
]) {
752 CWARN("Excluding %s (on exclusion list)\n", svname
);
759 /* mount -v -o exclude=lustre-OST0001:lustre-OST0002 -t lustre ... */
760 static int lmd_make_exclusion(struct lustre_mount_data
*lmd
, const char *ptr
)
762 const char *s1
= ptr
, *s2
;
763 __u32 index
, *exclude_list
;
766 /* The shortest an ost name can be is 8 chars: -OST0000.
767 We don't actually know the fsname at this time, so in fact
768 a user could specify any fsname. */
769 devmax
= strlen(ptr
) / 8 + 1;
771 /* temp storage until we figure out how many we have */
772 exclude_list
= kcalloc(devmax
, sizeof(index
), GFP_NOFS
);
776 /* we enter this fn pointing at the '=' */
777 while (*s1
&& *s1
!= ' ' && *s1
!= ',') {
779 rc
= server_name2index(s1
, &index
, &s2
);
781 CERROR("Can't parse server name '%s': rc = %d\n",
785 if (rc
== LDD_F_SV_TYPE_OST
)
786 exclude_list
[lmd
->lmd_exclude_count
++] = index
;
788 CDEBUG(D_MOUNT
, "ignoring exclude %.*s: type = %#x\n",
789 (uint
)(s2
-s1
), s1
, rc
);
791 /* now we are pointing at ':' (next exclude)
792 or ',' (end of excludes) */
793 if (lmd
->lmd_exclude_count
>= devmax
)
796 if (rc
>= 0) /* non-err */
799 if (lmd
->lmd_exclude_count
) {
800 /* permanent, freed in lustre_free_lsi */
801 lmd
->lmd_exclude
= kcalloc(lmd
->lmd_exclude_count
,
802 sizeof(index
), GFP_NOFS
);
803 if (lmd
->lmd_exclude
) {
804 memcpy(lmd
->lmd_exclude
, exclude_list
,
805 sizeof(index
) * lmd
->lmd_exclude_count
);
808 lmd
->lmd_exclude_count
= 0;
815 static int lmd_parse_mgssec(struct lustre_mount_data
*lmd
, char *ptr
)
820 kfree(lmd
->lmd_mgssec
);
821 lmd
->lmd_mgssec
= NULL
;
823 tail
= strchr(ptr
, ',');
825 length
= strlen(ptr
);
829 lmd
->lmd_mgssec
= kzalloc(length
+ 1, GFP_NOFS
);
830 if (!lmd
->lmd_mgssec
)
833 memcpy(lmd
->lmd_mgssec
, ptr
, length
);
834 lmd
->lmd_mgssec
[length
] = '\0';
838 static int lmd_parse_string(char **handle
, char *ptr
)
843 if ((handle
== NULL
) || (ptr
== NULL
))
849 tail
= strchr(ptr
, ',');
851 length
= strlen(ptr
);
855 *handle
= kzalloc(length
+ 1, GFP_NOFS
);
859 memcpy(*handle
, ptr
, length
);
860 (*handle
)[length
] = '\0';
865 /* Collect multiple values for mgsnid specifiers */
866 static int lmd_parse_mgs(struct lustre_mount_data
*lmd
, char **ptr
)
874 /* Find end of nidlist */
875 while (class_parse_nid_quiet(tail
, &nid
, &tail
) == 0)
877 length
= tail
- *ptr
;
879 LCONSOLE_ERROR_MSG(0x159, "Can't parse NID '%s'\n", *ptr
);
883 if (lmd
->lmd_mgs
!= NULL
)
884 oldlen
= strlen(lmd
->lmd_mgs
) + 1;
886 mgsnid
= kzalloc(oldlen
+ length
+ 1, GFP_NOFS
);
890 if (lmd
->lmd_mgs
!= NULL
) {
891 /* Multiple mgsnid= are taken to mean failover locations */
892 memcpy(mgsnid
, lmd
->lmd_mgs
, oldlen
);
893 mgsnid
[oldlen
- 1] = ':';
896 memcpy(mgsnid
+ oldlen
, *ptr
, length
);
897 mgsnid
[oldlen
+ length
] = '\0';
898 lmd
->lmd_mgs
= mgsnid
;
904 /** Parse mount line options
905 * e.g. mount -v -t lustre -o abort_recov uml1:uml2:/lustre-client /mnt/lustre
906 * dev is passed as device=uml1:/lustre by mount.lustre
908 static int lmd_parse(char *options
, struct lustre_mount_data
*lmd
)
910 char *s1
, *s2
, *devname
= NULL
;
911 struct lustre_mount_data
*raw
= (struct lustre_mount_data
*)options
;
916 LCONSOLE_ERROR_MSG(0x162, "Missing mount data: check that /sbin/mount.lustre is installed.\n");
920 /* Options should be a string - try to detect old lmd data */
921 if ((raw
->lmd_magic
& 0xffffff00) == (LMD_MAGIC
& 0xffffff00)) {
922 LCONSOLE_ERROR_MSG(0x163, "You're using an old version of /sbin/mount.lustre. Please install version %s\n",
923 LUSTRE_VERSION_STRING
);
926 lmd
->lmd_magic
= LMD_MAGIC
;
928 lmd
->lmd_params
= kzalloc(4096, GFP_NOFS
);
929 if (!lmd
->lmd_params
)
931 lmd
->lmd_params
[0] = '\0';
933 /* Set default flags here */
938 int time_min
= OBD_RECOVERY_TIME_MIN
;
940 /* Skip whitespace and extra commas */
941 while (*s1
== ' ' || *s1
== ',')
944 /* Client options are parsed in ll_options: eg. flock,
947 /* Parse non-ldiskfs options here. Rather than modifying
948 ldiskfs, we just zero these out here */
949 if (strncmp(s1
, "abort_recov", 11) == 0) {
950 lmd
->lmd_flags
|= LMD_FLG_ABORT_RECOV
;
952 } else if (strncmp(s1
, "recovery_time_soft=", 19) == 0) {
953 lmd
->lmd_recovery_time_soft
= max_t(int,
954 simple_strtoul(s1
+ 19, NULL
, 10), time_min
);
956 } else if (strncmp(s1
, "recovery_time_hard=", 19) == 0) {
957 lmd
->lmd_recovery_time_hard
= max_t(int,
958 simple_strtoul(s1
+ 19, NULL
, 10), time_min
);
960 } else if (strncmp(s1
, "noir", 4) == 0) {
961 lmd
->lmd_flags
|= LMD_FLG_NOIR
; /* test purpose only. */
963 } else if (strncmp(s1
, "nosvc", 5) == 0) {
964 lmd
->lmd_flags
|= LMD_FLG_NOSVC
;
966 } else if (strncmp(s1
, "nomgs", 5) == 0) {
967 lmd
->lmd_flags
|= LMD_FLG_NOMGS
;
969 } else if (strncmp(s1
, "noscrub", 7) == 0) {
970 lmd
->lmd_flags
|= LMD_FLG_NOSCRUB
;
972 } else if (strncmp(s1
, PARAM_MGSNODE
,
973 sizeof(PARAM_MGSNODE
) - 1) == 0) {
974 s2
= s1
+ sizeof(PARAM_MGSNODE
) - 1;
975 /* Assume the next mount opt is the first
976 invalid nid we get to. */
977 rc
= lmd_parse_mgs(lmd
, &s2
);
981 } else if (strncmp(s1
, "writeconf", 9) == 0) {
982 lmd
->lmd_flags
|= LMD_FLG_WRITECONF
;
984 } else if (strncmp(s1
, "update", 6) == 0) {
985 lmd
->lmd_flags
|= LMD_FLG_UPDATE
;
987 } else if (strncmp(s1
, "virgin", 6) == 0) {
988 lmd
->lmd_flags
|= LMD_FLG_VIRGIN
;
990 } else if (strncmp(s1
, "noprimnode", 10) == 0) {
991 lmd
->lmd_flags
|= LMD_FLG_NO_PRIMNODE
;
993 } else if (strncmp(s1
, "mgssec=", 7) == 0) {
994 rc
= lmd_parse_mgssec(lmd
, s1
+ 7);
998 /* ost exclusion list */
999 } else if (strncmp(s1
, "exclude=", 8) == 0) {
1000 rc
= lmd_make_exclusion(lmd
, s1
+ 7);
1004 } else if (strncmp(s1
, "mgs", 3) == 0) {
1006 lmd
->lmd_flags
|= LMD_FLG_MGS
;
1008 } else if (strncmp(s1
, "svname=", 7) == 0) {
1009 rc
= lmd_parse_string(&lmd
->lmd_profile
, s1
+ 7);
1013 } else if (strncmp(s1
, "param=", 6) == 0) {
1015 char *tail
= strchr(s1
+ 6, ',');
1017 length
= strlen(s1
);
1021 strncat(lmd
->lmd_params
, s1
+ 6, length
);
1022 strcat(lmd
->lmd_params
, " ");
1024 } else if (strncmp(s1
, "osd=", 4) == 0) {
1025 rc
= lmd_parse_string(&lmd
->lmd_osd_type
, s1
+ 4);
1030 /* Linux 2.4 doesn't pass the device, so we stuck it at the
1031 end of the options. */
1032 else if (strncmp(s1
, "device=", 7) == 0) {
1034 /* terminate options right before device. device
1035 must be the last one. */
1041 s2
= strchr(s1
, ',');
1049 memmove(s1
, s2
, strlen(s2
) + 1);
1055 LCONSOLE_ERROR_MSG(0x164, "Can't find the device name (need mount option 'device=...')\n");
1059 s1
= strstr(devname
, ":/");
1062 lmd
->lmd_flags
|= LMD_FLG_CLIENT
;
1063 /* Remove leading /s from fsname */
1064 while (*++s1
== '/')
1066 /* Freed in lustre_free_lsi */
1067 lmd
->lmd_profile
= kasprintf(GFP_NOFS
, "%s-client", s1
);
1068 if (!lmd
->lmd_profile
)
1072 /* Freed in lustre_free_lsi */
1073 lmd
->lmd_dev
= kzalloc(strlen(devname
) + 1, GFP_NOFS
);
1076 strcpy(lmd
->lmd_dev
, devname
);
1078 /* Save mount options */
1079 s1
= options
+ strlen(options
) - 1;
1080 while (s1
>= options
&& (*s1
== ',' || *s1
== ' '))
1082 if (*options
!= 0) {
1083 /* Freed in lustre_free_lsi */
1084 lmd
->lmd_opts
= kzalloc(strlen(options
) + 1, GFP_NOFS
);
1087 strcpy(lmd
->lmd_opts
, options
);
1091 lmd
->lmd_magic
= LMD_MAGIC
;
1096 CERROR("Bad mount options %s\n", options
);
/*
 * Bundle handed from lustre_mount() to lustre_fill_super() as the opaque
 * mount_nodev() argument.
 */
struct lustre_mount_data2 {
	void *lmd2_data;		/* raw mount data (option string) */
	struct vfsmount *lmd2_mnt;	/* passed on to the client fill_super */
};
1105 /** This is the entry point for the mount call into Lustre.
1106 * This is called when a server or client is mounted,
1107 * and this is where we start setting things up.
1108 * @param data Mount options (e.g. -o flock,abort_recov)
1110 int lustre_fill_super(struct super_block
*sb
, void *data
, int silent
)
1112 struct lustre_mount_data
*lmd
;
1113 struct lustre_mount_data2
*lmd2
= data
;
1114 struct lustre_sb_info
*lsi
;
1117 CDEBUG(D_MOUNT
|D_VFSTRACE
, "VFS Op: sb %p\n", sb
);
1119 lsi
= lustre_init_lsi(sb
);
1125 * Disable lockdep during mount, because mount locking patterns are
1131 * LU-639: the obd cleanup of last mount may not finish yet, wait here.
1133 obd_zombie_barrier();
1135 /* Figure out the lmd from the mount options */
1136 if (lmd_parse((char *)(lmd2
->lmd2_data
), lmd
)) {
1142 if (lmd_is_client(lmd
)) {
1143 CDEBUG(D_MOUNT
, "Mounting client %s\n", lmd
->lmd_profile
);
1144 if (client_fill_super
== NULL
)
1145 request_module("lustre");
1146 if (client_fill_super
== NULL
) {
1147 LCONSOLE_ERROR_MSG(0x165, "Nothing registered for client mount! Is the 'lustre' module loaded?\n");
1151 rc
= lustre_start_mgc(sb
);
1156 /* Connect and start */
1157 /* (should always be ll_fill_super) */
1158 rc
= (*client_fill_super
)(sb
, lmd2
->lmd2_mnt
);
1159 /* c_f_s will call lustre_common_put_super on failure */
1162 CERROR("This is client-side-only module, cannot handle server mount.\n");
1166 /* If error happens in fill_super() call, @lsi will be killed there.
1167 * This is why we do not put it here. */
1171 CERROR("Unable to mount %s (%d)\n",
1172 s2lsi(sb
) ? lmd
->lmd_dev
: "", rc
);
1174 CDEBUG(D_SUPER
, "Mount %s complete\n",
1182 /* We can't call ll_fill_super by name because it lives in a module that
1183 must be loaded after this one. */
1184 void lustre_register_client_fill_super(int (*cfs
)(struct super_block
*sb
,
1185 struct vfsmount
*mnt
))
1187 client_fill_super
= cfs
;
1189 EXPORT_SYMBOL(lustre_register_client_fill_super
);
1191 void lustre_register_kill_super_cb(void (*cfs
)(struct super_block
*sb
))
1193 kill_super_cb
= cfs
;
1195 EXPORT_SYMBOL(lustre_register_kill_super_cb
);
1197 /***************** FS registration ******************/
1198 struct dentry
*lustre_mount(struct file_system_type
*fs_type
, int flags
,
1199 const char *devname
, void *data
)
1201 struct lustre_mount_data2 lmd2
= {
1206 return mount_nodev(fs_type
, flags
, &lmd2
, lustre_fill_super
);
1209 static void lustre_kill_super(struct super_block
*sb
)
1211 struct lustre_sb_info
*lsi
= s2lsi(sb
);
1213 if (kill_super_cb
&& lsi
)
1214 (*kill_super_cb
)(sb
);
1216 kill_anon_super(sb
);
1219 /** Register the "lustre" fs type
1221 struct file_system_type lustre_fs_type
= {
1222 .owner
= THIS_MODULE
,
1224 .mount
= lustre_mount
,
1225 .kill_sb
= lustre_kill_super
,
1226 .fs_flags
= FS_BINARY_MOUNTDATA
| FS_REQUIRES_DEV
|
1227 FS_RENAME_DOES_D_MOVE
,
1229 MODULE_ALIAS_FS("lustre");
1231 int lustre_register_fs(void)
1233 return register_filesystem(&lustre_fs_type
);
1236 int lustre_unregister_fs(void)
1238 return unregister_filesystem(&lustre_fs_type
);