Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2010, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | */ | |
36 | ||
37 | /** | |
38 | * This file deals with various client/target related logic including recovery. | |
39 | * | |
40 | * TODO: This code more logically belongs in the ptlrpc module than in ldlm and | |
41 | * should be moved. | |
42 | */ | |
43 | ||
44 | #define DEBUG_SUBSYSTEM S_LDLM | |
45 | ||
9fdaf8c0 | 46 | #include "../../include/linux/libcfs/libcfs.h" |
d7e09d03 PT |
47 | #include <obd.h> |
48 | #include <obd_class.h> | |
49 | #include <lustre_dlm.h> | |
50 | #include <lustre_net.h> | |
51 | #include <lustre_sec.h> | |
52 | #include "ldlm_internal.h" | |
53 | ||
54 | /* @priority: If non-zero, move the selected connection to the list head. | |
55 | * @create: If zero, only search in existing connections. | |
56 | */ | |
57 | static int import_set_conn(struct obd_import *imp, struct obd_uuid *uuid, | |
58 | int priority, int create) | |
59 | { | |
60 | struct ptlrpc_connection *ptlrpc_conn; | |
61 | struct obd_import_conn *imp_conn = NULL, *item; | |
62 | int rc = 0; | |
d7e09d03 PT |
63 | |
64 | if (!create && !priority) { | |
65 | CDEBUG(D_HA, "Nothing to do\n"); | |
0a3bdb00 | 66 | return -EINVAL; |
d7e09d03 PT |
67 | } |
68 | ||
69 | ptlrpc_conn = ptlrpc_uuid_to_connection(uuid); | |
70 | if (!ptlrpc_conn) { | |
71 | CDEBUG(D_HA, "can't find connection %s\n", uuid->uuid); | |
0a3bdb00 | 72 | return -ENOENT; |
d7e09d03 PT |
73 | } |
74 | ||
75 | if (create) { | |
76 | OBD_ALLOC(imp_conn, sizeof(*imp_conn)); | |
b7cfd6d4 | 77 | if (!imp_conn) |
d7e09d03 | 78 | GOTO(out_put, rc = -ENOMEM); |
d7e09d03 PT |
79 | } |
80 | ||
81 | spin_lock(&imp->imp_lock); | |
82 | list_for_each_entry(item, &imp->imp_conn_list, oic_item) { | |
83 | if (obd_uuid_equals(uuid, &item->oic_uuid)) { | |
84 | if (priority) { | |
85 | list_del(&item->oic_item); | |
86 | list_add(&item->oic_item, | |
87 | &imp->imp_conn_list); | |
88 | item->oic_last_attempt = 0; | |
89 | } | |
90 | CDEBUG(D_HA, "imp %p@%s: found existing conn %s%s\n", | |
91 | imp, imp->imp_obd->obd_name, uuid->uuid, | |
92 | (priority ? ", moved to head" : "")); | |
93 | spin_unlock(&imp->imp_lock); | |
94 | GOTO(out_free, rc = 0); | |
95 | } | |
96 | } | |
97 | /* No existing import connection found for \a uuid. */ | |
98 | if (create) { | |
99 | imp_conn->oic_conn = ptlrpc_conn; | |
100 | imp_conn->oic_uuid = *uuid; | |
101 | imp_conn->oic_last_attempt = 0; | |
102 | if (priority) | |
103 | list_add(&imp_conn->oic_item, &imp->imp_conn_list); | |
104 | else | |
105 | list_add_tail(&imp_conn->oic_item, | |
106 | &imp->imp_conn_list); | |
107 | CDEBUG(D_HA, "imp %p@%s: add connection %s at %s\n", | |
108 | imp, imp->imp_obd->obd_name, uuid->uuid, | |
109 | (priority ? "head" : "tail")); | |
110 | } else { | |
111 | spin_unlock(&imp->imp_lock); | |
112 | GOTO(out_free, rc = -ENOENT); | |
113 | } | |
114 | ||
115 | spin_unlock(&imp->imp_lock); | |
0a3bdb00 | 116 | return 0; |
d7e09d03 PT |
117 | out_free: |
118 | if (imp_conn) | |
119 | OBD_FREE(imp_conn, sizeof(*imp_conn)); | |
120 | out_put: | |
121 | ptlrpc_connection_put(ptlrpc_conn); | |
0a3bdb00 | 122 | return rc; |
d7e09d03 PT |
123 | } |
124 | ||
/*
 * Move the existing connection entry for @uuid to the head of @imp's
 * connection list. Never creates a new entry; returns whatever
 * import_set_conn() returns for priority=1, create=0.
 */
int import_set_conn_priority(struct obd_import *imp, struct obd_uuid *uuid)
{
	int rc;

	rc = import_set_conn(imp, uuid, 1 /* priority */, 0 /* create */);
	return rc;
}
129 | ||
/*
 * Add a connection entry for @uuid to @imp, creating it if it is not
 * already present. A non-zero @priority places (or moves) the entry at the
 * head of the list; otherwise a new entry is appended at the tail.
 */
int client_import_add_conn(struct obd_import *imp, struct obd_uuid *uuid,
			   int priority)
{
	int rc;

	rc = import_set_conn(imp, uuid, priority, 1 /* create */);
	return rc;
}
EXPORT_SYMBOL(client_import_add_conn);
136 | ||
137 | int client_import_del_conn(struct obd_import *imp, struct obd_uuid *uuid) | |
138 | { | |
139 | struct obd_import_conn *imp_conn; | |
140 | struct obd_export *dlmexp; | |
141 | int rc = -ENOENT; | |
d7e09d03 PT |
142 | |
143 | spin_lock(&imp->imp_lock); | |
144 | if (list_empty(&imp->imp_conn_list)) { | |
145 | LASSERT(!imp->imp_connection); | |
146 | GOTO(out, rc); | |
147 | } | |
148 | ||
149 | list_for_each_entry(imp_conn, &imp->imp_conn_list, oic_item) { | |
150 | if (!obd_uuid_equals(uuid, &imp_conn->oic_uuid)) | |
151 | continue; | |
152 | LASSERT(imp_conn->oic_conn); | |
153 | ||
154 | if (imp_conn == imp->imp_conn_current) { | |
155 | LASSERT(imp_conn->oic_conn == imp->imp_connection); | |
156 | ||
157 | if (imp->imp_state != LUSTRE_IMP_CLOSED && | |
158 | imp->imp_state != LUSTRE_IMP_DISCON) { | |
159 | CERROR("can't remove current connection\n"); | |
160 | GOTO(out, rc = -EBUSY); | |
161 | } | |
162 | ||
163 | ptlrpc_connection_put(imp->imp_connection); | |
164 | imp->imp_connection = NULL; | |
165 | ||
166 | dlmexp = class_conn2export(&imp->imp_dlm_handle); | |
167 | if (dlmexp && dlmexp->exp_connection) { | |
168 | LASSERT(dlmexp->exp_connection == | |
169 | imp_conn->oic_conn); | |
170 | ptlrpc_connection_put(dlmexp->exp_connection); | |
171 | dlmexp->exp_connection = NULL; | |
172 | } | |
173 | } | |
174 | ||
175 | list_del(&imp_conn->oic_item); | |
176 | ptlrpc_connection_put(imp_conn->oic_conn); | |
177 | OBD_FREE(imp_conn, sizeof(*imp_conn)); | |
178 | CDEBUG(D_HA, "imp %p@%s: remove connection %s\n", | |
179 | imp, imp->imp_obd->obd_name, uuid->uuid); | |
180 | rc = 0; | |
181 | break; | |
182 | } | |
183 | out: | |
184 | spin_unlock(&imp->imp_lock); | |
185 | if (rc == -ENOENT) | |
186 | CERROR("connection %s not found\n", uuid->uuid); | |
0a3bdb00 | 187 | return rc; |
d7e09d03 PT |
188 | } |
189 | EXPORT_SYMBOL(client_import_del_conn); | |
190 | ||
191 | /** | |
192 | * Find conn UUID by peer NID. \a peer is a server NID. This function is used | |
193 | * to find a conn uuid of \a imp which can reach \a peer. | |
194 | */ | |
195 | int client_import_find_conn(struct obd_import *imp, lnet_nid_t peer, | |
196 | struct obd_uuid *uuid) | |
197 | { | |
198 | struct obd_import_conn *conn; | |
199 | int rc = -ENOENT; | |
d7e09d03 PT |
200 | |
201 | spin_lock(&imp->imp_lock); | |
202 | list_for_each_entry(conn, &imp->imp_conn_list, oic_item) { | |
203 | /* Check if conn UUID does have this peer NID. */ | |
204 | if (class_check_uuid(&conn->oic_uuid, peer)) { | |
205 | *uuid = conn->oic_uuid; | |
206 | rc = 0; | |
207 | break; | |
208 | } | |
209 | } | |
210 | spin_unlock(&imp->imp_lock); | |
0a3bdb00 | 211 | return rc; |
d7e09d03 PT |
212 | } |
213 | EXPORT_SYMBOL(client_import_find_conn); | |
214 | ||
/*
 * Destroy a client import and release its security policy instance.
 *
 * A temporary reference is held across the sequence so that @imp is still
 * usable when sptlrpc_import_sec_put() runs after class_destroy_import().
 */
void client_destroy_import(struct obd_import *imp)
{
	class_import_get(imp);

	class_destroy_import(imp);

	/* Drop security policy instance after all RPCs have finished/aborted
	 * to let all busy contexts be released. */
	sptlrpc_import_sec_put(imp);

	class_import_put(imp);
}
EXPORT_SYMBOL(client_destroy_import);
225 | ||
/**
 * Check whether or not the OSC is on MDT.
 * In the config log,
 * osc on MDT
 *	setup 0:{fsname}-OSTxxxx-osc[-MDTxxxx] 1:lustre-OST0000_UUID 2:NID
 * osc on client
 *	setup 0:{fsname}-OSTxxxx-osc 1:lustre-OST0000_UUID 2:NID
 *
 * The decision is made purely on the device name: the text following the
 * last '-' must start with "MDT".
 *
 * \param obdname	NUL-terminated device name; it is not modified
 *
 * \retval 1	the last '-'-separated component starts with "MDT"
 * \retval 0	otherwise, including when the name contains no '-'
 **/
static int osc_on_mdt(const char *obdname)
{
	const char *last_dash = strrchr(obdname, '-');

	if (last_dash == NULL)
		return 0;

	/* last_dash + 1 is at worst the terminating NUL, so strncmp() never
	 * reads past the end of the string. */
	return strncmp(last_dash + 1, "MDT", 3) == 0;
}
248 | ||
249 | /* Configure an RPC client OBD device. | |
250 | * | |
251 | * lcfg parameters: | |
252 | * 1 - client UUID | |
253 | * 2 - server UUID | |
254 | * 3 - inactive-on-startup | |
255 | */ | |
256 | int client_obd_setup(struct obd_device *obddev, struct lustre_cfg *lcfg) | |
257 | { | |
258 | struct client_obd *cli = &obddev->u.cli; | |
259 | struct obd_import *imp; | |
260 | struct obd_uuid server_uuid; | |
261 | int rq_portal, rp_portal, connect_op; | |
262 | char *name = obddev->obd_type->typ_name; | |
263 | ldlm_ns_type_t ns_type = LDLM_NS_TYPE_UNKNOWN; | |
264 | int rc; | |
265 | char *cli_name = lustre_cfg_buf(lcfg, 0); | |
d7e09d03 PT |
266 | |
267 | /* In a more perfect world, we would hang a ptlrpc_client off of | |
268 | * obd_type and just use the values from there. */ | |
269 | if (!strcmp(name, LUSTRE_OSC_NAME) || | |
270 | (!(strcmp(name, LUSTRE_OSP_NAME)) && | |
271 | (is_osp_on_mdt(cli_name) && | |
272 | strstr(lustre_cfg_buf(lcfg, 1), "OST") != NULL))) { | |
273 | /* OSC or OSP_on_MDT for OSTs */ | |
274 | rq_portal = OST_REQUEST_PORTAL; | |
275 | rp_portal = OSC_REPLY_PORTAL; | |
276 | connect_op = OST_CONNECT; | |
277 | cli->cl_sp_me = LUSTRE_SP_CLI; | |
278 | cli->cl_sp_to = LUSTRE_SP_OST; | |
279 | ns_type = LDLM_NS_TYPE_OSC; | |
280 | } else if (!strcmp(name, LUSTRE_MDC_NAME) || | |
281 | !strcmp(name, LUSTRE_LWP_NAME) || | |
282 | (!strcmp(name, LUSTRE_OSP_NAME) && | |
283 | (is_osp_on_mdt(cli_name) && | |
284 | strstr(lustre_cfg_buf(lcfg, 1), "OST") == NULL))) { | |
285 | /* MDC or OSP_on_MDT for other MDTs */ | |
286 | rq_portal = MDS_REQUEST_PORTAL; | |
287 | rp_portal = MDC_REPLY_PORTAL; | |
288 | connect_op = MDS_CONNECT; | |
289 | cli->cl_sp_me = LUSTRE_SP_CLI; | |
290 | cli->cl_sp_to = LUSTRE_SP_MDT; | |
291 | ns_type = LDLM_NS_TYPE_MDC; | |
292 | } else if (!strcmp(name, LUSTRE_MGC_NAME)) { | |
293 | rq_portal = MGS_REQUEST_PORTAL; | |
294 | rp_portal = MGC_REPLY_PORTAL; | |
295 | connect_op = MGS_CONNECT; | |
296 | cli->cl_sp_me = LUSTRE_SP_MGC; | |
297 | cli->cl_sp_to = LUSTRE_SP_MGS; | |
298 | cli->cl_flvr_mgc.sf_rpc = SPTLRPC_FLVR_INVALID; | |
299 | ns_type = LDLM_NS_TYPE_MGC; | |
300 | } else { | |
301 | CERROR("unknown client OBD type \"%s\", can't setup\n", | |
302 | name); | |
0a3bdb00 | 303 | return -EINVAL; |
d7e09d03 PT |
304 | } |
305 | ||
306 | if (LUSTRE_CFG_BUFLEN(lcfg, 1) < 1) { | |
307 | CERROR("requires a TARGET UUID\n"); | |
0a3bdb00 | 308 | return -EINVAL; |
d7e09d03 PT |
309 | } |
310 | ||
311 | if (LUSTRE_CFG_BUFLEN(lcfg, 1) > 37) { | |
312 | CERROR("client UUID must be less than 38 characters\n"); | |
0a3bdb00 | 313 | return -EINVAL; |
d7e09d03 PT |
314 | } |
315 | ||
316 | if (LUSTRE_CFG_BUFLEN(lcfg, 2) < 1) { | |
317 | CERROR("setup requires a SERVER UUID\n"); | |
0a3bdb00 | 318 | return -EINVAL; |
d7e09d03 PT |
319 | } |
320 | ||
321 | if (LUSTRE_CFG_BUFLEN(lcfg, 2) > 37) { | |
322 | CERROR("target UUID must be less than 38 characters\n"); | |
0a3bdb00 | 323 | return -EINVAL; |
d7e09d03 PT |
324 | } |
325 | ||
326 | init_rwsem(&cli->cl_sem); | |
6246dab1 | 327 | mutex_init(&cli->cl_mgc_mutex); |
d7e09d03 PT |
328 | cli->cl_conn_count = 0; |
329 | memcpy(server_uuid.uuid, lustre_cfg_buf(lcfg, 2), | |
330 | min_t(unsigned int, LUSTRE_CFG_BUFLEN(lcfg, 2), | |
331 | sizeof(server_uuid))); | |
332 | ||
333 | cli->cl_dirty = 0; | |
334 | cli->cl_avail_grant = 0; | |
335 | /* FIXME: Should limit this for the sum of all cl_dirty_max. */ | |
336 | cli->cl_dirty_max = OSC_MAX_DIRTY_DEFAULT * 1024 * 1024; | |
4f6cc9ab PT |
337 | if (cli->cl_dirty_max >> PAGE_CACHE_SHIFT > totalram_pages / 8) |
338 | cli->cl_dirty_max = totalram_pages << (PAGE_CACHE_SHIFT - 3); | |
d7e09d03 PT |
339 | INIT_LIST_HEAD(&cli->cl_cache_waiters); |
340 | INIT_LIST_HEAD(&cli->cl_loi_ready_list); | |
341 | INIT_LIST_HEAD(&cli->cl_loi_hp_ready_list); | |
342 | INIT_LIST_HEAD(&cli->cl_loi_write_list); | |
343 | INIT_LIST_HEAD(&cli->cl_loi_read_list); | |
344 | client_obd_list_lock_init(&cli->cl_loi_list_lock); | |
345 | atomic_set(&cli->cl_pending_w_pages, 0); | |
346 | atomic_set(&cli->cl_pending_r_pages, 0); | |
347 | cli->cl_r_in_flight = 0; | |
348 | cli->cl_w_in_flight = 0; | |
349 | ||
350 | spin_lock_init(&cli->cl_read_rpc_hist.oh_lock); | |
351 | spin_lock_init(&cli->cl_write_rpc_hist.oh_lock); | |
352 | spin_lock_init(&cli->cl_read_page_hist.oh_lock); | |
353 | spin_lock_init(&cli->cl_write_page_hist.oh_lock); | |
354 | spin_lock_init(&cli->cl_read_offset_hist.oh_lock); | |
355 | spin_lock_init(&cli->cl_write_offset_hist.oh_lock); | |
356 | ||
357 | /* lru for osc. */ | |
358 | INIT_LIST_HEAD(&cli->cl_lru_osc); | |
359 | atomic_set(&cli->cl_lru_shrinkers, 0); | |
360 | atomic_set(&cli->cl_lru_busy, 0); | |
361 | atomic_set(&cli->cl_lru_in_list, 0); | |
362 | INIT_LIST_HEAD(&cli->cl_lru_list); | |
363 | client_obd_list_lock_init(&cli->cl_lru_list_lock); | |
364 | ||
365 | init_waitqueue_head(&cli->cl_destroy_waitq); | |
366 | atomic_set(&cli->cl_destroy_in_flight, 0); | |
367 | /* Turn on checksumming by default. */ | |
368 | cli->cl_checksum = 1; | |
369 | /* | |
370 | * The supported checksum types will be worked out at connect time | |
371 | * Set cl_chksum* to CRC32 for now to avoid returning screwed info | |
372 | * through procfs. | |
373 | */ | |
374 | cli->cl_cksum_type = cli->cl_supp_cksum_types = OBD_CKSUM_CRC32; | |
375 | atomic_set(&cli->cl_resends, OSC_DEFAULT_RESENDS); | |
376 | ||
377 | /* This value may be reduced at connect time in | |
378 | * ptlrpc_connect_interpret() . We initialize it to only | |
379 | * 1MB until we know what the performance looks like. | |
380 | * In the future this should likely be increased. LU-1431 */ | |
381 | cli->cl_max_pages_per_rpc = min_t(int, PTLRPC_MAX_BRW_PAGES, | |
382 | LNET_MTU >> PAGE_CACHE_SHIFT); | |
383 | ||
384 | if (!strcmp(name, LUSTRE_MDC_NAME)) { | |
385 | cli->cl_max_rpcs_in_flight = MDC_MAX_RIF_DEFAULT; | |
4f6cc9ab | 386 | } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 128 /* MB */) { |
d7e09d03 | 387 | cli->cl_max_rpcs_in_flight = 2; |
4f6cc9ab | 388 | } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 256 /* MB */) { |
d7e09d03 | 389 | cli->cl_max_rpcs_in_flight = 3; |
4f6cc9ab | 390 | } else if (totalram_pages >> (20 - PAGE_CACHE_SHIFT) <= 512 /* MB */) { |
d7e09d03 PT |
391 | cli->cl_max_rpcs_in_flight = 4; |
392 | } else { | |
393 | if (osc_on_mdt(obddev->obd_name)) | |
394 | cli->cl_max_rpcs_in_flight = MDS_OSC_MAX_RIF_DEFAULT; | |
395 | else | |
396 | cli->cl_max_rpcs_in_flight = OSC_MAX_RIF_DEFAULT; | |
397 | } | |
398 | rc = ldlm_get_ref(); | |
399 | if (rc) { | |
400 | CERROR("ldlm_get_ref failed: %d\n", rc); | |
401 | GOTO(err, rc); | |
402 | } | |
403 | ||
404 | ptlrpc_init_client(rq_portal, rp_portal, name, | |
405 | &obddev->obd_ldlm_client); | |
406 | ||
407 | imp = class_new_import(obddev); | |
408 | if (imp == NULL) | |
409 | GOTO(err_ldlm, rc = -ENOENT); | |
410 | imp->imp_client = &obddev->obd_ldlm_client; | |
411 | imp->imp_connect_op = connect_op; | |
412 | memcpy(cli->cl_target_uuid.uuid, lustre_cfg_buf(lcfg, 1), | |
413 | LUSTRE_CFG_BUFLEN(lcfg, 1)); | |
414 | class_import_put(imp); | |
415 | ||
416 | rc = client_import_add_conn(imp, &server_uuid, 1); | |
417 | if (rc) { | |
418 | CERROR("can't add initial connection\n"); | |
419 | GOTO(err_import, rc); | |
420 | } | |
421 | ||
422 | cli->cl_import = imp; | |
423 | /* cli->cl_max_mds_{easize,cookiesize} updated by mdc_init_ea_size() */ | |
424 | cli->cl_max_mds_easize = sizeof(struct lov_mds_md_v3); | |
425 | cli->cl_max_mds_cookiesize = sizeof(struct llog_cookie); | |
426 | ||
427 | if (LUSTRE_CFG_BUFLEN(lcfg, 3) > 0) { | |
428 | if (!strcmp(lustre_cfg_string(lcfg, 3), "inactive")) { | |
429 | CDEBUG(D_HA, "marking %s %s->%s as inactive\n", | |
430 | name, obddev->obd_name, | |
431 | cli->cl_target_uuid.uuid); | |
432 | spin_lock(&imp->imp_lock); | |
433 | imp->imp_deactive = 1; | |
434 | spin_unlock(&imp->imp_lock); | |
435 | } | |
436 | } | |
437 | ||
438 | obddev->obd_namespace = ldlm_namespace_new(obddev, obddev->obd_name, | |
439 | LDLM_NAMESPACE_CLIENT, | |
440 | LDLM_NAMESPACE_GREEDY, | |
441 | ns_type); | |
442 | if (obddev->obd_namespace == NULL) { | |
443 | CERROR("Unable to create client namespace - %s\n", | |
444 | obddev->obd_name); | |
445 | GOTO(err_import, rc = -ENOMEM); | |
446 | } | |
447 | ||
448 | cli->cl_qchk_stat = CL_NOT_QUOTACHECKED; | |
449 | ||
0a3bdb00 | 450 | return rc; |
d7e09d03 PT |
451 | |
452 | err_import: | |
453 | class_destroy_import(imp); | |
454 | err_ldlm: | |
455 | ldlm_put_ref(); | |
456 | err: | |
0a3bdb00 | 457 | return rc; |
d7e09d03 PT |
458 | |
459 | } | |
460 | EXPORT_SYMBOL(client_obd_setup); | |
461 | ||
462 | int client_obd_cleanup(struct obd_device *obddev) | |
463 | { | |
d7e09d03 PT |
464 | ldlm_namespace_free_post(obddev->obd_namespace); |
465 | obddev->obd_namespace = NULL; | |
466 | ||
467 | LASSERT(obddev->u.cli.cl_import == NULL); | |
468 | ||
469 | ldlm_put_ref(); | |
0a3bdb00 | 470 | return 0; |
d7e09d03 PT |
471 | } |
472 | EXPORT_SYMBOL(client_obd_cleanup); | |
473 | ||
474 | /* ->o_connect() method for client side (OSC and MDC and MGC) */ | |
475 | int client_connect_import(const struct lu_env *env, | |
476 | struct obd_export **exp, | |
477 | struct obd_device *obd, struct obd_uuid *cluuid, | |
478 | struct obd_connect_data *data, void *localdata) | |
479 | { | |
480 | struct client_obd *cli = &obd->u.cli; | |
481 | struct obd_import *imp = cli->cl_import; | |
482 | struct obd_connect_data *ocd; | |
483 | struct lustre_handle conn = { 0 }; | |
484 | int rc; | |
d7e09d03 PT |
485 | |
486 | *exp = NULL; | |
487 | down_write(&cli->cl_sem); | |
488 | if (cli->cl_conn_count > 0 ) | |
489 | GOTO(out_sem, rc = -EALREADY); | |
490 | ||
491 | rc = class_connect(&conn, obd, cluuid); | |
492 | if (rc) | |
493 | GOTO(out_sem, rc); | |
494 | ||
495 | cli->cl_conn_count++; | |
496 | *exp = class_conn2export(&conn); | |
497 | ||
498 | LASSERT(obd->obd_namespace); | |
499 | ||
500 | imp->imp_dlm_handle = conn; | |
501 | rc = ptlrpc_init_import(imp); | |
502 | if (rc != 0) | |
503 | GOTO(out_ldlm, rc); | |
504 | ||
505 | ocd = &imp->imp_connect_data; | |
506 | if (data) { | |
507 | *ocd = *data; | |
508 | imp->imp_connect_flags_orig = data->ocd_connect_flags; | |
509 | } | |
510 | ||
511 | rc = ptlrpc_connect_import(imp); | |
512 | if (rc != 0) { | |
05dca373 | 513 | LASSERT(imp->imp_state == LUSTRE_IMP_DISCON); |
d7e09d03 PT |
514 | GOTO(out_ldlm, rc); |
515 | } | |
f85065e5 | 516 | LASSERT(*exp != NULL && (*exp)->exp_connection); |
d7e09d03 PT |
517 | |
518 | if (data) { | |
519 | LASSERTF((ocd->ocd_connect_flags & data->ocd_connect_flags) == | |
520 | ocd->ocd_connect_flags, "old "LPX64", new "LPX64"\n", | |
521 | data->ocd_connect_flags, ocd->ocd_connect_flags); | |
522 | data->ocd_connect_flags = ocd->ocd_connect_flags; | |
523 | } | |
524 | ||
525 | ptlrpc_pinger_add_import(imp); | |
526 | ||
d7e09d03 PT |
527 | if (rc) { |
528 | out_ldlm: | |
529 | cli->cl_conn_count--; | |
530 | class_disconnect(*exp); | |
531 | *exp = NULL; | |
532 | } | |
533 | out_sem: | |
534 | up_write(&cli->cl_sem); | |
535 | ||
536 | return rc; | |
537 | } | |
538 | EXPORT_SYMBOL(client_connect_import); | |
539 | ||
540 | int client_disconnect_export(struct obd_export *exp) | |
541 | { | |
542 | struct obd_device *obd = class_exp2obd(exp); | |
543 | struct client_obd *cli; | |
544 | struct obd_import *imp; | |
545 | int rc = 0, err; | |
d7e09d03 PT |
546 | |
547 | if (!obd) { | |
548 | CERROR("invalid export for disconnect: exp %p cookie "LPX64"\n", | |
549 | exp, exp ? exp->exp_handle.h_cookie : -1); | |
0a3bdb00 | 550 | return -EINVAL; |
d7e09d03 PT |
551 | } |
552 | ||
553 | cli = &obd->u.cli; | |
554 | imp = cli->cl_import; | |
555 | ||
556 | down_write(&cli->cl_sem); | |
557 | CDEBUG(D_INFO, "disconnect %s - %d\n", obd->obd_name, | |
558 | cli->cl_conn_count); | |
559 | ||
560 | if (!cli->cl_conn_count) { | |
561 | CERROR("disconnecting disconnected device (%s)\n", | |
562 | obd->obd_name); | |
563 | GOTO(out_disconnect, rc = -EINVAL); | |
564 | } | |
565 | ||
566 | cli->cl_conn_count--; | |
567 | if (cli->cl_conn_count) | |
568 | GOTO(out_disconnect, rc = 0); | |
569 | ||
570 | /* Mark import deactivated now, so we don't try to reconnect if any | |
571 | * of the cleanup RPCs fails (e.g. LDLM cancel, etc). We don't | |
572 | * fully deactivate the import, or that would drop all requests. */ | |
573 | spin_lock(&imp->imp_lock); | |
574 | imp->imp_deactive = 1; | |
575 | spin_unlock(&imp->imp_lock); | |
576 | ||
577 | /* Some non-replayable imports (MDS's OSCs) are pinged, so just | |
578 | * delete it regardless. (It's safe to delete an import that was | |
579 | * never added.) */ | |
580 | (void)ptlrpc_pinger_del_import(imp); | |
581 | ||
582 | if (obd->obd_namespace != NULL) { | |
583 | /* obd_force == local only */ | |
584 | ldlm_cli_cancel_unused(obd->obd_namespace, NULL, | |
585 | obd->obd_force ? LCF_LOCAL : 0, NULL); | |
586 | ldlm_namespace_free_prior(obd->obd_namespace, imp, obd->obd_force); | |
587 | } | |
588 | ||
589 | /* There's no need to hold sem while disconnecting an import, | |
590 | * and it may actually cause deadlock in GSS. */ | |
591 | up_write(&cli->cl_sem); | |
592 | rc = ptlrpc_disconnect_import(imp, 0); | |
593 | down_write(&cli->cl_sem); | |
594 | ||
595 | ptlrpc_invalidate_import(imp); | |
596 | ||
d7e09d03 PT |
597 | out_disconnect: |
598 | /* Use server style - class_disconnect should be always called for | |
599 | * o_disconnect. */ | |
600 | err = class_disconnect(exp); | |
601 | if (!rc && err) | |
602 | rc = err; | |
603 | ||
604 | up_write(&cli->cl_sem); | |
605 | ||
0a3bdb00 | 606 | return rc; |
d7e09d03 PT |
607 | } |
608 | EXPORT_SYMBOL(client_disconnect_export); | |
609 | ||
610 | ||
611 | /** | |
612 | * Packs current SLV and Limit into \a req. | |
613 | */ | |
614 | int target_pack_pool_reply(struct ptlrpc_request *req) | |
615 | { | |
616 | struct obd_device *obd; | |
d7e09d03 PT |
617 | |
618 | /* Check that we still have all structures alive as this may | |
619 | * be some late RPC at shutdown time. */ | |
620 | if (unlikely(!req->rq_export || !req->rq_export->exp_obd || | |
621 | !exp_connect_lru_resize(req->rq_export))) { | |
622 | lustre_msg_set_slv(req->rq_repmsg, 0); | |
623 | lustre_msg_set_limit(req->rq_repmsg, 0); | |
0a3bdb00 | 624 | return 0; |
d7e09d03 PT |
625 | } |
626 | ||
627 | /* OBD is alive here as export is alive, which we checked above. */ | |
628 | obd = req->rq_export->exp_obd; | |
629 | ||
630 | read_lock(&obd->obd_pool_lock); | |
631 | lustre_msg_set_slv(req->rq_repmsg, obd->obd_pool_slv); | |
632 | lustre_msg_set_limit(req->rq_repmsg, obd->obd_pool_limit); | |
633 | read_unlock(&obd->obd_pool_lock); | |
634 | ||
0a3bdb00 | 635 | return 0; |
d7e09d03 PT |
636 | } |
637 | EXPORT_SYMBOL(target_pack_pool_reply); | |
638 | ||
639 | int target_send_reply_msg(struct ptlrpc_request *req, int rc, int fail_id) | |
640 | { | |
641 | if (OBD_FAIL_CHECK_ORSET(fail_id & ~OBD_FAIL_ONCE, OBD_FAIL_ONCE)) { | |
642 | DEBUG_REQ(D_ERROR, req, "dropping reply"); | |
643 | return (-ECOMM); | |
644 | } | |
645 | ||
646 | if (unlikely(rc)) { | |
647 | DEBUG_REQ(D_NET, req, "processing error (%d)", rc); | |
648 | req->rq_status = rc; | |
649 | return (ptlrpc_send_error(req, 1)); | |
650 | } else { | |
651 | DEBUG_REQ(D_NET, req, "sending reply"); | |
652 | } | |
653 | ||
654 | return (ptlrpc_send_reply(req, PTLRPC_REPLY_MAYBE_DIFFICULT)); | |
655 | } | |
656 | ||
657 | void target_send_reply(struct ptlrpc_request *req, int rc, int fail_id) | |
658 | { | |
659 | struct ptlrpc_service_part *svcpt; | |
660 | int netrc; | |
661 | struct ptlrpc_reply_state *rs; | |
662 | struct obd_export *exp; | |
d7e09d03 | 663 | |
b7cfd6d4 | 664 | if (req->rq_no_reply) |
d7e09d03 | 665 | return; |
d7e09d03 PT |
666 | |
667 | svcpt = req->rq_rqbd->rqbd_svcpt; | |
668 | rs = req->rq_reply_state; | |
669 | if (rs == NULL || !rs->rs_difficult) { | |
670 | /* no notifiers */ | |
05dca373 | 671 | target_send_reply_msg(req, rc, fail_id); |
d7e09d03 PT |
672 | return; |
673 | } | |
674 | ||
675 | /* must be an export if locks saved */ | |
05dca373 | 676 | LASSERT(req->rq_export != NULL); |
d7e09d03 PT |
677 | /* req/reply consistent */ |
678 | LASSERT(rs->rs_svcpt == svcpt); | |
679 | ||
680 | /* "fresh" reply */ | |
05dca373 AB |
681 | LASSERT(!rs->rs_scheduled); |
682 | LASSERT(!rs->rs_scheduled_ever); | |
683 | LASSERT(!rs->rs_handled); | |
684 | LASSERT(!rs->rs_on_net); | |
685 | LASSERT(rs->rs_export == NULL); | |
686 | LASSERT(list_empty(&rs->rs_obd_list)); | |
687 | LASSERT(list_empty(&rs->rs_exp_list)); | |
688 | ||
689 | exp = class_export_get(req->rq_export); | |
d7e09d03 PT |
690 | |
691 | /* disable reply scheduling while I'm setting up */ | |
692 | rs->rs_scheduled = 1; | |
693 | rs->rs_on_net = 1; | |
694 | rs->rs_xid = req->rq_xid; | |
695 | rs->rs_transno = req->rq_transno; | |
696 | rs->rs_export = exp; | |
697 | rs->rs_opc = lustre_msg_get_opc(req->rq_reqmsg); | |
698 | ||
699 | spin_lock(&exp->exp_uncommitted_replies_lock); | |
700 | CDEBUG(D_NET, "rs transno = "LPU64", last committed = "LPU64"\n", | |
701 | rs->rs_transno, exp->exp_last_committed); | |
702 | if (rs->rs_transno > exp->exp_last_committed) { | |
703 | /* not committed already */ | |
704 | list_add_tail(&rs->rs_obd_list, | |
705 | &exp->exp_uncommitted_replies); | |
706 | } | |
707 | spin_unlock(&exp->exp_uncommitted_replies_lock); | |
708 | ||
709 | spin_lock(&exp->exp_lock); | |
710 | list_add_tail(&rs->rs_exp_list, &exp->exp_outstanding_replies); | |
711 | spin_unlock(&exp->exp_lock); | |
712 | ||
713 | netrc = target_send_reply_msg(req, rc, fail_id); | |
714 | ||
715 | spin_lock(&svcpt->scp_rep_lock); | |
716 | ||
717 | atomic_inc(&svcpt->scp_nreps_difficult); | |
718 | ||
719 | if (netrc != 0) { | |
720 | /* error sending: reply is off the net. Also we need +1 | |
721 | * reply ref until ptlrpc_handle_rs() is done | |
722 | * with the reply state (if the send was successful, there | |
723 | * would have been +1 ref for the net, which | |
724 | * reply_out_callback leaves alone) */ | |
725 | rs->rs_on_net = 0; | |
726 | ptlrpc_rs_addref(rs); | |
727 | } | |
728 | ||
729 | spin_lock(&rs->rs_lock); | |
730 | if (rs->rs_transno <= exp->exp_last_committed || | |
731 | (!rs->rs_on_net && !rs->rs_no_ack) || | |
732 | list_empty(&rs->rs_exp_list) || /* completed already */ | |
733 | list_empty(&rs->rs_obd_list)) { | |
734 | CDEBUG(D_HA, "Schedule reply immediately\n"); | |
735 | ptlrpc_dispatch_difficult_reply(rs); | |
736 | } else { | |
737 | list_add(&rs->rs_list, &svcpt->scp_rep_active); | |
738 | rs->rs_scheduled = 0; /* allow notifier to schedule */ | |
739 | } | |
740 | spin_unlock(&rs->rs_lock); | |
741 | spin_unlock(&svcpt->scp_rep_lock); | |
d7e09d03 PT |
742 | } |
743 | EXPORT_SYMBOL(target_send_reply); | |
744 | ||
745 | ldlm_mode_t lck_compat_array[] = { | |
805e517a EG |
746 | [LCK_EX] = LCK_COMPAT_EX, |
747 | [LCK_PW] = LCK_COMPAT_PW, | |
748 | [LCK_PR] = LCK_COMPAT_PR, | |
749 | [LCK_CW] = LCK_COMPAT_CW, | |
750 | [LCK_CR] = LCK_COMPAT_CR, | |
751 | [LCK_NL] = LCK_COMPAT_NL, | |
752 | [LCK_GROUP] = LCK_COMPAT_GROUP, | |
753 | [LCK_COS] = LCK_COMPAT_COS, | |
d7e09d03 PT |
754 | }; |
755 | ||
756 | /** | |
757 | * Rather arbitrary mapping from LDLM error codes to errno values. This should | |
758 | * not escape to the user level. | |
759 | */ | |
760 | int ldlm_error2errno(ldlm_error_t error) | |
761 | { | |
762 | int result; | |
763 | ||
764 | switch (error) { | |
765 | case ELDLM_OK: | |
766 | result = 0; | |
767 | break; | |
768 | case ELDLM_LOCK_CHANGED: | |
769 | result = -ESTALE; | |
770 | break; | |
771 | case ELDLM_LOCK_ABORTED: | |
772 | result = -ENAVAIL; | |
773 | break; | |
774 | case ELDLM_LOCK_REPLACED: | |
775 | result = -ESRCH; | |
776 | break; | |
777 | case ELDLM_NO_LOCK_DATA: | |
778 | result = -ENOENT; | |
779 | break; | |
780 | case ELDLM_NAMESPACE_EXISTS: | |
781 | result = -EEXIST; | |
782 | break; | |
783 | case ELDLM_BAD_NAMESPACE: | |
784 | result = -EBADF; | |
785 | break; | |
786 | default: | |
787 | if (((int)error) < 0) /* cast to signed type */ | |
788 | result = error; /* as ldlm_error_t can be unsigned */ | |
789 | else { | |
790 | CERROR("Invalid DLM result code: %d\n", error); | |
791 | result = -EPROTO; | |
792 | } | |
793 | } | |
794 | return result; | |
795 | } | |
796 | EXPORT_SYMBOL(ldlm_error2errno); | |
797 | ||
798 | /** | |
799 | * Dual to ldlm_error2errno(): maps errno values back to ldlm_error_t. | |
800 | */ | |
801 | ldlm_error_t ldlm_errno2error(int err_no) | |
802 | { | |
803 | int error; | |
804 | ||
805 | switch (err_no) { | |
806 | case 0: | |
807 | error = ELDLM_OK; | |
808 | break; | |
809 | case -ESTALE: | |
810 | error = ELDLM_LOCK_CHANGED; | |
811 | break; | |
812 | case -ENAVAIL: | |
813 | error = ELDLM_LOCK_ABORTED; | |
814 | break; | |
815 | case -ESRCH: | |
816 | error = ELDLM_LOCK_REPLACED; | |
817 | break; | |
818 | case -ENOENT: | |
819 | error = ELDLM_NO_LOCK_DATA; | |
820 | break; | |
821 | case -EEXIST: | |
822 | error = ELDLM_NAMESPACE_EXISTS; | |
823 | break; | |
824 | case -EBADF: | |
825 | error = ELDLM_BAD_NAMESPACE; | |
826 | break; | |
827 | default: | |
828 | error = err_no; | |
829 | } | |
830 | return error; | |
831 | } | |
832 | EXPORT_SYMBOL(ldlm_errno2error); | |
833 | ||
#if LUSTRE_TRACKS_LOCK_EXP_REFS
/*
 * Log every lock still linked on @exp's exp_locks_list. Nothing is printed
 * when the list is empty. The list is walked under exp_locks_list_guard.
 */
void ldlm_dump_export_locks(struct obd_export *exp)
{
	struct ldlm_lock *lock;

	spin_lock(&exp->exp_locks_list_guard);
	if (list_empty(&exp->exp_locks_list))
		goto out_unlock;

	CERROR("dumping locks for export %p,"
	       "ignore if the unmount doesn't hang\n", exp);
	list_for_each_entry(lock, &exp->exp_locks_list, l_exp_refs_link)
		LDLM_ERROR(lock, "lock:");

out_unlock:
	spin_unlock(&exp->exp_locks_list_guard);
}
#endif