Commit | Line | Data |
---|---|---|
d7e09d03 PT |
1 | /* |
2 | * GPL HEADER START | |
3 | * | |
4 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License version 2 only, | |
8 | * as published by the Free Software Foundation. | |
9 | * | |
10 | * This program is distributed in the hope that it will be useful, but | |
11 | * WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 | * General Public License version 2 for more details (a copy is included | |
14 | * in the LICENSE file that accompanied this code). | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * version 2 along with this program; If not, see | |
18 | * http://www.sun.com/software/products/lustre/docs/GPLv2.pdf | |
19 | * | |
20 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, | |
21 | * CA 95054 USA or visit www.sun.com if you need additional information or | |
22 | * have any questions. | |
23 | * | |
24 | * GPL HEADER END | |
25 | */ | |
26 | /* | |
27 | * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. | |
28 | * Use is subject to license terms. | |
29 | * | |
30 | * Copyright (c) 2011, 2012, Intel Corporation. | |
31 | */ | |
32 | /* | |
33 | * This file is part of Lustre, http://www.lustre.org/ | |
34 | * Lustre is a trademark of Sun Microsystems, Inc. | |
35 | * | |
36 | * lustre/llite/llite_close.c | |
37 | * | |
38 | * Lustre Lite routines to issue a secondary close after writeback | |
39 | */ | |
40 | ||
41 | #include <linux/module.h> | |
42 | ||
43 | #define DEBUG_SUBSYSTEM S_LLITE | |
44 | ||
45 | #include <lustre_lite.h> | |
46 | #include "llite_internal.h" | |
47 | ||
/** Records that a write is in flight: marks the inode SOM-dirty and, if
 * @page is not yet linked, adds it to the object's pending-page list.
 * Both updates are made under lli_lock. */
void vvp_write_pending(struct ccc_object *club, struct ccc_page *page)
{
	struct ll_inode_info *lli = ll_i2info(club->cob_inode);

	spin_lock(&lli->lli_lock);
	lli->lli_flags |= LLIF_SOM_DIRTY;
	/* Link the page only once; a page already on the pending list
	 * (non-empty linkage) is left where it is. */
	if (page != NULL && list_empty(&page->cpg_pending_linkage))
		list_add(&page->cpg_pending_linkage,
			 &club->cob_pending_list);
	spin_unlock(&lli->lli_lock);
}
60 | ||
/** Records that a write has completed: unlinks @page from the pending
 * list under lli_lock and, if the page was actually unlinked, tries to
 * queue a DONE_WRITING request (outside the lock). */
void vvp_write_complete(struct ccc_object *club, struct ccc_page *page)
{
	struct ll_inode_info *lli = ll_i2info(club->cob_inode);
	int rc = 0;

	spin_lock(&lli->lli_lock);
	if (page != NULL && !list_empty(&page->cpg_pending_linkage)) {
		list_del_init(&page->cpg_pending_linkage);
		rc = 1;
	}
	spin_unlock(&lli->lli_lock);
	/* Queue outside lli_lock; ll_queue_done_writing() retakes it. */
	if (rc)
		ll_queue_done_writing(club->cob_inode, 0);
}
76 | ||
/** Queues DONE_WRITING if
 * - done writing is allowed (LLIF_DONE_WRITING set, possibly via @flags);
 * - inode has no dirty pages (cob_pending_list is empty). */
void ll_queue_done_writing(struct inode *inode, unsigned long flags)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);

	spin_lock(&lli->lli_lock);
	lli->lli_flags |= flags;

	if ((lli->lli_flags & LLIF_DONE_WRITING) &&
	    list_empty(&club->cob_pending_list)) {
		struct ll_close_queue *lcq = ll_i2sbi(inode)->ll_lcq;

		if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
			CWARN("ino %lu/%u(flags %u) som valid it just after "
			      "recovery\n",
			      inode->i_ino, inode->i_generation,
			      lli->lli_flags);
		/* DONE_WRITING is allowed and inode has no dirty page. */
		spin_lock(&lcq->lcq_lock);

		LASSERT(list_empty(&lli->lli_close_list));
		CDEBUG(D_INODE, "adding inode %lu/%u to close list\n",
		       inode->i_ino, inode->i_generation);
		list_add_tail(&lli->lli_close_list, &lcq->lcq_head);

		/* Avoid a concurrent insertion into the close thread queue:
		 * an inode is already in the close thread, open(), write(),
		 * close() happen, epoch is closed as the inode is marked as
		 * LLIF_EPOCH_PENDING. When pages are written inode should not
		 * be inserted into the queue again, clear this flag to avoid
		 * it. */
		lli->lli_flags &= ~LLIF_DONE_WRITING;

		/* Wake the close thread while still holding lcq_lock so the
		 * insertion and wakeup are atomic w.r.t. the dequeue side. */
		wake_up(&lcq->lcq_waitq);
		spin_unlock(&lcq->lcq_lock);
	}
	spin_unlock(&lli->lli_lock);
}
118 | ||
/** Packs SOM attributes into @op_data for CLOSE, DONE_WRITING rpc. */
void ll_done_writing_attr(struct inode *inode, struct md_op_data *op_data)
{
	struct ll_inode_info *lli = ll_i2info(inode);

	op_data->op_flags |= MF_SOM_CHANGE;
	/* Check if Size-on-MDS attributes are valid. */
	if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
		CERROR("ino %lu/%u(flags %u) som valid it just after "
		       "recovery\n", inode->i_ino, inode->i_generation,
		       lli->lli_flags);

	if (!cl_local_size(inode)) {
		/* Send Size-on-MDS Attributes if valid. */
		op_data->op_attr.ia_valid |= ATTR_MTIME_SET | ATTR_CTIME_SET |
				ATTR_ATIME_SET | ATTR_SIZE | ATTR_BLOCKS;
	}
}
137 | ||
/** Closes ioepoch and packs Size-on-MDS attribute if needed into @op_data.
 *
 * If dirty pages remain, the close is deferred: either the open handle is
 * parked in lli_pending_och (epoch-pending path) or LLIF_DONE_WRITING is
 * set for a later retry; in both cases an extra inode reference is taken
 * via igrab() for the queued work (released by the close thread's iput()). */
void ll_ioepoch_close(struct inode *inode, struct md_op_data *op_data,
		      struct obd_client_handle **och, unsigned long flags)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ccc_object *club = cl2ccc(ll_i2info(inode)->lli_clob);

	spin_lock(&lli->lli_lock);
	if (!(list_empty(&club->cob_pending_list))) {
		if (!(lli->lli_flags & LLIF_EPOCH_PENDING)) {
			LASSERT(*och != NULL);
			LASSERT(lli->lli_pending_och == NULL);
			/* Inode is dirty and there is no pending write done
			 * request yet, DONE_WRITE is to be sent later. */
			lli->lli_flags |= LLIF_EPOCH_PENDING;
			lli->lli_pending_och = *och;
			spin_unlock(&lli->lli_lock);

			inode = igrab(inode);
			LASSERT(inode);
			GOTO(out, 0);
		}
		if (flags & LLIF_DONE_WRITING) {
			/* Some pages are still dirty, it is early to send
			 * DONE_WRITE. Wait until all pages will be flushed
			 * and try DONE_WRITE again later. */
			LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING));
			lli->lli_flags |= LLIF_DONE_WRITING;
			spin_unlock(&lli->lli_lock);

			inode = igrab(inode);
			LASSERT(inode);
			GOTO(out, 0);
		}
	}
	CDEBUG(D_INODE, "Epoch "LPU64" closed on "DFID"\n",
	       ll_i2info(inode)->lli_ioepoch, PFID(&lli->lli_fid));
	op_data->op_flags |= MF_EPOCH_CLOSE;

	if (flags & LLIF_DONE_WRITING) {
		/* DONE_WRITING path: return the deferred open handle to the
		 * caller and clear the pending-epoch state. */
		LASSERT(lli->lli_flags & LLIF_SOM_DIRTY);
		LASSERT(!(lli->lli_flags & LLIF_DONE_WRITING));
		*och = lli->lli_pending_och;
		lli->lli_pending_och = NULL;
		lli->lli_flags &= ~LLIF_EPOCH_PENDING;
	} else {
		/* Pack Size-on-MDS inode attributes only if they changed */
		if (!(lli->lli_flags & LLIF_SOM_DIRTY)) {
			spin_unlock(&lli->lli_lock);
			GOTO(out, 0);
		}

		/* There is a pending DONE_WRITE -- close epoch with no
		 * attribute change. */
		if (lli->lli_flags & LLIF_EPOCH_PENDING) {
			spin_unlock(&lli->lli_lock);
			GOTO(out, 0);
		}
	}

	LASSERT(list_empty(&club->cob_pending_list));
	lli->lli_flags &= ~LLIF_SOM_DIRTY;
	spin_unlock(&lli->lli_lock);
	ll_done_writing_attr(inode, op_data);

out:
	return;
}
206 | ||
/**
 * Client updates SOM attributes on MDS (including llog cookies):
 * obd_getattr with no lock and md_setattr.
 */
int ll_som_update(struct inode *inode, struct md_op_data *op_data)
{
	struct ll_inode_info *lli = ll_i2info(inode);
	struct ptlrpc_request *request = NULL;
	__u32 old_flags;
	struct obdo *oa;
	int rc;

	LASSERT(op_data != NULL);
	if (lli->lli_flags & LLIF_MDS_SIZE_LOCK)
		CERROR("ino %lu/%u(flags %u) som valid it just after "
		       "recovery\n", inode->i_ino, inode->i_generation,
		       lli->lli_flags);

	OBDO_ALLOC(oa);
	if (!oa) {
		CERROR("can't allocate memory for Size-on-MDS update.\n");
		return -ENOMEM;
	}

	/* Preserve the caller's flags; only MF_GETATTR_LOCK is consulted
	 * below, op_flags itself is replaced for the SOM change. */
	old_flags = op_data->op_flags;
	op_data->op_flags = MF_SOM_CHANGE;

	/* If inode is already in another epoch, skip getattr from OSTs. */
	if (lli->lli_ioepoch == op_data->op_ioepoch) {
		rc = ll_inode_getattr(inode, oa, op_data->op_ioepoch,
				      old_flags & MF_GETATTR_LOCK);
		if (rc) {
			/* Getattr failed: send the setattr anyway but with
			 * no valid attribute bits. */
			oa->o_valid = 0;
			if (rc != -ENOENT)
				CERROR("inode_getattr failed (%d): unable to "
				       "send a Size-on-MDS attribute update "
				       "for inode %lu/%u\n", rc, inode->i_ino,
				       inode->i_generation);
		} else {
			CDEBUG(D_INODE, "Size-on-MDS update on "DFID"\n",
			       PFID(&lli->lli_fid));
		}
		/* Install attributes into op_data. */
		md_from_obdo(op_data, oa, oa->o_valid);
	}

	rc = md_setattr(ll_i2sbi(inode)->ll_md_exp, op_data,
			NULL, 0, NULL, 0, &request, NULL);
	ptlrpc_req_finished(request);

	OBDO_FREE(oa);
	return rc;
}
260 | ||
/**
 * Closes the ioepoch and packs all the attributes into @op_data for
 * DONE_WRITING rpc.
 *
 * On return *och may be NULL, meaning the epoch could not be closed yet
 * (dirty pages remain) and DONE_WRITING must be retried later.
 */
static void ll_prepare_done_writing(struct inode *inode,
				    struct md_op_data *op_data,
				    struct obd_client_handle **och)
{
	ll_ioepoch_close(inode, op_data, och, LLIF_DONE_WRITING);
	/* If there is no @och, we do not do D_W yet. */
	if (*och == NULL)
		return;

	ll_pack_inode2opdata(inode, op_data, &(*och)->och_fh);
	ll_prep_md_op_data(op_data, inode, NULL, NULL,
			   0, 0, LUSTRE_OPC_ANY, NULL);
}
278 | ||
/** Send a DONE_WRITING rpc; on -EAGAIN the MDS wants a Size-on-MDS
 * update, so fetch attributes from the OSTs and setattr them back. */
static void ll_done_writing(struct inode *inode)
{
	struct obd_client_handle *och = NULL;
	struct md_op_data *op_data;
	int rc;

	/* Only reachable when the MDS export was negotiated with SOM. */
	LASSERT(exp_connect_som(ll_i2mdexp(inode)));

	OBD_ALLOC_PTR(op_data);
	if (op_data == NULL) {
		CERROR("can't allocate op_data\n");
		return;
	}

	ll_prepare_done_writing(inode, op_data, &och);
	/* If there is no @och, we do not do D_W yet. */
	if (och == NULL)
		GOTO(out, 0);

	rc = md_done_writing(ll_i2sbi(inode)->ll_md_exp, op_data, NULL);
	if (rc == -EAGAIN) {
		/* MDS has instructed us to obtain Size-on-MDS attribute from
		 * OSTs and send setattr to back to MDS. */
		rc = ll_som_update(inode, op_data);
	} else if (rc) {
		CERROR("inode %lu mdc done_writing failed: rc = %d\n",
		       inode->i_ino, rc);
	}
out:
	/* Common cleanup: op_data always, och only when it was handed out. */
	ll_finish_md_op_data(op_data);
	if (och) {
		md_clear_open_replay_data(ll_i2sbi(inode)->ll_md_exp, och);
		OBD_FREE_PTR(och);
	}
}
315 | ||
316 | static struct ll_inode_info *ll_close_next_lli(struct ll_close_queue *lcq) | |
317 | { | |
318 | struct ll_inode_info *lli = NULL; | |
319 | ||
320 | spin_lock(&lcq->lcq_lock); | |
321 | ||
322 | if (!list_empty(&lcq->lcq_head)) { | |
323 | lli = list_entry(lcq->lcq_head.next, struct ll_inode_info, | |
324 | lli_close_list); | |
325 | list_del_init(&lli->lli_close_list); | |
326 | } else if (atomic_read(&lcq->lcq_stop)) | |
327 | lli = ERR_PTR(-EALREADY); | |
328 | ||
329 | spin_unlock(&lcq->lcq_lock); | |
330 | return lli; | |
331 | } | |
332 | ||
333 | static int ll_close_thread(void *arg) | |
334 | { | |
335 | struct ll_close_queue *lcq = arg; | |
d7e09d03 PT |
336 | |
337 | complete(&lcq->lcq_comp); | |
338 | ||
339 | while (1) { | |
340 | struct l_wait_info lwi = { 0 }; | |
341 | struct ll_inode_info *lli; | |
342 | struct inode *inode; | |
343 | ||
344 | l_wait_event_exclusive(lcq->lcq_waitq, | |
345 | (lli = ll_close_next_lli(lcq)) != NULL, | |
346 | &lwi); | |
347 | if (IS_ERR(lli)) | |
348 | break; | |
349 | ||
350 | inode = ll_info2i(lli); | |
351 | CDEBUG(D_INFO, "done_writting for inode %lu/%u\n", | |
352 | inode->i_ino, inode->i_generation); | |
353 | ll_done_writing(inode); | |
354 | iput(inode); | |
355 | } | |
356 | ||
357 | CDEBUG(D_INFO, "ll_close exiting\n"); | |
358 | complete(&lcq->lcq_comp); | |
0a3bdb00 | 359 | return 0; |
d7e09d03 PT |
360 | } |
361 | ||
362 | int ll_close_thread_start(struct ll_close_queue **lcq_ret) | |
363 | { | |
364 | struct ll_close_queue *lcq; | |
365 | task_t *task; | |
366 | ||
367 | if (OBD_FAIL_CHECK(OBD_FAIL_LDLM_CLOSE_THREAD)) | |
368 | return -EINTR; | |
369 | ||
370 | OBD_ALLOC(lcq, sizeof(*lcq)); | |
371 | if (lcq == NULL) | |
372 | return -ENOMEM; | |
373 | ||
374 | spin_lock_init(&lcq->lcq_lock); | |
375 | INIT_LIST_HEAD(&lcq->lcq_head); | |
376 | init_waitqueue_head(&lcq->lcq_waitq); | |
377 | init_completion(&lcq->lcq_comp); | |
378 | ||
379 | task = kthread_run(ll_close_thread, lcq, "ll_close"); | |
380 | if (IS_ERR(task)) { | |
381 | OBD_FREE(lcq, sizeof(*lcq)); | |
382 | return PTR_ERR(task); | |
383 | } | |
384 | ||
385 | wait_for_completion(&lcq->lcq_comp); | |
386 | *lcq_ret = lcq; | |
387 | return 0; | |
388 | } | |
389 | ||
/** Stops the close thread and frees the queue, blocking until the thread
 * has exited. */
void ll_close_thread_shutdown(struct ll_close_queue *lcq)
{
	/* Re-arm the completion (it was consumed at startup) so we can
	 * wait for the thread's final complete() before freeing lcq. */
	init_completion(&lcq->lcq_comp);
	atomic_inc(&lcq->lcq_stop);
	wake_up(&lcq->lcq_waitq);
	wait_for_completion(&lcq->lcq_comp);
	OBD_FREE(lcq, sizeof(*lcq));
}
397 | } |