Commit | Line | Data |
---|---|---|
1da177e4 | 1 | /* |
7b718769 NS |
2 | * Copyright (c) 2000-2005 Silicon Graphics, Inc. |
3 | * All Rights Reserved. | |
1da177e4 | 4 | * |
7b718769 NS |
5 | * This program is free software; you can redistribute it and/or |
6 | * modify it under the terms of the GNU General Public License as | |
1da177e4 LT |
7 | * published by the Free Software Foundation. |
8 | * | |
7b718769 NS |
9 | * This program is distributed in the hope that it would be useful, |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * GNU General Public License for more details. | |
1da177e4 | 13 | * |
7b718769 NS |
14 | * You should have received a copy of the GNU General Public License |
15 | * along with this program; if not, write the Free Software Foundation, | |
16 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
1da177e4 | 17 | */ |
1da177e4 | 18 | #include "xfs.h" |
a844f451 | 19 | #include "xfs_fs.h" |
1da177e4 | 20 | #include "xfs_types.h" |
a844f451 | 21 | #include "xfs_bit.h" |
1da177e4 | 22 | #include "xfs_log.h" |
a844f451 | 23 | #include "xfs_inum.h" |
1da177e4 LT |
24 | #include "xfs_trans.h" |
25 | #include "xfs_sb.h" | |
a844f451 | 26 | #include "xfs_ag.h" |
1da177e4 LT |
27 | #include "xfs_dir2.h" |
28 | #include "xfs_dmapi.h" | |
29 | #include "xfs_mount.h" | |
a844f451 | 30 | #include "xfs_da_btree.h" |
1da177e4 LT |
31 | #include "xfs_bmap_btree.h" |
32 | #include "xfs_ialloc_btree.h" | |
33 | #include "xfs_alloc_btree.h" | |
1da177e4 | 34 | #include "xfs_dir2_sf.h" |
a844f451 | 35 | #include "xfs_attr_sf.h" |
1da177e4 | 36 | #include "xfs_dinode.h" |
1da177e4 | 37 | #include "xfs_inode.h" |
a844f451 NS |
38 | #include "xfs_inode_item.h" |
39 | #include "xfs_btree.h" | |
40 | #include "xfs_alloc.h" | |
41 | #include "xfs_ialloc.h" | |
42 | #include "xfs_quota.h" | |
1da177e4 LT |
43 | #include "xfs_error.h" |
44 | #include "xfs_bmap.h" | |
1da177e4 | 45 | #include "xfs_rw.h" |
1da177e4 | 46 | #include "xfs_buf_item.h" |
a844f451 | 47 | #include "xfs_log_priv.h" |
1da177e4 | 48 | #include "xfs_dir2_trace.h" |
a844f451 | 49 | #include "xfs_extfree_item.h" |
1da177e4 LT |
50 | #include "xfs_acl.h" |
51 | #include "xfs_attr.h" | |
52 | #include "xfs_clnt.h" | |
2a82b8be DC |
53 | #include "xfs_mru_cache.h" |
54 | #include "xfs_filestream.h" | |
e13a73f0 | 55 | #include "xfs_fsops.h" |
739bfb2a | 56 | #include "xfs_vnodeops.h" |
48c872a9 | 57 | #include "xfs_vfsops.h" |
43355099 | 58 | #include "xfs_utils.h" |
739bfb2a | 59 | |
1da177e4 | 60 | |
3c85c36c | 61 | STATIC void |
f898d6c0 CH |
62 | xfs_quiesce_fs( |
63 | xfs_mount_t *mp) | |
64 | { | |
65 | int count = 0, pincount; | |
3758dee9 | 66 | |
f898d6c0 CH |
67 | xfs_flush_buftarg(mp->m_ddev_targp, 0); |
68 | xfs_finish_reclaim_all(mp, 0); | |
69 | ||
70 | /* This loop must run at least twice. | |
71 | * The first instance of the loop will flush | |
72 | * most meta data but that will generate more | |
73 | * meta data (typically directory updates). | |
74 | * Which then must be flushed and logged before | |
75 | * we can write the unmount record. | |
3758dee9 | 76 | */ |
f898d6c0 | 77 | do { |
516b2e7c | 78 | xfs_syncsub(mp, SYNC_INODE_QUIESCE, NULL); |
f898d6c0 CH |
79 | pincount = xfs_flush_buftarg(mp->m_ddev_targp, 1); |
80 | if (!pincount) { | |
81 | delay(50); | |
82 | count++; | |
83 | } | |
84 | } while (count < 2); | |
f898d6c0 | 85 | } |
1da177e4 | 86 | |
516b2e7c DC |
87 | /* |
88 | * Second stage of a quiesce. The data is already synced, now we have to take | |
89 | * care of the metadata. New transactions are already blocked, so we need to | |
90 | * wait for any remaining transactions to drain out before proceding. | |
91 | */ | |
9909c4aa | 92 | void |
516b2e7c DC |
93 | xfs_attr_quiesce( |
94 | xfs_mount_t *mp) | |
95 | { | |
e5720eec DC |
96 | int error = 0; |
97 | ||
516b2e7c DC |
98 | /* wait for all modifications to complete */ |
99 | while (atomic_read(&mp->m_active_trans) > 0) | |
100 | delay(100); | |
101 | ||
102 | /* flush inodes and push all remaining buffers out to disk */ | |
103 | xfs_quiesce_fs(mp); | |
104 | ||
105 | ASSERT_ALWAYS(atomic_read(&mp->m_active_trans) == 0); | |
106 | ||
107 | /* Push the superblock and write an unmount record */ | |
e5720eec DC |
108 | error = xfs_log_sbcount(mp, 1); |
109 | if (error) | |
110 | xfs_fs_cmn_err(CE_WARN, mp, | |
111 | "xfs_attr_quiesce: failed to log sb changes. " | |
112 | "Frozen image may not be consistent."); | |
516b2e7c DC |
113 | xfs_log_unmount_write(mp); |
114 | xfs_unmountfs_writesb(mp); | |
115 | } | |
116 | ||
1da177e4 LT |
117 | /* |
118 | * xfs_unmount_flush implements a set of flush operation on special | |
119 | * inodes, which are needed as a separate set of operations so that | |
120 | * they can be called as part of relocation process. | |
121 | */ | |
122 | int | |
123 | xfs_unmount_flush( | |
124 | xfs_mount_t *mp, /* Mount structure we are getting | |
125 | rid of. */ | |
126 | int relocation) /* Called from vfs relocation. */ | |
127 | { | |
128 | xfs_inode_t *rip = mp->m_rootip; | |
129 | xfs_inode_t *rbmip; | |
130 | xfs_inode_t *rsumip = NULL; | |
1da177e4 LT |
131 | int error; |
132 | ||
f7c66ce3 | 133 | xfs_ilock(rip, XFS_ILOCK_EXCL | XFS_ILOCK_PARENT); |
1da177e4 LT |
134 | xfs_iflock(rip); |
135 | ||
136 | /* | |
137 | * Flush out the real time inodes. | |
138 | */ | |
139 | if ((rbmip = mp->m_rbmip) != NULL) { | |
140 | xfs_ilock(rbmip, XFS_ILOCK_EXCL); | |
141 | xfs_iflock(rbmip); | |
142 | error = xfs_iflush(rbmip, XFS_IFLUSH_SYNC); | |
143 | xfs_iunlock(rbmip, XFS_ILOCK_EXCL); | |
144 | ||
145 | if (error == EFSCORRUPTED) | |
146 | goto fscorrupt_out; | |
147 | ||
e4f75291 | 148 | ASSERT(vn_count(VFS_I(rbmip)) == 1); |
1da177e4 LT |
149 | |
150 | rsumip = mp->m_rsumip; | |
151 | xfs_ilock(rsumip, XFS_ILOCK_EXCL); | |
152 | xfs_iflock(rsumip); | |
153 | error = xfs_iflush(rsumip, XFS_IFLUSH_SYNC); | |
154 | xfs_iunlock(rsumip, XFS_ILOCK_EXCL); | |
155 | ||
156 | if (error == EFSCORRUPTED) | |
157 | goto fscorrupt_out; | |
158 | ||
e4f75291 | 159 | ASSERT(vn_count(VFS_I(rsumip)) == 1); |
1da177e4 LT |
160 | } |
161 | ||
162 | /* | |
163 | * Synchronously flush root inode to disk | |
164 | */ | |
165 | error = xfs_iflush(rip, XFS_IFLUSH_SYNC); | |
166 | if (error == EFSCORRUPTED) | |
167 | goto fscorrupt_out2; | |
168 | ||
df80c933 | 169 | if (vn_count(VFS_I(rip)) != 1 && !relocation) { |
1da177e4 LT |
170 | xfs_iunlock(rip, XFS_ILOCK_EXCL); |
171 | return XFS_ERROR(EBUSY); | |
172 | } | |
173 | ||
174 | /* | |
175 | * Release dquot that rootinode, rbmino and rsumino might be holding, | |
176 | * flush and purge the quota inodes. | |
177 | */ | |
178 | error = XFS_QM_UNMOUNT(mp); | |
179 | if (error == EFSCORRUPTED) | |
180 | goto fscorrupt_out2; | |
181 | ||
182 | if (rbmip) { | |
43355099 CH |
183 | IRELE(rbmip); |
184 | IRELE(rsumip); | |
1da177e4 LT |
185 | } |
186 | ||
187 | xfs_iunlock(rip, XFS_ILOCK_EXCL); | |
188 | return 0; | |
189 | ||
190 | fscorrupt_out: | |
191 | xfs_ifunlock(rip); | |
192 | ||
193 | fscorrupt_out2: | |
194 | xfs_iunlock(rip, XFS_ILOCK_EXCL); | |
195 | ||
196 | return XFS_ERROR(EFSCORRUPTED); | |
197 | } | |
198 | ||
1da177e4 LT |
199 | /* |
200 | * xfs_sync flushes any pending I/O to file system vfsp. | |
201 | * | |
202 | * This routine is called by vfs_sync() to make sure that things make it | |
203 | * out to disk eventually, on sync() system calls to flush out everything, | |
204 | * and when the file system is unmounted. For the vfs_sync() case, all | |
205 | * we really need to do is sync out the log to make all of our meta-data | |
206 | * updates permanent (except for timestamps). For calls from pflushd(), | |
207 | * dirty pages are kept moving by calling pdflush() on the inodes | |
208 | * containing them. We also flush the inodes that we can lock without | |
209 | * sleeping and the superblock if we can lock it without sleeping from | |
210 | * vfs_sync() so that items at the tail of the log are always moving out. | |
211 | * | |
212 | * Flags: | |
213 | * SYNC_BDFLUSH - We're being called from vfs_sync() so we don't want | |
214 | * to sleep if we can help it. All we really need | |
215 | * to do is ensure that the log is synced at least | |
216 | * periodically. We also push the inodes and | |
217 | * superblock if we can lock them without sleeping | |
218 | * and they are not pinned. | |
219 | * SYNC_ATTR - We need to flush the inodes. If SYNC_BDFLUSH is not | |
220 | * set, then we really want to lock each inode and flush | |
221 | * it. | |
222 | * SYNC_WAIT - All the flushes that take place in this call should | |
223 | * be synchronous. | |
224 | * SYNC_DELWRI - This tells us to push dirty pages associated with | |
225 | * inodes. SYNC_WAIT and SYNC_BDFLUSH are used to | |
226 | * determine if they should be flushed sync, async, or | |
227 | * delwri. | |
228 | * SYNC_CLOSE - This flag is passed when the system is being | |
c41564b5 | 229 | * unmounted. We should sync and invalidate everything. |
1da177e4 LT |
230 | * SYNC_FSDATA - This indicates that the caller would like to make |
231 | * sure the superblock is safe on disk. We can ensure | |
c41564b5 | 232 | * this by simply making sure the log gets flushed |
1da177e4 LT |
233 | * if SYNC_BDFLUSH is set, and by actually writing it |
234 | * out otherwise. | |
3c0dc77b DC |
235 | * SYNC_IOWAIT - The caller wants us to wait for all data I/O to complete |
236 | * before we return (including direct I/O). Forms the drain | |
237 | * side of the write barrier needed to safely quiesce the | |
238 | * filesystem. | |
1da177e4 LT |
239 | * |
240 | */ | |
48c872a9 | 241 | int |
1da177e4 | 242 | xfs_sync( |
48c872a9 CH |
243 | xfs_mount_t *mp, |
244 | int flags) | |
1da177e4 | 245 | { |
b09cc771 CH |
246 | int error; |
247 | ||
248 | /* | |
249 | * Get the Quota Manager to flush the dquots. | |
250 | * | |
251 | * If XFS quota support is not enabled or this filesystem | |
252 | * instance does not use quotas XFS_QM_DQSYNC will always | |
253 | * return zero. | |
254 | */ | |
255 | error = XFS_QM_DQSYNC(mp, flags); | |
256 | if (error) { | |
257 | /* | |
258 | * If we got an IO error, we will be shutting down. | |
259 | * So, there's nothing more for us to do here. | |
260 | */ | |
261 | ASSERT(error != EIO || XFS_FORCED_SHUTDOWN(mp)); | |
262 | if (XFS_FORCED_SHUTDOWN(mp)) | |
263 | return XFS_ERROR(error); | |
264 | } | |
1da177e4 | 265 | |
2a82b8be DC |
266 | if (flags & SYNC_IOWAIT) |
267 | xfs_filestream_flush(mp); | |
268 | ||
2823945f | 269 | return xfs_syncsub(mp, flags, NULL); |
1da177e4 LT |
270 | } |
271 | ||
272 | /* | |
273 | * xfs sync routine for internal use | |
274 | * | |
b83bd138 | 275 | * This routine supports all of the flags defined for the generic vfs_sync |
1f9b3b64 | 276 | * interface as explained above under xfs_sync. |
1da177e4 LT |
277 | * |
278 | */ | |
ee34807a | 279 | int |
1da177e4 LT |
280 | xfs_sync_inodes( |
281 | xfs_mount_t *mp, | |
282 | int flags, | |
1da177e4 LT |
283 | int *bypassed) |
284 | { | |
285 | xfs_inode_t *ip = NULL; | |
5ec7f8c7 | 286 | struct inode *vp = NULL; |
1da177e4 LT |
287 | int error; |
288 | int last_error; | |
289 | uint64_t fflag; | |
290 | uint lock_flags; | |
291 | uint base_lock_flags; | |
292 | boolean_t mount_locked; | |
293 | boolean_t vnode_refed; | |
294 | int preempt; | |
1da177e4 LT |
295 | xfs_iptr_t *ipointer; |
296 | #ifdef DEBUG | |
297 | boolean_t ipointer_in = B_FALSE; | |
298 | ||
299 | #define IPOINTER_SET ipointer_in = B_TRUE | |
300 | #define IPOINTER_CLR ipointer_in = B_FALSE | |
301 | #else | |
302 | #define IPOINTER_SET | |
303 | #define IPOINTER_CLR | |
304 | #endif | |
305 | ||
306 | ||
307 | /* Insert a marker record into the inode list after inode ip. The list | |
308 | * must be locked when this is called. After the call the list will no | |
309 | * longer be locked. | |
310 | */ | |
311 | #define IPOINTER_INSERT(ip, mp) { \ | |
312 | ASSERT(ipointer_in == B_FALSE); \ | |
313 | ipointer->ip_mnext = ip->i_mnext; \ | |
314 | ipointer->ip_mprev = ip; \ | |
315 | ip->i_mnext = (xfs_inode_t *)ipointer; \ | |
316 | ipointer->ip_mnext->i_mprev = (xfs_inode_t *)ipointer; \ | |
317 | preempt = 0; \ | |
318 | XFS_MOUNT_IUNLOCK(mp); \ | |
319 | mount_locked = B_FALSE; \ | |
320 | IPOINTER_SET; \ | |
321 | } | |
322 | ||
323 | /* Remove the marker from the inode list. If the marker was the only item | |
324 | * in the list then there are no remaining inodes and we should zero out | |
325 | * the whole list. If we are the current head of the list then move the head | |
326 | * past us. | |
327 | */ | |
328 | #define IPOINTER_REMOVE(ip, mp) { \ | |
329 | ASSERT(ipointer_in == B_TRUE); \ | |
330 | if (ipointer->ip_mnext != (xfs_inode_t *)ipointer) { \ | |
331 | ip = ipointer->ip_mnext; \ | |
332 | ip->i_mprev = ipointer->ip_mprev; \ | |
333 | ipointer->ip_mprev->i_mnext = ip; \ | |
334 | if (mp->m_inodes == (xfs_inode_t *)ipointer) { \ | |
335 | mp->m_inodes = ip; \ | |
336 | } \ | |
337 | } else { \ | |
338 | ASSERT(mp->m_inodes == (xfs_inode_t *)ipointer); \ | |
339 | mp->m_inodes = NULL; \ | |
340 | ip = NULL; \ | |
341 | } \ | |
342 | IPOINTER_CLR; \ | |
343 | } | |
344 | ||
345 | #define XFS_PREEMPT_MASK 0x7f | |
346 | ||
44866d39 LM |
347 | ASSERT(!(flags & SYNC_BDFLUSH)); |
348 | ||
1da177e4 LT |
349 | if (bypassed) |
350 | *bypassed = 0; | |
bd186aa9 | 351 | if (mp->m_flags & XFS_MOUNT_RDONLY) |
1da177e4 LT |
352 | return 0; |
353 | error = 0; | |
354 | last_error = 0; | |
355 | preempt = 0; | |
356 | ||
357 | /* Allocate a reference marker */ | |
358 | ipointer = (xfs_iptr_t *)kmem_zalloc(sizeof(xfs_iptr_t), KM_SLEEP); | |
359 | ||
360 | fflag = XFS_B_ASYNC; /* default is don't wait */ | |
44866d39 | 361 | if (flags & SYNC_DELWRI) |
1da177e4 LT |
362 | fflag = XFS_B_DELWRI; |
363 | if (flags & SYNC_WAIT) | |
364 | fflag = 0; /* synchronous overrides all */ | |
365 | ||
366 | base_lock_flags = XFS_ILOCK_SHARED; | |
367 | if (flags & (SYNC_DELWRI | SYNC_CLOSE)) { | |
368 | /* | |
369 | * We need the I/O lock if we're going to call any of | |
370 | * the flush/inval routines. | |
371 | */ | |
372 | base_lock_flags |= XFS_IOLOCK_SHARED; | |
373 | } | |
374 | ||
375 | XFS_MOUNT_ILOCK(mp); | |
376 | ||
377 | ip = mp->m_inodes; | |
378 | ||
379 | mount_locked = B_TRUE; | |
380 | vnode_refed = B_FALSE; | |
381 | ||
382 | IPOINTER_CLR; | |
383 | ||
384 | do { | |
385 | ASSERT(ipointer_in == B_FALSE); | |
386 | ASSERT(vnode_refed == B_FALSE); | |
387 | ||
388 | lock_flags = base_lock_flags; | |
389 | ||
390 | /* | |
391 | * There were no inodes in the list, just break out | |
392 | * of the loop. | |
393 | */ | |
394 | if (ip == NULL) { | |
395 | break; | |
396 | } | |
397 | ||
398 | /* | |
399 | * We found another sync thread marker - skip it | |
400 | */ | |
401 | if (ip->i_mount == NULL) { | |
402 | ip = ip->i_mnext; | |
403 | continue; | |
404 | } | |
405 | ||
705db4a2 | 406 | vp = VFS_I(ip); |
1da177e4 LT |
407 | |
408 | /* | |
409 | * If the vnode is gone then this is being torn down, | |
410 | * call reclaim if it is flushed, else let regular flush | |
411 | * code deal with it later in the loop. | |
412 | */ | |
413 | ||
414 | if (vp == NULL) { | |
415 | /* Skip ones already in reclaim */ | |
416 | if (ip->i_flags & XFS_IRECLAIM) { | |
417 | ip = ip->i_mnext; | |
418 | continue; | |
419 | } | |
420 | if (xfs_ilock_nowait(ip, XFS_ILOCK_EXCL) == 0) { | |
421 | ip = ip->i_mnext; | |
422 | } else if ((xfs_ipincount(ip) == 0) && | |
423 | xfs_iflock_nowait(ip)) { | |
424 | IPOINTER_INSERT(ip, mp); | |
425 | ||
426 | xfs_finish_reclaim(ip, 1, | |
427 | XFS_IFLUSH_DELWRI_ELSE_ASYNC); | |
428 | ||
429 | XFS_MOUNT_ILOCK(mp); | |
430 | mount_locked = B_TRUE; | |
431 | IPOINTER_REMOVE(ip, mp); | |
432 | } else { | |
433 | xfs_iunlock(ip, XFS_ILOCK_EXCL); | |
434 | ip = ip->i_mnext; | |
435 | } | |
436 | continue; | |
437 | } | |
438 | ||
439 | if (VN_BAD(vp)) { | |
440 | ip = ip->i_mnext; | |
441 | continue; | |
442 | } | |
443 | ||
444 | if (XFS_FORCED_SHUTDOWN(mp) && !(flags & SYNC_CLOSE)) { | |
445 | XFS_MOUNT_IUNLOCK(mp); | |
f0e2d93c | 446 | kmem_free(ipointer); |
1da177e4 LT |
447 | return 0; |
448 | } | |
449 | ||
1da177e4 LT |
450 | /* |
451 | * Try to lock without sleeping. We're out of order with | |
452 | * the inode list lock here, so if we fail we need to drop | |
453 | * the mount lock and try again. If we're called from | |
454 | * bdflush() here, then don't bother. | |
455 | * | |
456 | * The inode lock here actually coordinates with the | |
457 | * almost spurious inode lock in xfs_ireclaim() to prevent | |
458 | * the vnode we handle here without a reference from | |
459 | * being freed while we reference it. If we lock the inode | |
460 | * while it's on the mount list here, then the spurious inode | |
461 | * lock in xfs_ireclaim() after the inode is pulled from | |
462 | * the mount list will sleep until we release it here. | |
463 | * This keeps the vnode from being freed while we reference | |
cdb62687 | 464 | * it. |
1da177e4 LT |
465 | */ |
466 | if (xfs_ilock_nowait(ip, lock_flags) == 0) { | |
44866d39 | 467 | if (vp == NULL) { |
1da177e4 LT |
468 | ip = ip->i_mnext; |
469 | continue; | |
470 | } | |
471 | ||
cdb62687 | 472 | vp = vn_grab(vp); |
1da177e4 | 473 | if (vp == NULL) { |
cdb62687 | 474 | ip = ip->i_mnext; |
1da177e4 LT |
475 | continue; |
476 | } | |
477 | ||
cdb62687 | 478 | IPOINTER_INSERT(ip, mp); |
1da177e4 LT |
479 | xfs_ilock(ip, lock_flags); |
480 | ||
e4f75291 | 481 | ASSERT(vp == VFS_I(ip)); |
1da177e4 LT |
482 | ASSERT(ip->i_mount == mp); |
483 | ||
484 | vnode_refed = B_TRUE; | |
485 | } | |
486 | ||
487 | /* From here on in the loop we may have a marker record | |
488 | * in the inode list. | |
489 | */ | |
490 | ||
40095b64 DC |
491 | /* |
492 | * If we have to flush data or wait for I/O completion | |
493 | * we need to drop the ilock that we currently hold. | |
494 | * If we need to drop the lock, insert a marker if we | |
495 | * have not already done so. | |
496 | */ | |
497 | if ((flags & (SYNC_CLOSE|SYNC_IOWAIT)) || | |
498 | ((flags & SYNC_DELWRI) && VN_DIRTY(vp))) { | |
499 | if (mount_locked) { | |
500 | IPOINTER_INSERT(ip, mp); | |
1da177e4 | 501 | } |
40095b64 | 502 | xfs_iunlock(ip, XFS_ILOCK_SHARED); |
1da177e4 | 503 | |
40095b64 DC |
504 | if (flags & SYNC_CLOSE) { |
505 | /* Shutdown case. Flush and invalidate. */ | |
506 | if (XFS_FORCED_SHUTDOWN(mp)) | |
739bfb2a CH |
507 | xfs_tosspages(ip, 0, -1, |
508 | FI_REMAPF); | |
40095b64 | 509 | else |
739bfb2a CH |
510 | error = xfs_flushinval_pages(ip, |
511 | 0, -1, FI_REMAPF); | |
40095b64 | 512 | } else if ((flags & SYNC_DELWRI) && VN_DIRTY(vp)) { |
739bfb2a | 513 | error = xfs_flush_pages(ip, 0, |
67fcaa73 | 514 | -1, fflag, FI_NONE); |
1da177e4 LT |
515 | } |
516 | ||
40095b64 DC |
517 | /* |
518 | * When freezing, we need to wait ensure all I/O (including direct | |
519 | * I/O) is complete to ensure no further data modification can take | |
520 | * place after this point | |
521 | */ | |
522 | if (flags & SYNC_IOWAIT) | |
b677c210 | 523 | vn_iowait(ip); |
40095b64 DC |
524 | |
525 | xfs_ilock(ip, XFS_ILOCK_SHARED); | |
1da177e4 LT |
526 | } |
527 | ||
44866d39 LM |
528 | if ((flags & SYNC_ATTR) && |
529 | (ip->i_update_core || | |
530 | (ip->i_itemp && ip->i_itemp->ili_format.ilf_fields))) { | |
531 | if (mount_locked) | |
532 | IPOINTER_INSERT(ip, mp); | |
1da177e4 | 533 | |
44866d39 LM |
534 | if (flags & SYNC_WAIT) { |
535 | xfs_iflock(ip); | |
536 | error = xfs_iflush(ip, XFS_IFLUSH_SYNC); | |
1da177e4 | 537 | |
44866d39 LM |
538 | /* |
539 | * If we can't acquire the flush lock, then the inode | |
540 | * is already being flushed so don't bother waiting. | |
541 | * | |
542 | * If we can lock it then do a delwri flush so we can | |
543 | * combine multiple inode flushes in each disk write. | |
544 | */ | |
545 | } else if (xfs_iflock_nowait(ip)) { | |
546 | error = xfs_iflush(ip, XFS_IFLUSH_DELWRI); | |
547 | } else if (bypassed) { | |
548 | (*bypassed)++; | |
1da177e4 LT |
549 | } |
550 | } | |
551 | ||
552 | if (lock_flags != 0) { | |
553 | xfs_iunlock(ip, lock_flags); | |
554 | } | |
555 | ||
556 | if (vnode_refed) { | |
557 | /* | |
558 | * If we had to take a reference on the vnode | |
559 | * above, then wait until after we've unlocked | |
560 | * the inode to release the reference. This is | |
561 | * because we can be already holding the inode | |
43355099 | 562 | * lock when IRELE() calls xfs_inactive(). |
1da177e4 LT |
563 | * |
564 | * Make sure to drop the mount lock before calling | |
43355099 | 565 | * IRELE() so that we don't trip over ourselves if |
1da177e4 LT |
566 | * we have to go for the mount lock again in the |
567 | * inactive code. | |
568 | */ | |
569 | if (mount_locked) { | |
570 | IPOINTER_INSERT(ip, mp); | |
571 | } | |
572 | ||
43355099 | 573 | IRELE(ip); |
1da177e4 LT |
574 | |
575 | vnode_refed = B_FALSE; | |
576 | } | |
577 | ||
578 | if (error) { | |
579 | last_error = error; | |
580 | } | |
581 | ||
582 | /* | |
583 | * bail out if the filesystem is corrupted. | |
584 | */ | |
585 | if (error == EFSCORRUPTED) { | |
586 | if (!mount_locked) { | |
587 | XFS_MOUNT_ILOCK(mp); | |
588 | IPOINTER_REMOVE(ip, mp); | |
589 | } | |
590 | XFS_MOUNT_IUNLOCK(mp); | |
591 | ASSERT(ipointer_in == B_FALSE); | |
f0e2d93c | 592 | kmem_free(ipointer); |
1da177e4 LT |
593 | return XFS_ERROR(error); |
594 | } | |
595 | ||
596 | /* Let other threads have a chance at the mount lock | |
597 | * if we have looped many times without dropping the | |
598 | * lock. | |
599 | */ | |
600 | if ((++preempt & XFS_PREEMPT_MASK) == 0) { | |
601 | if (mount_locked) { | |
602 | IPOINTER_INSERT(ip, mp); | |
603 | } | |
604 | } | |
605 | ||
606 | if (mount_locked == B_FALSE) { | |
607 | XFS_MOUNT_ILOCK(mp); | |
608 | mount_locked = B_TRUE; | |
609 | IPOINTER_REMOVE(ip, mp); | |
610 | continue; | |
611 | } | |
612 | ||
613 | ASSERT(ipointer_in == B_FALSE); | |
614 | ip = ip->i_mnext; | |
615 | ||
616 | } while (ip != mp->m_inodes); | |
617 | ||
618 | XFS_MOUNT_IUNLOCK(mp); | |
619 | ||
620 | ASSERT(ipointer_in == B_FALSE); | |
621 | ||
f0e2d93c | 622 | kmem_free(ipointer); |
1da177e4 LT |
623 | return XFS_ERROR(last_error); |
624 | } | |
625 | ||
626 | /* | |
627 | * xfs sync routine for internal use | |
628 | * | |
b83bd138 | 629 | * This routine supports all of the flags defined for the generic vfs_sync |
1f9b3b64 | 630 | * interface as explained above under xfs_sync. |
1da177e4 LT |
631 | * |
632 | */ | |
633 | int | |
634 | xfs_syncsub( | |
635 | xfs_mount_t *mp, | |
636 | int flags, | |
1da177e4 LT |
637 | int *bypassed) |
638 | { | |
639 | int error = 0; | |
640 | int last_error = 0; | |
641 | uint log_flags = XFS_LOG_FORCE; | |
642 | xfs_buf_t *bp; | |
643 | xfs_buf_log_item_t *bip; | |
644 | ||
645 | /* | |
646 | * Sync out the log. This ensures that the log is periodically | |
647 | * flushed even if there is not enough activity to fill it up. | |
648 | */ | |
649 | if (flags & SYNC_WAIT) | |
650 | log_flags |= XFS_LOG_SYNC; | |
651 | ||
652 | xfs_log_force(mp, (xfs_lsn_t)0, log_flags); | |
653 | ||
654 | if (flags & (SYNC_ATTR|SYNC_DELWRI)) { | |
655 | if (flags & SYNC_BDFLUSH) | |
656 | xfs_finish_reclaim_all(mp, 1); | |
657 | else | |
1f9b3b64 | 658 | error = xfs_sync_inodes(mp, flags, bypassed); |
1da177e4 LT |
659 | } |
660 | ||
661 | /* | |
662 | * Flushing out dirty data above probably generated more | |
663 | * log activity, so if this isn't vfs_sync() then flush | |
664 | * the log again. | |
665 | */ | |
666 | if (flags & SYNC_DELWRI) { | |
667 | xfs_log_force(mp, (xfs_lsn_t)0, log_flags); | |
668 | } | |
669 | ||
670 | if (flags & SYNC_FSDATA) { | |
671 | /* | |
672 | * If this is vfs_sync() then only sync the superblock | |
673 | * if we can lock it without sleeping and it is not pinned. | |
674 | */ | |
675 | if (flags & SYNC_BDFLUSH) { | |
676 | bp = xfs_getsb(mp, XFS_BUF_TRYLOCK); | |
677 | if (bp != NULL) { | |
678 | bip = XFS_BUF_FSPRIVATE(bp,xfs_buf_log_item_t*); | |
679 | if ((bip != NULL) && | |
680 | xfs_buf_item_dirty(bip)) { | |
681 | if (!(XFS_BUF_ISPINNED(bp))) { | |
682 | XFS_BUF_ASYNC(bp); | |
683 | error = xfs_bwrite(mp, bp); | |
684 | } else { | |
685 | xfs_buf_relse(bp); | |
686 | } | |
687 | } else { | |
688 | xfs_buf_relse(bp); | |
689 | } | |
690 | } | |
691 | } else { | |
692 | bp = xfs_getsb(mp, 0); | |
693 | /* | |
694 | * If the buffer is pinned then push on the log so | |
695 | * we won't get stuck waiting in the write for | |
696 | * someone, maybe ourselves, to flush the log. | |
697 | * Even though we just pushed the log above, we | |
698 | * did not have the superblock buffer locked at | |
699 | * that point so it can become pinned in between | |
700 | * there and here. | |
701 | */ | |
702 | if (XFS_BUF_ISPINNED(bp)) | |
703 | xfs_log_force(mp, (xfs_lsn_t)0, XFS_LOG_FORCE); | |
704 | if (flags & SYNC_WAIT) | |
705 | XFS_BUF_UNASYNC(bp); | |
706 | else | |
707 | XFS_BUF_ASYNC(bp); | |
708 | error = xfs_bwrite(mp, bp); | |
709 | } | |
710 | if (error) { | |
711 | last_error = error; | |
712 | } | |
713 | } | |
714 | ||
1da177e4 LT |
715 | /* |
716 | * Now check to see if the log needs a "dummy" transaction. | |
717 | */ | |
1da177e4 LT |
718 | if (!(flags & SYNC_REMOUNT) && xfs_log_need_covered(mp)) { |
719 | xfs_trans_t *tp; | |
720 | xfs_inode_t *ip; | |
721 | ||
722 | /* | |
723 | * Put a dummy transaction in the log to tell | |
724 | * recovery that all others are OK. | |
725 | */ | |
726 | tp = xfs_trans_alloc(mp, XFS_TRANS_DUMMY1); | |
727 | if ((error = xfs_trans_reserve(tp, 0, | |
728 | XFS_ICHANGE_LOG_RES(mp), | |
729 | 0, 0, 0))) { | |
730 | xfs_trans_cancel(tp, 0); | |
731 | return error; | |
732 | } | |
733 | ||
734 | ip = mp->m_rootip; | |
735 | xfs_ilock(ip, XFS_ILOCK_EXCL); | |
736 | ||
737 | xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); | |
738 | xfs_trans_ihold(tp, ip); | |
739 | xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); | |
1c72bf90 | 740 | error = xfs_trans_commit(tp, 0); |
1da177e4 LT |
741 | xfs_iunlock(ip, XFS_ILOCK_EXCL); |
742 | xfs_log_force(mp, (xfs_lsn_t)0, log_flags); | |
743 | } | |
744 | ||
745 | /* | |
746 | * When shutting down, we need to insure that the AIL is pushed | |
747 | * to disk or the filesystem can appear corrupt from the PROM. | |
748 | */ | |
749 | if ((flags & (SYNC_CLOSE|SYNC_WAIT)) == (SYNC_CLOSE|SYNC_WAIT)) { | |
750 | XFS_bflush(mp->m_ddev_targp); | |
751 | if (mp->m_rtdev_targp) { | |
752 | XFS_bflush(mp->m_rtdev_targp); | |
753 | } | |
754 | } | |
755 | ||
756 | return XFS_ERROR(last_error); | |
757 | } |