2 * Copyright (c) 2000-2002,2005 Silicon Graphics, Inc.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License as
7 * published by the Free Software Foundation.
9 * This program is distributed in the hope that it would be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write the Free Software Foundation,
16 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
20 #include "xfs_format.h"
21 #include "xfs_log_format.h"
22 #include "xfs_trans_resv.h"
25 #include "xfs_mount.h"
26 #include "xfs_inode.h"
27 #include "xfs_trans.h"
28 #include "xfs_inode_item.h"
29 #include "xfs_error.h"
30 #include "xfs_trace.h"
31 #include "xfs_trans_priv.h"
32 #include "xfs_dinode.h"
36 kmem_zone_t
*xfs_ili_zone
; /* inode log item zone */
38 static inline struct xfs_inode_log_item
*INODE_ITEM(struct xfs_log_item
*lip
)
40 return container_of(lip
, struct xfs_inode_log_item
, ili_item
);
44 xfs_inode_item_data_fork_size(
45 struct xfs_inode_log_item
*iip
,
49 struct xfs_inode
*ip
= iip
->ili_inode
;
51 switch (ip
->i_d
.di_format
) {
52 case XFS_DINODE_FMT_EXTENTS
:
53 if ((iip
->ili_fields
& XFS_ILOG_DEXT
) &&
54 ip
->i_d
.di_nextents
> 0 &&
55 ip
->i_df
.if_bytes
> 0) {
56 /* worst case, doesn't subtract delalloc extents */
57 *nbytes
+= XFS_IFORK_DSIZE(ip
);
61 case XFS_DINODE_FMT_BTREE
:
62 if ((iip
->ili_fields
& XFS_ILOG_DBROOT
) &&
63 ip
->i_df
.if_broot_bytes
> 0) {
64 *nbytes
+= ip
->i_df
.if_broot_bytes
;
68 case XFS_DINODE_FMT_LOCAL
:
69 if ((iip
->ili_fields
& XFS_ILOG_DDATA
) &&
70 ip
->i_df
.if_bytes
> 0) {
71 *nbytes
+= roundup(ip
->i_df
.if_bytes
, 4);
76 case XFS_DINODE_FMT_DEV
:
77 case XFS_DINODE_FMT_UUID
:
86 xfs_inode_item_attr_fork_size(
87 struct xfs_inode_log_item
*iip
,
91 struct xfs_inode
*ip
= iip
->ili_inode
;
93 switch (ip
->i_d
.di_aformat
) {
94 case XFS_DINODE_FMT_EXTENTS
:
95 if ((iip
->ili_fields
& XFS_ILOG_AEXT
) &&
96 ip
->i_d
.di_anextents
> 0 &&
97 ip
->i_afp
->if_bytes
> 0) {
98 /* worst case, doesn't subtract unused space */
99 *nbytes
+= XFS_IFORK_ASIZE(ip
);
103 case XFS_DINODE_FMT_BTREE
:
104 if ((iip
->ili_fields
& XFS_ILOG_ABROOT
) &&
105 ip
->i_afp
->if_broot_bytes
> 0) {
106 *nbytes
+= ip
->i_afp
->if_broot_bytes
;
110 case XFS_DINODE_FMT_LOCAL
:
111 if ((iip
->ili_fields
& XFS_ILOG_ADATA
) &&
112 ip
->i_afp
->if_bytes
> 0) {
113 *nbytes
+= roundup(ip
->i_afp
->if_bytes
, 4);
124 * This returns the number of iovecs needed to log the given inode item.
126 * We need one iovec for the inode log format structure, one for the
127 * inode core, and possibly one for the inode data/extents/b-tree root
128 * and one for the inode attribute data/extents/b-tree root.
132 struct xfs_log_item
*lip
,
136 struct xfs_inode_log_item
*iip
= INODE_ITEM(lip
);
137 struct xfs_inode
*ip
= iip
->ili_inode
;
140 *nbytes
+= sizeof(struct xfs_inode_log_format
) +
141 xfs_icdinode_size(ip
->i_d
.di_version
);
143 xfs_inode_item_data_fork_size(iip
, nvecs
, nbytes
);
145 xfs_inode_item_attr_fork_size(iip
, nvecs
, nbytes
);
149 * xfs_inode_item_format_extents - convert in-core extents to on-disk form
151 * For either the data or attr fork in extent format, we need to endian convert
152 * the in-core extent as we place them into the on-disk inode. In this case, we
153 * need to do this conversion before we write the extents into the log. Because
154 * we don't have the disk inode to write into here, we allocate a buffer and
155 * format the extents into it via xfs_iextents_copy(). We free the buffer in
156 * the unlock routine after the copy for the log has been made.
158 * In the case of the data fork, the in-core and on-disk fork sizes can be
159 * different due to delayed allocation extents. We only log on-disk extents
160 * here, so always use the physical fork size to determine the size of the
161 * buffer we need to allocate.
164 xfs_inode_item_format_extents(
165 struct xfs_inode
*ip
,
166 struct xfs_log_iovec
**vecp
,
170 xfs_bmbt_rec_t
*ext_buffer
;
173 ext_buffer
= kmem_alloc(XFS_IFORK_SIZE(ip
, whichfork
), KM_SLEEP
);
174 if (whichfork
== XFS_DATA_FORK
)
175 ip
->i_itemp
->ili_extents_buf
= ext_buffer
;
177 ip
->i_itemp
->ili_aextents_buf
= ext_buffer
;
179 len
= xfs_iextents_copy(ip
, ext_buffer
, whichfork
);
180 xlog_copy_iovec(vecp
, type
, ext_buffer
, len
);
185 * If this is a v1 format inode, then we need to log it as such. This means
186 * that we have to copy the link count from the new field to the old. We
187 * don't have to worry about the new fields, because nothing trusts them as
188 * long as the old inode version number is there.
191 xfs_inode_item_format_v1_inode(
192 struct xfs_inode
*ip
)
194 if (!xfs_sb_version_hasnlink(&ip
->i_mount
->m_sb
)) {
198 ASSERT(ip
->i_d
.di_nlink
<= XFS_MAXLINK_1
);
199 ip
->i_d
.di_onlink
= ip
->i_d
.di_nlink
;
202 * The superblock version has already been bumped,
203 * so just make the conversion to the new inode
206 ip
->i_d
.di_version
= 2;
207 ip
->i_d
.di_onlink
= 0;
208 memset(&(ip
->i_d
.di_pad
[0]), 0, sizeof(ip
->i_d
.di_pad
));
213 xfs_inode_item_format_data_fork(
214 struct xfs_inode_log_item
*iip
,
215 struct xfs_log_iovec
**vecp
,
218 struct xfs_inode
*ip
= iip
->ili_inode
;
221 switch (ip
->i_d
.di_format
) {
222 case XFS_DINODE_FMT_EXTENTS
:
224 ~(XFS_ILOG_DDATA
| XFS_ILOG_DBROOT
|
225 XFS_ILOG_DEV
| XFS_ILOG_UUID
);
227 if ((iip
->ili_fields
& XFS_ILOG_DEXT
) &&
228 ip
->i_d
.di_nextents
> 0 &&
229 ip
->i_df
.if_bytes
> 0) {
230 ASSERT(ip
->i_df
.if_u1
.if_extents
!= NULL
);
231 ASSERT(ip
->i_df
.if_bytes
/ sizeof(xfs_bmbt_rec_t
) > 0);
232 ASSERT(iip
->ili_extents_buf
== NULL
);
234 #ifdef XFS_NATIVE_HOST
235 if (ip
->i_d
.di_nextents
== ip
->i_df
.if_bytes
/
236 (uint
)sizeof(xfs_bmbt_rec_t
)) {
238 * There are no delayed allocation
239 * extents, so just point to the
240 * real extents array.
242 xlog_copy_iovec(vecp
, XLOG_REG_TYPE_IEXT
,
243 ip
->i_df
.if_u1
.if_extents
,
245 iip
->ili_format
.ilf_dsize
= ip
->i_df
.if_bytes
;
249 iip
->ili_format
.ilf_dsize
=
250 xfs_inode_item_format_extents(ip
, vecp
,
251 XFS_DATA_FORK
, XLOG_REG_TYPE_IEXT
);
252 ASSERT(iip
->ili_format
.ilf_dsize
<= ip
->i_df
.if_bytes
);
256 iip
->ili_fields
&= ~XFS_ILOG_DEXT
;
259 case XFS_DINODE_FMT_BTREE
:
261 ~(XFS_ILOG_DDATA
| XFS_ILOG_DEXT
|
262 XFS_ILOG_DEV
| XFS_ILOG_UUID
);
264 if ((iip
->ili_fields
& XFS_ILOG_DBROOT
) &&
265 ip
->i_df
.if_broot_bytes
> 0) {
266 ASSERT(ip
->i_df
.if_broot
!= NULL
);
267 xlog_copy_iovec(vecp
, XLOG_REG_TYPE_IBROOT
,
269 ip
->i_df
.if_broot_bytes
);
271 iip
->ili_format
.ilf_dsize
= ip
->i_df
.if_broot_bytes
;
273 ASSERT(!(iip
->ili_fields
&
275 iip
->ili_fields
&= ~XFS_ILOG_DBROOT
;
278 case XFS_DINODE_FMT_LOCAL
:
280 ~(XFS_ILOG_DEXT
| XFS_ILOG_DBROOT
|
281 XFS_ILOG_DEV
| XFS_ILOG_UUID
);
282 if ((iip
->ili_fields
& XFS_ILOG_DDATA
) &&
283 ip
->i_df
.if_bytes
> 0) {
285 * Round if_bytes up to a word boundary.
286 * The underlying memory is guaranteed to
287 * be there by xfs_idata_realloc().
289 data_bytes
= roundup(ip
->i_df
.if_bytes
, 4);
290 ASSERT(ip
->i_df
.if_real_bytes
== 0 ||
291 ip
->i_df
.if_real_bytes
== data_bytes
);
292 ASSERT(ip
->i_df
.if_u1
.if_data
!= NULL
);
293 ASSERT(ip
->i_d
.di_size
> 0);
294 xlog_copy_iovec(vecp
, XLOG_REG_TYPE_ILOCAL
,
295 ip
->i_df
.if_u1
.if_data
, data_bytes
);
297 iip
->ili_format
.ilf_dsize
= (unsigned)data_bytes
;
299 iip
->ili_fields
&= ~XFS_ILOG_DDATA
;
302 case XFS_DINODE_FMT_DEV
:
304 ~(XFS_ILOG_DDATA
| XFS_ILOG_DBROOT
|
305 XFS_ILOG_DEXT
| XFS_ILOG_UUID
);
306 if (iip
->ili_fields
& XFS_ILOG_DEV
) {
307 iip
->ili_format
.ilf_u
.ilfu_rdev
=
308 ip
->i_df
.if_u2
.if_rdev
;
311 case XFS_DINODE_FMT_UUID
:
313 ~(XFS_ILOG_DDATA
| XFS_ILOG_DBROOT
|
314 XFS_ILOG_DEXT
| XFS_ILOG_DEV
);
315 if (iip
->ili_fields
& XFS_ILOG_UUID
) {
316 iip
->ili_format
.ilf_u
.ilfu_uuid
=
317 ip
->i_df
.if_u2
.if_uuid
;
327 xfs_inode_item_format_attr_fork(
328 struct xfs_inode_log_item
*iip
,
329 struct xfs_log_iovec
**vecp
,
332 struct xfs_inode
*ip
= iip
->ili_inode
;
335 switch (ip
->i_d
.di_aformat
) {
336 case XFS_DINODE_FMT_EXTENTS
:
338 ~(XFS_ILOG_ADATA
| XFS_ILOG_ABROOT
);
340 if ((iip
->ili_fields
& XFS_ILOG_AEXT
) &&
341 ip
->i_d
.di_anextents
> 0 &&
342 ip
->i_afp
->if_bytes
> 0) {
343 ASSERT(ip
->i_afp
->if_bytes
/ sizeof(xfs_bmbt_rec_t
) ==
344 ip
->i_d
.di_anextents
);
345 ASSERT(ip
->i_afp
->if_u1
.if_extents
!= NULL
);
346 #ifdef XFS_NATIVE_HOST
348 * There are no delayed allocation extents
349 * for attributes, so just point at the array.
351 xlog_copy_iovec(vecp
, XLOG_REG_TYPE_IATTR_EXT
,
352 ip
->i_afp
->if_u1
.if_extents
,
353 ip
->i_afp
->if_bytes
);
354 iip
->ili_format
.ilf_asize
= ip
->i_afp
->if_bytes
;
356 ASSERT(iip
->ili_aextents_buf
== NULL
);
357 iip
->ili_format
.ilf_asize
=
358 xfs_inode_item_format_extents(ip
, vecp
,
359 XFS_ATTR_FORK
, XLOG_REG_TYPE_IATTR_EXT
);
363 iip
->ili_fields
&= ~XFS_ILOG_AEXT
;
366 case XFS_DINODE_FMT_BTREE
:
368 ~(XFS_ILOG_ADATA
| XFS_ILOG_AEXT
);
370 if ((iip
->ili_fields
& XFS_ILOG_ABROOT
) &&
371 ip
->i_afp
->if_broot_bytes
> 0) {
372 ASSERT(ip
->i_afp
->if_broot
!= NULL
);
374 xlog_copy_iovec(vecp
, XLOG_REG_TYPE_IATTR_BROOT
,
376 ip
->i_afp
->if_broot_bytes
);
378 iip
->ili_format
.ilf_asize
= ip
->i_afp
->if_broot_bytes
;
380 iip
->ili_fields
&= ~XFS_ILOG_ABROOT
;
383 case XFS_DINODE_FMT_LOCAL
:
385 ~(XFS_ILOG_AEXT
| XFS_ILOG_ABROOT
);
387 if ((iip
->ili_fields
& XFS_ILOG_ADATA
) &&
388 ip
->i_afp
->if_bytes
> 0) {
390 * Round if_bytes up to a word boundary.
391 * The underlying memory is guaranteed to
392 * be there by xfs_idata_realloc().
394 data_bytes
= roundup(ip
->i_afp
->if_bytes
, 4);
395 ASSERT(ip
->i_afp
->if_real_bytes
== 0 ||
396 ip
->i_afp
->if_real_bytes
== data_bytes
);
397 ASSERT(ip
->i_afp
->if_u1
.if_data
!= NULL
);
398 xlog_copy_iovec(vecp
, XLOG_REG_TYPE_IATTR_LOCAL
,
399 ip
->i_afp
->if_u1
.if_data
,
402 iip
->ili_format
.ilf_asize
= (unsigned)data_bytes
;
404 iip
->ili_fields
&= ~XFS_ILOG_ADATA
;
414 * This is called to fill in the vector of log iovecs for the given inode
415 * log item. It fills the first item with an inode log format structure,
416 * the second with the on-disk inode structure, and a possible third and/or
417 * fourth with the inode data/extents/b-tree root and inode attributes
418 * data/extents/b-tree root.
421 xfs_inode_item_format(
422 struct xfs_log_item
*lip
,
423 struct xfs_log_iovec
*vecp
)
425 struct xfs_inode_log_item
*iip
= INODE_ITEM(lip
);
426 struct xfs_inode
*ip
= iip
->ili_inode
;
429 xlog_copy_iovec(&vecp
, XLOG_REG_TYPE_IFORMAT
,
431 sizeof(struct xfs_inode_log_format
));
434 xlog_copy_iovec(&vecp
, XLOG_REG_TYPE_ICORE
,
436 xfs_icdinode_size(ip
->i_d
.di_version
));
439 if (ip
->i_d
.di_version
== 1)
440 xfs_inode_item_format_v1_inode(ip
);
442 xfs_inode_item_format_data_fork(iip
, &vecp
, &nvecs
);
443 if (XFS_IFORK_Q(ip
)) {
444 xfs_inode_item_format_attr_fork(iip
, &vecp
, &nvecs
);
447 ~(XFS_ILOG_ADATA
| XFS_ILOG_ABROOT
| XFS_ILOG_AEXT
);
451 * Now update the log format that goes out to disk from the in-core
452 * values. We always write the inode core to make the arithmetic
453 * games in recovery easier, which isn't a big deal as just about any
454 * transaction would dirty it anyway.
456 iip
->ili_format
.ilf_fields
= XFS_ILOG_CORE
|
457 (iip
->ili_fields
& ~XFS_ILOG_TIMESTAMP
);
458 iip
->ili_format
.ilf_size
= nvecs
;
462 * This is called to pin the inode associated with the inode log
463 * item in memory so it cannot be written out.
467 struct xfs_log_item
*lip
)
469 struct xfs_inode
*ip
= INODE_ITEM(lip
)->ili_inode
;
471 ASSERT(xfs_isilocked(ip
, XFS_ILOCK_EXCL
));
473 trace_xfs_inode_pin(ip
, _RET_IP_
);
474 atomic_inc(&ip
->i_pincount
);
479 * This is called to unpin the inode associated with the inode log
480 * item which was previously pinned with a call to xfs_inode_item_pin().
482 * Also wake up anyone in xfs_iunpin_wait() if the count goes to 0.
485 xfs_inode_item_unpin(
486 struct xfs_log_item
*lip
,
489 struct xfs_inode
*ip
= INODE_ITEM(lip
)->ili_inode
;
491 trace_xfs_inode_unpin(ip
, _RET_IP_
);
492 ASSERT(atomic_read(&ip
->i_pincount
) > 0);
493 if (atomic_dec_and_test(&ip
->i_pincount
))
494 wake_up_bit(&ip
->i_flags
, __XFS_IPINNED_BIT
);
499 struct xfs_log_item
*lip
,
500 struct list_head
*buffer_list
)
502 struct xfs_inode_log_item
*iip
= INODE_ITEM(lip
);
503 struct xfs_inode
*ip
= iip
->ili_inode
;
504 struct xfs_buf
*bp
= NULL
;
505 uint rval
= XFS_ITEM_SUCCESS
;
508 if (xfs_ipincount(ip
) > 0)
509 return XFS_ITEM_PINNED
;
511 if (!xfs_ilock_nowait(ip
, XFS_ILOCK_SHARED
))
512 return XFS_ITEM_LOCKED
;
515 * Re-check the pincount now that we stabilized the value by
518 if (xfs_ipincount(ip
) > 0) {
519 rval
= XFS_ITEM_PINNED
;
524 * Stale inode items should force out the iclog.
526 if (ip
->i_flags
& XFS_ISTALE
) {
527 rval
= XFS_ITEM_PINNED
;
532 * Someone else is already flushing the inode. Nothing we can do
533 * here but wait for the flush to finish and remove the item from
536 if (!xfs_iflock_nowait(ip
)) {
537 rval
= XFS_ITEM_FLUSHING
;
541 ASSERT(iip
->ili_fields
!= 0 || XFS_FORCED_SHUTDOWN(ip
->i_mount
));
542 ASSERT(iip
->ili_logged
== 0 || XFS_FORCED_SHUTDOWN(ip
->i_mount
));
544 spin_unlock(&lip
->li_ailp
->xa_lock
);
546 error
= xfs_iflush(ip
, &bp
);
548 if (!xfs_buf_delwri_queue(bp
, buffer_list
))
549 rval
= XFS_ITEM_FLUSHING
;
553 spin_lock(&lip
->li_ailp
->xa_lock
);
555 xfs_iunlock(ip
, XFS_ILOCK_SHARED
);
560 * Unlock the inode associated with the inode log item.
561 * Clear the fields of the inode and inode log item that
562 * are specific to the current transaction. If the
563 * hold flags is set, do not unlock the inode.
566 xfs_inode_item_unlock(
567 struct xfs_log_item
*lip
)
569 struct xfs_inode_log_item
*iip
= INODE_ITEM(lip
);
570 struct xfs_inode
*ip
= iip
->ili_inode
;
571 unsigned short lock_flags
;
573 ASSERT(ip
->i_itemp
!= NULL
);
574 ASSERT(xfs_isilocked(ip
, XFS_ILOCK_EXCL
));
577 * If the inode needed a separate buffer with which to log
578 * its extents, then free it now.
580 if (iip
->ili_extents_buf
!= NULL
) {
581 ASSERT(ip
->i_d
.di_format
== XFS_DINODE_FMT_EXTENTS
);
582 ASSERT(ip
->i_d
.di_nextents
> 0);
583 ASSERT(iip
->ili_fields
& XFS_ILOG_DEXT
);
584 ASSERT(ip
->i_df
.if_bytes
> 0);
585 kmem_free(iip
->ili_extents_buf
);
586 iip
->ili_extents_buf
= NULL
;
588 if (iip
->ili_aextents_buf
!= NULL
) {
589 ASSERT(ip
->i_d
.di_aformat
== XFS_DINODE_FMT_EXTENTS
);
590 ASSERT(ip
->i_d
.di_anextents
> 0);
591 ASSERT(iip
->ili_fields
& XFS_ILOG_AEXT
);
592 ASSERT(ip
->i_afp
->if_bytes
> 0);
593 kmem_free(iip
->ili_aextents_buf
);
594 iip
->ili_aextents_buf
= NULL
;
597 lock_flags
= iip
->ili_lock_flags
;
598 iip
->ili_lock_flags
= 0;
600 xfs_iunlock(ip
, lock_flags
);
604 * This is called to find out where the oldest active copy of the inode log
605 * item in the on disk log resides now that the last log write of it completed
606 * at the given lsn. Since we always re-log all dirty data in an inode, the
607 * latest copy in the on disk log is the only one that matters. Therefore,
608 * simply return the given lsn.
610 * If the inode has been marked stale because the cluster is being freed, we
611 * don't want to (re-)insert this inode into the AIL. There is a race condition
612 * where the cluster buffer may be unpinned before the inode is inserted into
613 * the AIL during transaction committed processing. If the buffer is unpinned
614 * before the inode item has been committed and inserted, then it is possible
615 * for the buffer to be written and IO completes before the inode is inserted
616 * into the AIL. In that case, we'd be inserting a clean, stale inode into the
617 * AIL which will never get removed. It will, however, get reclaimed which
618 * triggers an assert in xfs_inode_free() complaining about freeing an inode
621 * To avoid this, just unpin the inode directly and return a LSN of -1 so the
622 * transaction committed code knows that it does not need to do any further
623 * processing on the item.
626 xfs_inode_item_committed(
627 struct xfs_log_item
*lip
,
630 struct xfs_inode_log_item
*iip
= INODE_ITEM(lip
);
631 struct xfs_inode
*ip
= iip
->ili_inode
;
633 if (xfs_iflags_test(ip
, XFS_ISTALE
)) {
634 xfs_inode_item_unpin(lip
, 0);
641 * XXX rcc - this one really has to do something. Probably needs
642 * to stamp in a new field in the incore inode.
645 xfs_inode_item_committing(
646 struct xfs_log_item
*lip
,
649 INODE_ITEM(lip
)->ili_last_lsn
= lsn
;
653 * This is the ops vector shared by all inode log items.
655 static const struct xfs_item_ops xfs_inode_item_ops
= {
656 .iop_size
= xfs_inode_item_size
,
657 .iop_format
= xfs_inode_item_format
,
658 .iop_pin
= xfs_inode_item_pin
,
659 .iop_unpin
= xfs_inode_item_unpin
,
660 .iop_unlock
= xfs_inode_item_unlock
,
661 .iop_committed
= xfs_inode_item_committed
,
662 .iop_push
= xfs_inode_item_push
,
663 .iop_committing
= xfs_inode_item_committing
668 * Initialize the inode log item for a newly allocated (in-core) inode.
672 struct xfs_inode
*ip
,
673 struct xfs_mount
*mp
)
675 struct xfs_inode_log_item
*iip
;
677 ASSERT(ip
->i_itemp
== NULL
);
678 iip
= ip
->i_itemp
= kmem_zone_zalloc(xfs_ili_zone
, KM_SLEEP
);
681 xfs_log_item_init(mp
, &iip
->ili_item
, XFS_LI_INODE
,
682 &xfs_inode_item_ops
);
683 iip
->ili_format
.ilf_type
= XFS_LI_INODE
;
684 iip
->ili_format
.ilf_ino
= ip
->i_ino
;
685 iip
->ili_format
.ilf_blkno
= ip
->i_imap
.im_blkno
;
686 iip
->ili_format
.ilf_len
= ip
->i_imap
.im_len
;
687 iip
->ili_format
.ilf_boffset
= ip
->i_imap
.im_boffset
;
691 * Free the inode log item and any memory hanging off of it.
694 xfs_inode_item_destroy(
697 kmem_zone_free(xfs_ili_zone
, ip
->i_itemp
);
702 * This is the inode flushing I/O completion routine. It is called
703 * from interrupt level when the buffer containing the inode is
704 * flushed to disk. It is responsible for removing the inode item
705 * from the AIL if it has not been re-logged, and unlocking the inode's
708 * To reduce AIL lock traffic as much as possible, we scan the buffer log item
709 * list for other inodes that will run this function. We remove them from the
710 * buffer list so we can process all the inode IO completions in one AIL lock
716 struct xfs_log_item
*lip
)
718 struct xfs_inode_log_item
*iip
;
719 struct xfs_log_item
*blip
;
720 struct xfs_log_item
*next
;
721 struct xfs_log_item
*prev
;
722 struct xfs_ail
*ailp
= lip
->li_ailp
;
726 * Scan the buffer IO completions for other inodes being completed and
727 * attach them to the current inode log item.
731 while (blip
!= NULL
) {
732 if (lip
->li_cb
!= xfs_iflush_done
) {
734 blip
= blip
->li_bio_list
;
738 /* remove from list */
739 next
= blip
->li_bio_list
;
743 prev
->li_bio_list
= next
;
746 /* add to current list */
747 blip
->li_bio_list
= lip
->li_bio_list
;
748 lip
->li_bio_list
= blip
;
751 * while we have the item, do the unlocked check for needing
754 iip
= INODE_ITEM(blip
);
755 if (iip
->ili_logged
&& blip
->li_lsn
== iip
->ili_flush_lsn
)
761 /* make sure we capture the state of the initial inode. */
762 iip
= INODE_ITEM(lip
);
763 if (iip
->ili_logged
&& lip
->li_lsn
== iip
->ili_flush_lsn
)
767 * We only want to pull the item from the AIL if it is
768 * actually there and its location in the log has not
769 * changed since we started the flush. Thus, we only bother
770 * if the ili_logged flag is set and the inode's lsn has not
771 * changed. First we check the lsn outside
772 * the lock since it's cheaper, and then we recheck while
773 * holding the lock before removing the inode from the AIL.
776 struct xfs_log_item
*log_items
[need_ail
];
778 spin_lock(&ailp
->xa_lock
);
779 for (blip
= lip
; blip
; blip
= blip
->li_bio_list
) {
780 iip
= INODE_ITEM(blip
);
781 if (iip
->ili_logged
&&
782 blip
->li_lsn
== iip
->ili_flush_lsn
) {
783 log_items
[i
++] = blip
;
785 ASSERT(i
<= need_ail
);
787 /* xfs_trans_ail_delete_bulk() drops the AIL lock. */
788 xfs_trans_ail_delete_bulk(ailp
, log_items
, i
,
789 SHUTDOWN_CORRUPT_INCORE
);
794 * clean up and unlock the flush lock now we are done. We can clear the
795 * ili_last_fields bits now that we know that the data corresponding to
796 * them is safely on disk.
798 for (blip
= lip
; blip
; blip
= next
) {
799 next
= blip
->li_bio_list
;
800 blip
->li_bio_list
= NULL
;
802 iip
= INODE_ITEM(blip
);
804 iip
->ili_last_fields
= 0;
805 xfs_ifunlock(iip
->ili_inode
);
810 * This is the inode flushing abort routine. It is called from xfs_iflush when
811 * the filesystem is shutting down to clean up the inode state. It is
812 * responsible for removing the inode item from the AIL if it has not been
813 * re-logged, and unlocking the inode's flush lock.
820 xfs_inode_log_item_t
*iip
= ip
->i_itemp
;
823 struct xfs_ail
*ailp
= iip
->ili_item
.li_ailp
;
824 if (iip
->ili_item
.li_flags
& XFS_LI_IN_AIL
) {
825 spin_lock(&ailp
->xa_lock
);
826 if (iip
->ili_item
.li_flags
& XFS_LI_IN_AIL
) {
827 /* xfs_trans_ail_delete() drops the AIL lock. */
828 xfs_trans_ail_delete(ailp
, &iip
->ili_item
,
830 SHUTDOWN_LOG_IO_ERROR
:
831 SHUTDOWN_CORRUPT_INCORE
);
833 spin_unlock(&ailp
->xa_lock
);
837 * Clear the ili_last_fields bits now that we know that the
838 * data corresponding to them is safely on disk.
840 iip
->ili_last_fields
= 0;
842 * Clear the inode logging fields so no more flushes are
848 * Release the inode's flush lock since we're done with it.
856 struct xfs_log_item
*lip
)
858 xfs_iflush_abort(INODE_ITEM(lip
)->ili_inode
, true);
862 * convert an xfs_inode_log_format struct from either 32 or 64 bit versions
863 * (which can have different field alignments) to the native version
866 xfs_inode_item_format_convert(
867 xfs_log_iovec_t
*buf
,
868 xfs_inode_log_format_t
*in_f
)
870 if (buf
->i_len
== sizeof(xfs_inode_log_format_32_t
)) {
871 xfs_inode_log_format_32_t
*in_f32
= buf
->i_addr
;
873 in_f
->ilf_type
= in_f32
->ilf_type
;
874 in_f
->ilf_size
= in_f32
->ilf_size
;
875 in_f
->ilf_fields
= in_f32
->ilf_fields
;
876 in_f
->ilf_asize
= in_f32
->ilf_asize
;
877 in_f
->ilf_dsize
= in_f32
->ilf_dsize
;
878 in_f
->ilf_ino
= in_f32
->ilf_ino
;
879 /* copy biggest field of ilf_u */
880 memcpy(in_f
->ilf_u
.ilfu_uuid
.__u_bits
,
881 in_f32
->ilf_u
.ilfu_uuid
.__u_bits
,
883 in_f
->ilf_blkno
= in_f32
->ilf_blkno
;
884 in_f
->ilf_len
= in_f32
->ilf_len
;
885 in_f
->ilf_boffset
= in_f32
->ilf_boffset
;
887 } else if (buf
->i_len
== sizeof(xfs_inode_log_format_64_t
)){
888 xfs_inode_log_format_64_t
*in_f64
= buf
->i_addr
;
890 in_f
->ilf_type
= in_f64
->ilf_type
;
891 in_f
->ilf_size
= in_f64
->ilf_size
;
892 in_f
->ilf_fields
= in_f64
->ilf_fields
;
893 in_f
->ilf_asize
= in_f64
->ilf_asize
;
894 in_f
->ilf_dsize
= in_f64
->ilf_dsize
;
895 in_f
->ilf_ino
= in_f64
->ilf_ino
;
896 /* copy biggest field of ilf_u */
897 memcpy(in_f
->ilf_u
.ilfu_uuid
.__u_bits
,
898 in_f64
->ilf_u
.ilfu_uuid
.__u_bits
,
900 in_f
->ilf_blkno
= in_f64
->ilf_blkno
;
901 in_f
->ilf_len
= in_f64
->ilf_len
;
902 in_f
->ilf_boffset
= in_f64
->ilf_boffset
;