| 1 | /* |
| 2 | * Copyright (c) 2000-2003,2005 Silicon Graphics, Inc. |
| 3 | * Copyright (C) 2010 Red Hat, Inc. |
| 4 | * All Rights Reserved. |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License as |
| 8 | * published by the Free Software Foundation. |
| 9 | * |
| 10 | * This program is distributed in the hope that it would be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * GNU General Public License for more details. |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License |
| 16 | * along with this program; if not, write the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA |
| 18 | */ |
| 19 | #include "xfs.h" |
| 20 | #include "xfs_fs.h" |
| 21 | #include "xfs_format.h" |
| 22 | #include "xfs_log.h" |
| 23 | #include "xfs_trans.h" |
| 24 | #include "xfs_sb.h" |
| 25 | #include "xfs_ag.h" |
| 26 | #include "xfs_mount.h" |
| 27 | #include "xfs_error.h" |
| 28 | #include "xfs_da_btree.h" |
| 29 | #include "xfs_bmap_btree.h" |
| 30 | #include "xfs_alloc_btree.h" |
| 31 | #include "xfs_ialloc_btree.h" |
| 32 | #include "xfs_dinode.h" |
| 33 | #include "xfs_inode.h" |
| 34 | #include "xfs_btree.h" |
| 35 | #include "xfs_ialloc.h" |
| 36 | #include "xfs_alloc.h" |
| 37 | #include "xfs_extent_busy.h" |
| 38 | #include "xfs_bmap.h" |
| 39 | #include "xfs_quota.h" |
| 40 | #include "xfs_qm.h" |
| 41 | #include "xfs_trans_priv.h" |
| 42 | #include "xfs_trans_space.h" |
| 43 | #include "xfs_inode_item.h" |
| 44 | #include "xfs_log_priv.h" |
| 45 | #include "xfs_buf_item.h" |
| 46 | #include "xfs_trace.h" |
| 47 | |
/* Zone that transaction structures are allocated from and freed back to. */
kmem_zone_t *xfs_trans_zone;
/* NOTE(review): presumably the zone for log item descriptors; not used in the code visible here. */
kmem_zone_t *xfs_log_item_desc_zone;
| 50 | |
| 51 | /* |
| 52 | * A buffer has a format structure overhead in the log in addition |
| 53 | * to the data, so we need to take this into account when reserving |
| 54 | * space in a transaction for a buffer. Round the space required up |
| 55 | * to a multiple of 128 bytes so that we don't change the historical |
| 56 | * reservation that has been used for this overhead. |
| 57 | */ |
| 58 | STATIC uint |
| 59 | xfs_buf_log_overhead(void) |
| 60 | { |
| 61 | return round_up(sizeof(struct xlog_op_header) + |
| 62 | sizeof(struct xfs_buf_log_format), 128); |
| 63 | } |
| 64 | |
| 65 | /* |
| 66 | * Calculate the transaction log reservation per item in bytes. |
| 67 | * |
| 68 | * The nbufs argument is used to indicate the number of items that |
| 69 | * will be changed in a transaction. size is used to tell how many |
| 70 | * bytes should be reserved per item. |
| 71 | */ |
| 72 | STATIC uint |
| 73 | xfs_calc_buf_res( |
| 74 | uint nbufs, |
| 75 | uint size) |
| 76 | { |
| 77 | return nbufs * (size + xfs_buf_log_overhead()); |
| 78 | } |
| 79 | |
| 80 | /* |
| 81 | * Various log reservation values. |
| 82 | * |
| 83 | * These are based on the size of the file system block because that is what |
| 84 | * most transactions manipulate. Each adds in an additional 128 bytes per |
| 85 | * item logged to try to account for the overhead of the transaction mechanism. |
| 86 | * |
| 87 | * Note: Most of the reservations underestimate the number of allocation |
| 88 | * groups into which they could free extents in the xfs_bmap_finish() call. |
| 89 | * This is because the number in the worst case is quite high and quite |
| 90 | * unusual. In order to fix this we need to change xfs_bmap_finish() to free |
| 91 | * extents in only a single AG at a time. This will require changes to the |
| 92 | * EFI code as well, however, so that the EFI for the extents not freed is |
| 93 | * logged again in each transaction. See SGI PV #261917. |
| 94 | * |
| 95 | * Reservation functions here avoid a huge stack in xfs_trans_init due to |
| 96 | * register overflow from temporaries in the calculations. |
| 97 | */ |
| 98 | |
| 99 | |
| 100 | /* |
| 101 | * In a write transaction we can allocate a maximum of 2 |
| 102 | * extents. This gives: |
| 103 | * the inode getting the new extents: inode size |
| 104 | * the inode's bmap btree: max depth * block size |
| 105 | * the agfs of the ags from which the extents are allocated: 2 * sector |
| 106 | * the superblock free block counter: sector size |
| 107 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
| 108 | * And the bmap_finish transaction can free bmap blocks in a join: |
| 109 | * the agfs of the ags containing the blocks: 2 * sector size |
| 110 | * the agfls of the ags containing the blocks: 2 * sector size |
| 111 | * the super block free block counter: sector size |
| 112 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
| 113 | */ |
| 114 | STATIC uint |
| 115 | xfs_calc_write_reservation( |
| 116 | struct xfs_mount *mp) |
| 117 | { |
| 118 | return XFS_DQUOT_LOGRES(mp) + |
| 119 | MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 120 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), |
| 121 | XFS_FSB_TO_B(mp, 1)) + |
| 122 | xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
| 123 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), |
| 124 | XFS_FSB_TO_B(mp, 1))), |
| 125 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + |
| 126 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), |
| 127 | XFS_FSB_TO_B(mp, 1)))); |
| 128 | } |
| 129 | |
| 130 | /* |
| 131 | * In truncating a file we free up to two extents at once. We can modify: |
| 132 | * the inode being truncated: inode size |
| 133 | * the inode's bmap btree: (max depth + 1) * block size |
| 134 | * And the bmap_finish transaction can free the blocks and bmap blocks: |
| 135 | * the agf for each of the ags: 4 * sector size |
| 136 | * the agfl for each of the ags: 4 * sector size |
| 137 | * the super block to reflect the freed blocks: sector size |
| 138 | * worst case split in allocation btrees per extent assuming 4 extents: |
| 139 | * 4 exts * 2 trees * (2 * max depth - 1) * block size |
| 140 | * the inode btree: max depth * blocksize |
| 141 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
| 142 | */ |
| 143 | STATIC uint |
| 144 | xfs_calc_itruncate_reservation( |
| 145 | struct xfs_mount *mp) |
| 146 | { |
| 147 | return XFS_DQUOT_LOGRES(mp) + |
| 148 | MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 149 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK) + 1, |
| 150 | XFS_FSB_TO_B(mp, 1))), |
| 151 | (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + |
| 152 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), |
| 153 | XFS_FSB_TO_B(mp, 1)) + |
| 154 | xfs_calc_buf_res(5, 0) + |
| 155 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 156 | XFS_FSB_TO_B(mp, 1)) + |
| 157 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + |
| 158 | mp->m_in_maxlevels, 0))); |
| 159 | } |
| 160 | |
| 161 | /* |
| 162 | * In renaming a file we can modify: |
| 163 | * the four inodes involved: 4 * inode size |
| 164 | * the two directory btrees: 2 * (max depth + v2) * dir block size |
| 165 | * the two directory bmap btrees: 2 * max depth * block size |
| 166 | * And the bmap_finish transaction can free dir and bmap blocks (two sets |
| 167 | * of bmap blocks) giving: |
| 168 | * the agf for the ags in which the blocks live: 3 * sector size |
| 169 | * the agfl for the ags in which the blocks live: 3 * sector size |
| 170 | * the superblock for the free block count: sector size |
| 171 | * the allocation btrees: 3 exts * 2 trees * (2 * max depth - 1) * block size |
| 172 | */ |
| 173 | STATIC uint |
| 174 | xfs_calc_rename_reservation( |
| 175 | struct xfs_mount *mp) |
| 176 | { |
| 177 | return XFS_DQUOT_LOGRES(mp) + |
| 178 | MAX((xfs_calc_buf_res(4, mp->m_sb.sb_inodesize) + |
| 179 | xfs_calc_buf_res(2 * XFS_DIROP_LOG_COUNT(mp), |
| 180 | XFS_FSB_TO_B(mp, 1))), |
| 181 | (xfs_calc_buf_res(7, mp->m_sb.sb_sectsize) + |
| 182 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 3), |
| 183 | XFS_FSB_TO_B(mp, 1)))); |
| 184 | } |
| 185 | |
| 186 | /* |
| 187 | * For creating a link to an inode: |
| 188 | * the parent directory inode: inode size |
| 189 | * the linked inode: inode size |
| 190 | * the directory btree could split: (max depth + v2) * dir block size |
| 191 | * the directory bmap btree could join or split: (max depth + v2) * blocksize |
| 192 | * And the bmap_finish transaction can free some bmap blocks giving: |
| 193 | * the agf for the ag in which the blocks live: sector size |
| 194 | * the agfl for the ag in which the blocks live: sector size |
| 195 | * the superblock for the free block count: sector size |
| 196 | * the allocation btrees: 2 trees * (2 * max depth - 1) * block size |
| 197 | */ |
| 198 | STATIC uint |
| 199 | xfs_calc_link_reservation( |
| 200 | struct xfs_mount *mp) |
| 201 | { |
| 202 | return XFS_DQUOT_LOGRES(mp) + |
| 203 | MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + |
| 204 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
| 205 | XFS_FSB_TO_B(mp, 1))), |
| 206 | (xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
| 207 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 208 | XFS_FSB_TO_B(mp, 1)))); |
| 209 | } |
| 210 | |
| 211 | /* |
| 212 | * For removing a directory entry we can modify: |
| 213 | * the parent directory inode: inode size |
| 214 | * the removed inode: inode size |
| 215 | * the directory btree could join: (max depth + v2) * dir block size |
| 216 | * the directory bmap btree could join or split: (max depth + v2) * blocksize |
| 217 | * And the bmap_finish transaction can free the dir and bmap blocks giving: |
| 218 | * the agf for the ag in which the blocks live: 2 * sector size |
| 219 | * the agfl for the ag in which the blocks live: 2 * sector size |
| 220 | * the superblock for the free block count: sector size |
| 221 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
| 222 | */ |
| 223 | STATIC uint |
| 224 | xfs_calc_remove_reservation( |
| 225 | struct xfs_mount *mp) |
| 226 | { |
| 227 | return XFS_DQUOT_LOGRES(mp) + |
| 228 | MAX((xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + |
| 229 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), |
| 230 | XFS_FSB_TO_B(mp, 1))), |
| 231 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + |
| 232 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), |
| 233 | XFS_FSB_TO_B(mp, 1)))); |
| 234 | } |
| 235 | |
| 236 | /* |
| 237 | * For create, break it in to the two cases that the transaction |
| 238 | * covers. We start with the modify case - allocation done by modification |
| 239 | * of the state of existing inodes - and the allocation case. |
| 240 | */ |
| 241 | |
| 242 | /* |
| 243 | * For create we can modify: |
| 244 | * the parent directory inode: inode size |
| 245 | * the new inode: inode size |
| 246 | * the inode btree entry: block size |
| 247 | * the superblock for the nlink flag: sector size |
| 248 | * the directory btree: (max depth + v2) * dir block size |
| 249 | * the directory inode's bmap btree: (max depth + v2) * block size |
| 250 | */ |
| 251 | STATIC uint |
| 252 | xfs_calc_create_resv_modify( |
| 253 | struct xfs_mount *mp) |
| 254 | { |
| 255 | return xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + |
| 256 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
| 257 | (uint)XFS_FSB_TO_B(mp, 1) + |
| 258 | xfs_calc_buf_res(XFS_DIROP_LOG_COUNT(mp), XFS_FSB_TO_B(mp, 1)); |
| 259 | } |
| 260 | |
| 261 | /* |
| 262 | * For create we can allocate some inodes giving: |
| 263 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
| 264 | * the superblock for the nlink flag: sector size |
| 265 | * the inode blocks allocated: XFS_IALLOC_BLOCKS * blocksize |
| 266 | * the inode btree: max depth * blocksize |
| 267 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
| 268 | */ |
| 269 | STATIC uint |
| 270 | xfs_calc_create_resv_alloc( |
| 271 | struct xfs_mount *mp) |
| 272 | { |
| 273 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
| 274 | mp->m_sb.sb_sectsize + |
| 275 | xfs_calc_buf_res(XFS_IALLOC_BLOCKS(mp), XFS_FSB_TO_B(mp, 1)) + |
| 276 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + |
| 277 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 278 | XFS_FSB_TO_B(mp, 1)); |
| 279 | } |
| 280 | |
| 281 | STATIC uint |
| 282 | __xfs_calc_create_reservation( |
| 283 | struct xfs_mount *mp) |
| 284 | { |
| 285 | return XFS_DQUOT_LOGRES(mp) + |
| 286 | MAX(xfs_calc_create_resv_alloc(mp), |
| 287 | xfs_calc_create_resv_modify(mp)); |
| 288 | } |
| 289 | |
| 290 | /* |
| 291 | * For icreate we can allocate some inodes giving: |
| 292 | * the agi and agf of the ag getting the new inodes: 2 * sectorsize |
| 293 | * the superblock for the nlink flag: sector size |
| 294 | * the inode btree: max depth * blocksize |
| 295 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
| 296 | */ |
| 297 | STATIC uint |
| 298 | xfs_calc_icreate_resv_alloc( |
| 299 | struct xfs_mount *mp) |
| 300 | { |
| 301 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
| 302 | mp->m_sb.sb_sectsize + |
| 303 | xfs_calc_buf_res(mp->m_in_maxlevels, XFS_FSB_TO_B(mp, 1)) + |
| 304 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 305 | XFS_FSB_TO_B(mp, 1)); |
| 306 | } |
| 307 | |
| 308 | STATIC uint |
| 309 | xfs_calc_icreate_reservation(xfs_mount_t *mp) |
| 310 | { |
| 311 | return XFS_DQUOT_LOGRES(mp) + |
| 312 | MAX(xfs_calc_icreate_resv_alloc(mp), |
| 313 | xfs_calc_create_resv_modify(mp)); |
| 314 | } |
| 315 | |
| 316 | STATIC uint |
| 317 | xfs_calc_create_reservation( |
| 318 | struct xfs_mount *mp) |
| 319 | { |
| 320 | if (xfs_sb_version_hascrc(&mp->m_sb)) |
| 321 | return xfs_calc_icreate_reservation(mp); |
| 322 | return __xfs_calc_create_reservation(mp); |
| 323 | |
| 324 | } |
| 325 | |
| 326 | /* |
| 327 | * Making a new directory is the same as creating a new file. |
| 328 | */ |
| 329 | STATIC uint |
| 330 | xfs_calc_mkdir_reservation( |
| 331 | struct xfs_mount *mp) |
| 332 | { |
| 333 | return xfs_calc_create_reservation(mp); |
| 334 | } |
| 335 | |
| 336 | |
| 337 | /* |
| 338 | * Making a new symlink is the same as creating a new file, but |
| 339 | * with the added blocks for remote symlink data which can be up to 1kB in |
| 340 | * length (MAXPATHLEN). |
| 341 | */ |
| 342 | STATIC uint |
| 343 | xfs_calc_symlink_reservation( |
| 344 | struct xfs_mount *mp) |
| 345 | { |
| 346 | return xfs_calc_create_reservation(mp) + |
| 347 | xfs_calc_buf_res(1, MAXPATHLEN); |
| 348 | } |
| 349 | |
| 350 | /* |
| 351 | * In freeing an inode we can modify: |
| 352 | * the inode being freed: inode size |
| 353 | * the super block free inode counter: sector size |
| 354 | * the agi hash list and counters: sector size |
| 355 | * the inode btree entry: block size |
| 356 | * the on disk inode before ours in the agi hash list: inode cluster size |
| 357 | * the inode btree: max depth * blocksize |
| 358 | * the allocation btrees: 2 trees * (max depth - 1) * block size |
| 359 | */ |
| 360 | STATIC uint |
| 361 | xfs_calc_ifree_reservation( |
| 362 | struct xfs_mount *mp) |
| 363 | { |
| 364 | return XFS_DQUOT_LOGRES(mp) + |
| 365 | xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 366 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
| 367 | xfs_calc_buf_res(1, XFS_FSB_TO_B(mp, 1)) + |
| 368 | MAX((__uint16_t)XFS_FSB_TO_B(mp, 1), |
| 369 | XFS_INODE_CLUSTER_SIZE(mp)) + |
| 370 | xfs_calc_buf_res(1, 0) + |
| 371 | xfs_calc_buf_res(2 + XFS_IALLOC_BLOCKS(mp) + |
| 372 | mp->m_in_maxlevels, 0) + |
| 373 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 374 | XFS_FSB_TO_B(mp, 1)); |
| 375 | } |
| 376 | |
| 377 | /* |
| 378 | * When only changing the inode we log the inode and possibly the superblock |
| 379 | * We also add a bit of slop for the transaction stuff. |
| 380 | */ |
| 381 | STATIC uint |
| 382 | xfs_calc_ichange_reservation( |
| 383 | struct xfs_mount *mp) |
| 384 | { |
| 385 | return XFS_DQUOT_LOGRES(mp) + |
| 386 | mp->m_sb.sb_inodesize + |
| 387 | mp->m_sb.sb_sectsize + |
| 388 | 512; |
| 389 | |
| 390 | } |
| 391 | |
| 392 | /* |
| 393 | * Growing the data section of the filesystem. |
| 394 | * superblock |
| 395 | * agi and agf |
| 396 | * allocation btrees |
| 397 | */ |
| 398 | STATIC uint |
| 399 | xfs_calc_growdata_reservation( |
| 400 | struct xfs_mount *mp) |
| 401 | { |
| 402 | return xfs_calc_buf_res(3, mp->m_sb.sb_sectsize) + |
| 403 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 404 | XFS_FSB_TO_B(mp, 1)); |
| 405 | } |
| 406 | |
| 407 | /* |
| 408 | * Growing the rt section of the filesystem. |
| 409 | * In the first set of transactions (ALLOC) we allocate space to the |
| 410 | * bitmap or summary files. |
| 411 | * superblock: sector size |
| 412 | * agf of the ag from which the extent is allocated: sector size |
| 413 | * bmap btree for bitmap/summary inode: max depth * blocksize |
| 414 | * bitmap/summary inode: inode size |
| 415 | * allocation btrees for 1 block alloc: 2 * (2 * maxdepth - 1) * blocksize |
| 416 | */ |
| 417 | STATIC uint |
| 418 | xfs_calc_growrtalloc_reservation( |
| 419 | struct xfs_mount *mp) |
| 420 | { |
| 421 | return xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
| 422 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), |
| 423 | XFS_FSB_TO_B(mp, 1)) + |
| 424 | xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 425 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 426 | XFS_FSB_TO_B(mp, 1)); |
| 427 | } |
| 428 | |
| 429 | /* |
| 430 | * Growing the rt section of the filesystem. |
| 431 | * In the second set of transactions (ZERO) we zero the new metadata blocks. |
| 432 | * one bitmap/summary block: blocksize |
| 433 | */ |
| 434 | STATIC uint |
| 435 | xfs_calc_growrtzero_reservation( |
| 436 | struct xfs_mount *mp) |
| 437 | { |
| 438 | return xfs_calc_buf_res(1, mp->m_sb.sb_blocksize); |
| 439 | } |
| 440 | |
| 441 | /* |
| 442 | * Growing the rt section of the filesystem. |
| 443 | * In the third set of transactions (FREE) we update metadata without |
| 444 | * allocating any new blocks. |
| 445 | * superblock: sector size |
| 446 | * bitmap inode: inode size |
| 447 | * summary inode: inode size |
| 448 | * one bitmap block: blocksize |
| 449 | * summary blocks: new summary size |
| 450 | */ |
| 451 | STATIC uint |
| 452 | xfs_calc_growrtfree_reservation( |
| 453 | struct xfs_mount *mp) |
| 454 | { |
| 455 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
| 456 | xfs_calc_buf_res(2, mp->m_sb.sb_inodesize) + |
| 457 | xfs_calc_buf_res(1, mp->m_sb.sb_blocksize) + |
| 458 | xfs_calc_buf_res(1, mp->m_rsumsize); |
| 459 | } |
| 460 | |
| 461 | /* |
| 462 | * Logging the inode modification timestamp on a synchronous write. |
| 463 | * inode |
| 464 | */ |
| 465 | STATIC uint |
| 466 | xfs_calc_swrite_reservation( |
| 467 | struct xfs_mount *mp) |
| 468 | { |
| 469 | return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize); |
| 470 | } |
| 471 | |
| 472 | /* |
| 473 | * Logging the inode mode bits when writing a setuid/setgid file |
| 474 | * inode |
| 475 | */ |
| 476 | STATIC uint |
| 477 | xfs_calc_writeid_reservation(xfs_mount_t *mp) |
| 478 | { |
| 479 | return xfs_calc_buf_res(1, mp->m_sb.sb_inodesize); |
| 480 | } |
| 481 | |
| 482 | /* |
| 483 | * Converting the inode from non-attributed to attributed. |
| 484 | * the inode being converted: inode size |
| 485 | * agf block and superblock (for block allocation) |
| 486 | * the new block (directory sized) |
| 487 | * bmap blocks for the new directory block |
| 488 | * allocation btrees |
| 489 | */ |
| 490 | STATIC uint |
| 491 | xfs_calc_addafork_reservation( |
| 492 | struct xfs_mount *mp) |
| 493 | { |
| 494 | return XFS_DQUOT_LOGRES(mp) + |
| 495 | xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 496 | xfs_calc_buf_res(2, mp->m_sb.sb_sectsize) + |
| 497 | xfs_calc_buf_res(1, mp->m_dirblksize) + |
| 498 | xfs_calc_buf_res(XFS_DAENTER_BMAP1B(mp, XFS_DATA_FORK) + 1, |
| 499 | XFS_FSB_TO_B(mp, 1)) + |
| 500 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 1), |
| 501 | XFS_FSB_TO_B(mp, 1)); |
| 502 | } |
| 503 | |
| 504 | /* |
| 505 | * Removing the attribute fork of a file |
| 506 | * the inode being truncated: inode size |
| 507 | * the inode's bmap btree: max depth * block size |
| 508 | * And the bmap_finish transaction can free the blocks and bmap blocks: |
| 509 | * the agf for each of the ags: 4 * sector size |
| 510 | * the agfl for each of the ags: 4 * sector size |
| 511 | * the super block to reflect the freed blocks: sector size |
| 512 | * worst case split in allocation btrees per extent assuming 4 extents: |
| 513 | * 4 exts * 2 trees * (2 * max depth - 1) * block size |
| 514 | */ |
| 515 | STATIC uint |
| 516 | xfs_calc_attrinval_reservation( |
| 517 | struct xfs_mount *mp) |
| 518 | { |
| 519 | return MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 520 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), |
| 521 | XFS_FSB_TO_B(mp, 1))), |
| 522 | (xfs_calc_buf_res(9, mp->m_sb.sb_sectsize) + |
| 523 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 4), |
| 524 | XFS_FSB_TO_B(mp, 1)))); |
| 525 | } |
| 526 | |
| 527 | /* |
| 528 | * Setting an attribute at mount time. |
| 529 | * the inode getting the attribute |
| 530 | * the superblock for allocations |
| 531 | * the agfs extents are allocated from |
| 532 | * the attribute btree * max depth |
| 533 | * the inode allocation btree |
| 534 | * Since attribute transaction space is dependent on the size of the attribute, |
| 535 | * the calculation is done partially at mount time and partially at runtime (see |
| 536 | * below). |
| 537 | */ |
| 538 | STATIC uint |
| 539 | xfs_calc_attrsetm_reservation( |
| 540 | struct xfs_mount *mp) |
| 541 | { |
| 542 | return XFS_DQUOT_LOGRES(mp) + |
| 543 | xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 544 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
| 545 | xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, XFS_FSB_TO_B(mp, 1)); |
| 546 | } |
| 547 | |
| 548 | /* |
| 549 | * Setting an attribute at runtime, transaction space unit per block. |
| 550 | * the superblock for allocations: sector size |
| 551 | * the inode bmap btree could join or split: max depth * block size |
| 552 | * Since the runtime attribute transaction space is dependent on the total |
| 553 | * blocks needed for the 1st bmap, here we calculate out the space unit for |
| 554 | * one block so that the caller could figure out the total space according |
| 555 | * to the attribute extent length in blocks by: ext * XFS_ATTRSETRT_LOG_RES(mp). |
| 556 | */ |
| 557 | STATIC uint |
| 558 | xfs_calc_attrsetrt_reservation( |
| 559 | struct xfs_mount *mp) |
| 560 | { |
| 561 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize) + |
| 562 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK), |
| 563 | XFS_FSB_TO_B(mp, 1)); |
| 564 | } |
| 565 | |
| 566 | /* |
| 567 | * Removing an attribute. |
| 568 | * the inode: inode size |
| 569 | * the attribute btree could join: max depth * block size |
| 570 | * the inode bmap btree could join or split: max depth * block size |
| 571 | * And the bmap_finish transaction can free the attr blocks freed giving: |
| 572 | * the agf for the ag in which the blocks live: 2 * sector size |
| 573 | * the agfl for the ag in which the blocks live: 2 * sector size |
| 574 | * the superblock for the free block count: sector size |
| 575 | * the allocation btrees: 2 exts * 2 trees * (2 * max depth - 1) * block size |
| 576 | */ |
| 577 | STATIC uint |
| 578 | xfs_calc_attrrm_reservation( |
| 579 | struct xfs_mount *mp) |
| 580 | { |
| 581 | return XFS_DQUOT_LOGRES(mp) + |
| 582 | MAX((xfs_calc_buf_res(1, mp->m_sb.sb_inodesize) + |
| 583 | xfs_calc_buf_res(XFS_DA_NODE_MAXDEPTH, |
| 584 | XFS_FSB_TO_B(mp, 1)) + |
| 585 | (uint)XFS_FSB_TO_B(mp, |
| 586 | XFS_BM_MAXLEVELS(mp, XFS_ATTR_FORK)) + |
| 587 | xfs_calc_buf_res(XFS_BM_MAXLEVELS(mp, XFS_DATA_FORK), 0)), |
| 588 | (xfs_calc_buf_res(5, mp->m_sb.sb_sectsize) + |
| 589 | xfs_calc_buf_res(XFS_ALLOCFREE_LOG_COUNT(mp, 2), |
| 590 | XFS_FSB_TO_B(mp, 1)))); |
| 591 | } |
| 592 | |
| 593 | /* |
| 594 | * Clearing a bad agino number in an agi hash bucket. |
| 595 | */ |
| 596 | STATIC uint |
| 597 | xfs_calc_clear_agi_bucket_reservation( |
| 598 | struct xfs_mount *mp) |
| 599 | { |
| 600 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); |
| 601 | } |
| 602 | |
| 603 | /* |
| 604 | * Clearing the quotaflags in the superblock. |
| 605 | * the super block for changing quota flags: sector size |
| 606 | */ |
| 607 | STATIC uint |
| 608 | xfs_calc_qm_sbchange_reservation( |
| 609 | struct xfs_mount *mp) |
| 610 | { |
| 611 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); |
| 612 | } |
| 613 | |
| 614 | /* |
| 615 | * Adjusting quota limits. |
| 616 | * the xfs_disk_dquot_t: sizeof(struct xfs_disk_dquot) |
| 617 | */ |
| 618 | STATIC uint |
| 619 | xfs_calc_qm_setqlim_reservation( |
| 620 | struct xfs_mount *mp) |
| 621 | { |
| 622 | return xfs_calc_buf_res(1, sizeof(struct xfs_disk_dquot)); |
| 623 | } |
| 624 | |
| 625 | /* |
| 626 | * Allocating quota on disk if needed. |
| 627 | * the write transaction log space: XFS_WRITE_LOG_RES(mp) |
| 628 | * the unit of quota allocation: one system block size |
| 629 | */ |
| 630 | STATIC uint |
| 631 | xfs_calc_qm_dqalloc_reservation( |
| 632 | struct xfs_mount *mp) |
| 633 | { |
| 634 | return XFS_WRITE_LOG_RES(mp) + |
| 635 | xfs_calc_buf_res(1, |
| 636 | XFS_FSB_TO_B(mp, XFS_DQUOT_CLUSTER_SIZE_FSB) - 1); |
| 637 | } |
| 638 | |
| 639 | /* |
| 640 | * Turning off quotas. |
| 641 | * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 |
| 642 | * the superblock for the quota flags: sector size |
| 643 | */ |
| 644 | STATIC uint |
| 645 | xfs_calc_qm_quotaoff_reservation( |
| 646 | struct xfs_mount *mp) |
| 647 | { |
| 648 | return sizeof(struct xfs_qoff_logitem) * 2 + |
| 649 | xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); |
| 650 | } |
| 651 | |
| 652 | /* |
| 653 | * End of turning off quotas. |
| 654 | * the xfs_qoff_logitem_t: sizeof(struct xfs_qoff_logitem) * 2 |
| 655 | */ |
| 656 | STATIC uint |
| 657 | xfs_calc_qm_quotaoff_end_reservation( |
| 658 | struct xfs_mount *mp) |
| 659 | { |
| 660 | return sizeof(struct xfs_qoff_logitem) * 2; |
| 661 | } |
| 662 | |
| 663 | /* |
| 664 | * Syncing the incore super block changes to disk. |
| 665 | * the super block to reflect the changes: sector size |
| 666 | */ |
| 667 | STATIC uint |
| 668 | xfs_calc_sb_reservation( |
| 669 | struct xfs_mount *mp) |
| 670 | { |
| 671 | return xfs_calc_buf_res(1, mp->m_sb.sb_sectsize); |
| 672 | } |
| 673 | |
| 674 | /* |
| 675 | * Initialize the precomputed transaction reservation values |
| 676 | * in the mount structure. |
| 677 | */ |
| 678 | void |
| 679 | xfs_trans_init( |
| 680 | struct xfs_mount *mp) |
| 681 | { |
| 682 | struct xfs_trans_reservations *resp = &mp->m_reservations; |
| 683 | |
| 684 | resp->tr_write = xfs_calc_write_reservation(mp); |
| 685 | resp->tr_itruncate = xfs_calc_itruncate_reservation(mp); |
| 686 | resp->tr_rename = xfs_calc_rename_reservation(mp); |
| 687 | resp->tr_link = xfs_calc_link_reservation(mp); |
| 688 | resp->tr_remove = xfs_calc_remove_reservation(mp); |
| 689 | resp->tr_symlink = xfs_calc_symlink_reservation(mp); |
| 690 | resp->tr_create = xfs_calc_create_reservation(mp); |
| 691 | resp->tr_mkdir = xfs_calc_mkdir_reservation(mp); |
| 692 | resp->tr_ifree = xfs_calc_ifree_reservation(mp); |
| 693 | resp->tr_ichange = xfs_calc_ichange_reservation(mp); |
| 694 | resp->tr_growdata = xfs_calc_growdata_reservation(mp); |
| 695 | resp->tr_swrite = xfs_calc_swrite_reservation(mp); |
| 696 | resp->tr_writeid = xfs_calc_writeid_reservation(mp); |
| 697 | resp->tr_addafork = xfs_calc_addafork_reservation(mp); |
| 698 | resp->tr_attrinval = xfs_calc_attrinval_reservation(mp); |
| 699 | resp->tr_attrsetm = xfs_calc_attrsetm_reservation(mp); |
| 700 | resp->tr_attrsetrt = xfs_calc_attrsetrt_reservation(mp); |
| 701 | resp->tr_attrrm = xfs_calc_attrrm_reservation(mp); |
| 702 | resp->tr_clearagi = xfs_calc_clear_agi_bucket_reservation(mp); |
| 703 | resp->tr_growrtalloc = xfs_calc_growrtalloc_reservation(mp); |
| 704 | resp->tr_growrtzero = xfs_calc_growrtzero_reservation(mp); |
| 705 | resp->tr_growrtfree = xfs_calc_growrtfree_reservation(mp); |
| 706 | resp->tr_qm_sbchange = xfs_calc_qm_sbchange_reservation(mp); |
| 707 | resp->tr_qm_setqlim = xfs_calc_qm_setqlim_reservation(mp); |
| 708 | resp->tr_qm_dqalloc = xfs_calc_qm_dqalloc_reservation(mp); |
| 709 | resp->tr_qm_quotaoff = xfs_calc_qm_quotaoff_reservation(mp); |
| 710 | resp->tr_qm_equotaoff = xfs_calc_qm_quotaoff_end_reservation(mp); |
| 711 | resp->tr_sb = xfs_calc_sb_reservation(mp); |
| 712 | } |
| 713 | |
| 714 | /* |
| 715 | * This routine is called to allocate a transaction structure. |
| 716 | * The type parameter indicates the type of the transaction. These |
| 717 | * are enumerated in xfs_trans.h. |
| 718 | * |
| 719 | * Dynamically allocate the transaction structure from the transaction |
| 720 | * zone, initialize it, and return it to the caller. |
| 721 | */ |
| 722 | xfs_trans_t * |
| 723 | xfs_trans_alloc( |
| 724 | xfs_mount_t *mp, |
| 725 | uint type) |
| 726 | { |
| 727 | xfs_trans_t *tp; |
| 728 | |
| 729 | sb_start_intwrite(mp->m_super); |
| 730 | tp = _xfs_trans_alloc(mp, type, KM_SLEEP); |
| 731 | tp->t_flags |= XFS_TRANS_FREEZE_PROT; |
| 732 | return tp; |
| 733 | } |
| 734 | |
| 735 | xfs_trans_t * |
| 736 | _xfs_trans_alloc( |
| 737 | xfs_mount_t *mp, |
| 738 | uint type, |
| 739 | xfs_km_flags_t memflags) |
| 740 | { |
| 741 | xfs_trans_t *tp; |
| 742 | |
| 743 | WARN_ON(mp->m_super->s_writers.frozen == SB_FREEZE_COMPLETE); |
| 744 | atomic_inc(&mp->m_active_trans); |
| 745 | |
| 746 | tp = kmem_zone_zalloc(xfs_trans_zone, memflags); |
| 747 | tp->t_magic = XFS_TRANS_MAGIC; |
| 748 | tp->t_type = type; |
| 749 | tp->t_mountp = mp; |
| 750 | INIT_LIST_HEAD(&tp->t_items); |
| 751 | INIT_LIST_HEAD(&tp->t_busy); |
| 752 | return tp; |
| 753 | } |
| 754 | |
| 755 | /* |
| 756 | * Free the transaction structure. If there is more clean up |
| 757 | * to do when the structure is freed, add it here. |
| 758 | */ |
| 759 | STATIC void |
| 760 | xfs_trans_free( |
| 761 | struct xfs_trans *tp) |
| 762 | { |
| 763 | xfs_extent_busy_sort(&tp->t_busy); |
| 764 | xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false); |
| 765 | |
| 766 | atomic_dec(&tp->t_mountp->m_active_trans); |
| 767 | if (tp->t_flags & XFS_TRANS_FREEZE_PROT) |
| 768 | sb_end_intwrite(tp->t_mountp->m_super); |
| 769 | xfs_trans_free_dqinfo(tp); |
| 770 | kmem_zone_free(xfs_trans_zone, tp); |
| 771 | } |
| 772 | |
| 773 | /* |
| 774 | * This is called to create a new transaction which will share the |
| 775 | * permanent log reservation of the given transaction. The remaining |
| 776 | * unused block and rt extent reservations are also inherited. This |
| 777 | * implies that the original transaction is no longer allowed to allocate |
| 778 | * blocks. Locks and log items, however, are not inherited. They must |
| 779 | * be added to the new transaction explicitly. |
| 780 | */ |
| 781 | xfs_trans_t * |
| 782 | xfs_trans_dup( |
| 783 | xfs_trans_t *tp) |
| 784 | { |
| 785 | xfs_trans_t *ntp; |
| 786 | |
| 787 | ntp = kmem_zone_zalloc(xfs_trans_zone, KM_SLEEP); |
| 788 | |
| 789 | /* |
| 790 | * Initialize the new transaction structure. |
| 791 | */ |
| 792 | ntp->t_magic = XFS_TRANS_MAGIC; |
| 793 | ntp->t_type = tp->t_type; |
| 794 | ntp->t_mountp = tp->t_mountp; |
| 795 | INIT_LIST_HEAD(&ntp->t_items); |
| 796 | INIT_LIST_HEAD(&ntp->t_busy); |
| 797 | |
| 798 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
| 799 | ASSERT(tp->t_ticket != NULL); |
| 800 | |
| 801 | ntp->t_flags = XFS_TRANS_PERM_LOG_RES | |
| 802 | (tp->t_flags & XFS_TRANS_RESERVE) | |
| 803 | (tp->t_flags & XFS_TRANS_FREEZE_PROT); |
| 804 | /* We gave our writer reference to the new transaction */ |
| 805 | tp->t_flags &= ~XFS_TRANS_FREEZE_PROT; |
| 806 | ntp->t_ticket = xfs_log_ticket_get(tp->t_ticket); |
| 807 | ntp->t_blk_res = tp->t_blk_res - tp->t_blk_res_used; |
| 808 | tp->t_blk_res = tp->t_blk_res_used; |
| 809 | ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used; |
| 810 | tp->t_rtx_res = tp->t_rtx_res_used; |
| 811 | ntp->t_pflags = tp->t_pflags; |
| 812 | |
| 813 | xfs_trans_dup_dqinfo(tp, ntp); |
| 814 | |
| 815 | atomic_inc(&tp->t_mountp->m_active_trans); |
| 816 | return ntp; |
| 817 | } |
| 818 | |
| 819 | /* |
| 820 | * This is called to reserve free disk blocks and log space for the |
| 821 | * given transaction. This must be done before allocating any resources |
| 822 | * within the transaction. |
| 823 | * |
| 824 | * This will return ENOSPC if there are not enough blocks available. |
| 825 | * It will sleep waiting for available log space. |
| 826 | * The only valid value for the flags parameter is XFS_RES_LOG_PERM, which |
| 827 | * is used by long running transactions. If any one of the reservations |
| 828 | * fails then they will all be backed out. |
| 829 | * |
| 830 | * This does not do quota reservations. That typically is done by the |
| 831 | * caller afterwards. |
| 832 | */ |
| 833 | int |
| 834 | xfs_trans_reserve( |
| 835 | xfs_trans_t *tp, |
| 836 | uint blocks, |
| 837 | uint logspace, |
| 838 | uint rtextents, |
| 839 | uint flags, |
| 840 | uint logcount) |
| 841 | { |
| 842 | int error = 0; |
| 843 | int rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; |
| 844 | |
| 845 | /* Mark this thread as being in a transaction */ |
| 846 | current_set_flags_nested(&tp->t_pflags, PF_FSTRANS); |
| 847 | |
| 848 | /* |
| 849 | * Attempt to reserve the needed disk blocks by decrementing |
| 850 | * the number needed from the number available. This will |
| 851 | * fail if the count would go below zero. |
| 852 | */ |
| 853 | if (blocks > 0) { |
| 854 | error = xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, |
| 855 | -((int64_t)blocks), rsvd); |
| 856 | if (error != 0) { |
| 857 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
| 858 | return (XFS_ERROR(ENOSPC)); |
| 859 | } |
| 860 | tp->t_blk_res += blocks; |
| 861 | } |
| 862 | |
| 863 | /* |
| 864 | * Reserve the log space needed for this transaction. |
| 865 | */ |
| 866 | if (logspace > 0) { |
| 867 | bool permanent = false; |
| 868 | |
| 869 | ASSERT(tp->t_log_res == 0 || tp->t_log_res == logspace); |
| 870 | ASSERT(tp->t_log_count == 0 || tp->t_log_count == logcount); |
| 871 | |
| 872 | if (flags & XFS_TRANS_PERM_LOG_RES) { |
| 873 | tp->t_flags |= XFS_TRANS_PERM_LOG_RES; |
| 874 | permanent = true; |
| 875 | } else { |
| 876 | ASSERT(tp->t_ticket == NULL); |
| 877 | ASSERT(!(tp->t_flags & XFS_TRANS_PERM_LOG_RES)); |
| 878 | } |
| 879 | |
| 880 | if (tp->t_ticket != NULL) { |
| 881 | ASSERT(flags & XFS_TRANS_PERM_LOG_RES); |
| 882 | error = xfs_log_regrant(tp->t_mountp, tp->t_ticket); |
| 883 | } else { |
| 884 | error = xfs_log_reserve(tp->t_mountp, logspace, |
| 885 | logcount, &tp->t_ticket, |
| 886 | XFS_TRANSACTION, permanent, |
| 887 | tp->t_type); |
| 888 | } |
| 889 | |
| 890 | if (error) |
| 891 | goto undo_blocks; |
| 892 | |
| 893 | tp->t_log_res = logspace; |
| 894 | tp->t_log_count = logcount; |
| 895 | } |
| 896 | |
| 897 | /* |
| 898 | * Attempt to reserve the needed realtime extents by decrementing |
| 899 | * the number needed from the number available. This will |
| 900 | * fail if the count would go below zero. |
| 901 | */ |
| 902 | if (rtextents > 0) { |
| 903 | error = xfs_mod_incore_sb(tp->t_mountp, XFS_SBS_FREXTENTS, |
| 904 | -((int64_t)rtextents), rsvd); |
| 905 | if (error) { |
| 906 | error = XFS_ERROR(ENOSPC); |
| 907 | goto undo_log; |
| 908 | } |
| 909 | tp->t_rtx_res += rtextents; |
| 910 | } |
| 911 | |
| 912 | return 0; |
| 913 | |
| 914 | /* |
| 915 | * Error cases jump to one of these labels to undo any |
| 916 | * reservations which have already been performed. |
| 917 | */ |
| 918 | undo_log: |
| 919 | if (logspace > 0) { |
| 920 | int log_flags; |
| 921 | |
| 922 | if (flags & XFS_TRANS_PERM_LOG_RES) { |
| 923 | log_flags = XFS_LOG_REL_PERM_RESERV; |
| 924 | } else { |
| 925 | log_flags = 0; |
| 926 | } |
| 927 | xfs_log_done(tp->t_mountp, tp->t_ticket, NULL, log_flags); |
| 928 | tp->t_ticket = NULL; |
| 929 | tp->t_log_res = 0; |
| 930 | tp->t_flags &= ~XFS_TRANS_PERM_LOG_RES; |
| 931 | } |
| 932 | |
| 933 | undo_blocks: |
| 934 | if (blocks > 0) { |
| 935 | xfs_icsb_modify_counters(tp->t_mountp, XFS_SBS_FDBLOCKS, |
| 936 | (int64_t)blocks, rsvd); |
| 937 | tp->t_blk_res = 0; |
| 938 | } |
| 939 | |
| 940 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
| 941 | |
| 942 | return error; |
| 943 | } |
| 944 | |
| 945 | /* |
| 946 | * Record the indicated change to the given field for application |
| 947 | * to the file system's superblock when the transaction commits. |
| 948 | * For now, just store the change in the transaction structure. |
| 949 | * |
| 950 | * Mark the transaction structure to indicate that the superblock |
| 951 | * needs to be updated before committing. |
| 952 | * |
| 953 | * Because we may not be keeping track of allocated/free inodes and |
| 954 | * used filesystem blocks in the superblock, we do not mark the |
| 955 | * superblock dirty in this transaction if we modify these fields. |
| 956 | * We still need to update the transaction deltas so that they get |
| 957 | * applied to the incore superblock, but we don't want them to |
| 958 | * cause the superblock to get locked and logged if these are the |
| 959 | * only fields in the superblock that the transaction modifies. |
| 960 | */ |
| 961 | void |
| 962 | xfs_trans_mod_sb( |
| 963 | xfs_trans_t *tp, |
| 964 | uint field, |
| 965 | int64_t delta) |
| 966 | { |
| 967 | uint32_t flags = (XFS_TRANS_DIRTY|XFS_TRANS_SB_DIRTY); |
| 968 | xfs_mount_t *mp = tp->t_mountp; |
| 969 | |
| 970 | switch (field) { |
| 971 | case XFS_TRANS_SB_ICOUNT: |
| 972 | tp->t_icount_delta += delta; |
| 973 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) |
| 974 | flags &= ~XFS_TRANS_SB_DIRTY; |
| 975 | break; |
| 976 | case XFS_TRANS_SB_IFREE: |
| 977 | tp->t_ifree_delta += delta; |
| 978 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) |
| 979 | flags &= ~XFS_TRANS_SB_DIRTY; |
| 980 | break; |
| 981 | case XFS_TRANS_SB_FDBLOCKS: |
| 982 | /* |
| 983 | * Track the number of blocks allocated in the |
| 984 | * transaction. Make sure it does not exceed the |
| 985 | * number reserved. |
| 986 | */ |
| 987 | if (delta < 0) { |
| 988 | tp->t_blk_res_used += (uint)-delta; |
| 989 | ASSERT(tp->t_blk_res_used <= tp->t_blk_res); |
| 990 | } |
| 991 | tp->t_fdblocks_delta += delta; |
| 992 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) |
| 993 | flags &= ~XFS_TRANS_SB_DIRTY; |
| 994 | break; |
| 995 | case XFS_TRANS_SB_RES_FDBLOCKS: |
| 996 | /* |
| 997 | * The allocation has already been applied to the |
| 998 | * in-core superblock's counter. This should only |
| 999 | * be applied to the on-disk superblock. |
| 1000 | */ |
| 1001 | ASSERT(delta < 0); |
| 1002 | tp->t_res_fdblocks_delta += delta; |
| 1003 | if (xfs_sb_version_haslazysbcount(&mp->m_sb)) |
| 1004 | flags &= ~XFS_TRANS_SB_DIRTY; |
| 1005 | break; |
| 1006 | case XFS_TRANS_SB_FREXTENTS: |
| 1007 | /* |
| 1008 | * Track the number of blocks allocated in the |
| 1009 | * transaction. Make sure it does not exceed the |
| 1010 | * number reserved. |
| 1011 | */ |
| 1012 | if (delta < 0) { |
| 1013 | tp->t_rtx_res_used += (uint)-delta; |
| 1014 | ASSERT(tp->t_rtx_res_used <= tp->t_rtx_res); |
| 1015 | } |
| 1016 | tp->t_frextents_delta += delta; |
| 1017 | break; |
| 1018 | case XFS_TRANS_SB_RES_FREXTENTS: |
| 1019 | /* |
| 1020 | * The allocation has already been applied to the |
| 1021 | * in-core superblock's counter. This should only |
| 1022 | * be applied to the on-disk superblock. |
| 1023 | */ |
| 1024 | ASSERT(delta < 0); |
| 1025 | tp->t_res_frextents_delta += delta; |
| 1026 | break; |
| 1027 | case XFS_TRANS_SB_DBLOCKS: |
| 1028 | ASSERT(delta > 0); |
| 1029 | tp->t_dblocks_delta += delta; |
| 1030 | break; |
| 1031 | case XFS_TRANS_SB_AGCOUNT: |
| 1032 | ASSERT(delta > 0); |
| 1033 | tp->t_agcount_delta += delta; |
| 1034 | break; |
| 1035 | case XFS_TRANS_SB_IMAXPCT: |
| 1036 | tp->t_imaxpct_delta += delta; |
| 1037 | break; |
| 1038 | case XFS_TRANS_SB_REXTSIZE: |
| 1039 | tp->t_rextsize_delta += delta; |
| 1040 | break; |
| 1041 | case XFS_TRANS_SB_RBMBLOCKS: |
| 1042 | tp->t_rbmblocks_delta += delta; |
| 1043 | break; |
| 1044 | case XFS_TRANS_SB_RBLOCKS: |
| 1045 | tp->t_rblocks_delta += delta; |
| 1046 | break; |
| 1047 | case XFS_TRANS_SB_REXTENTS: |
| 1048 | tp->t_rextents_delta += delta; |
| 1049 | break; |
| 1050 | case XFS_TRANS_SB_REXTSLOG: |
| 1051 | tp->t_rextslog_delta += delta; |
| 1052 | break; |
| 1053 | default: |
| 1054 | ASSERT(0); |
| 1055 | return; |
| 1056 | } |
| 1057 | |
| 1058 | tp->t_flags |= flags; |
| 1059 | } |
| 1060 | |
| 1061 | /* |
| 1062 | * xfs_trans_apply_sb_deltas() is called from the commit code |
| 1063 | * to bring the superblock buffer into the current transaction |
| 1064 | * and modify it as requested by earlier calls to xfs_trans_mod_sb(). |
| 1065 | * |
| 1066 | * For now we just look at each field allowed to change and change |
| 1067 | * it if necessary. |
| 1068 | */ |
| 1069 | STATIC void |
| 1070 | xfs_trans_apply_sb_deltas( |
| 1071 | xfs_trans_t *tp) |
| 1072 | { |
| 1073 | xfs_dsb_t *sbp; |
| 1074 | xfs_buf_t *bp; |
| 1075 | int whole = 0; |
| 1076 | |
| 1077 | bp = xfs_trans_getsb(tp, tp->t_mountp, 0); |
| 1078 | sbp = XFS_BUF_TO_SBP(bp); |
| 1079 | |
| 1080 | /* |
| 1081 | * Check that superblock mods match the mods made to AGF counters. |
| 1082 | */ |
| 1083 | ASSERT((tp->t_fdblocks_delta + tp->t_res_fdblocks_delta) == |
| 1084 | (tp->t_ag_freeblks_delta + tp->t_ag_flist_delta + |
| 1085 | tp->t_ag_btree_delta)); |
| 1086 | |
| 1087 | /* |
| 1088 | * Only update the superblock counters if we are logging them |
| 1089 | */ |
| 1090 | if (!xfs_sb_version_haslazysbcount(&(tp->t_mountp->m_sb))) { |
| 1091 | if (tp->t_icount_delta) |
| 1092 | be64_add_cpu(&sbp->sb_icount, tp->t_icount_delta); |
| 1093 | if (tp->t_ifree_delta) |
| 1094 | be64_add_cpu(&sbp->sb_ifree, tp->t_ifree_delta); |
| 1095 | if (tp->t_fdblocks_delta) |
| 1096 | be64_add_cpu(&sbp->sb_fdblocks, tp->t_fdblocks_delta); |
| 1097 | if (tp->t_res_fdblocks_delta) |
| 1098 | be64_add_cpu(&sbp->sb_fdblocks, tp->t_res_fdblocks_delta); |
| 1099 | } |
| 1100 | |
| 1101 | if (tp->t_frextents_delta) |
| 1102 | be64_add_cpu(&sbp->sb_frextents, tp->t_frextents_delta); |
| 1103 | if (tp->t_res_frextents_delta) |
| 1104 | be64_add_cpu(&sbp->sb_frextents, tp->t_res_frextents_delta); |
| 1105 | |
| 1106 | if (tp->t_dblocks_delta) { |
| 1107 | be64_add_cpu(&sbp->sb_dblocks, tp->t_dblocks_delta); |
| 1108 | whole = 1; |
| 1109 | } |
| 1110 | if (tp->t_agcount_delta) { |
| 1111 | be32_add_cpu(&sbp->sb_agcount, tp->t_agcount_delta); |
| 1112 | whole = 1; |
| 1113 | } |
| 1114 | if (tp->t_imaxpct_delta) { |
| 1115 | sbp->sb_imax_pct += tp->t_imaxpct_delta; |
| 1116 | whole = 1; |
| 1117 | } |
| 1118 | if (tp->t_rextsize_delta) { |
| 1119 | be32_add_cpu(&sbp->sb_rextsize, tp->t_rextsize_delta); |
| 1120 | whole = 1; |
| 1121 | } |
| 1122 | if (tp->t_rbmblocks_delta) { |
| 1123 | be32_add_cpu(&sbp->sb_rbmblocks, tp->t_rbmblocks_delta); |
| 1124 | whole = 1; |
| 1125 | } |
| 1126 | if (tp->t_rblocks_delta) { |
| 1127 | be64_add_cpu(&sbp->sb_rblocks, tp->t_rblocks_delta); |
| 1128 | whole = 1; |
| 1129 | } |
| 1130 | if (tp->t_rextents_delta) { |
| 1131 | be64_add_cpu(&sbp->sb_rextents, tp->t_rextents_delta); |
| 1132 | whole = 1; |
| 1133 | } |
| 1134 | if (tp->t_rextslog_delta) { |
| 1135 | sbp->sb_rextslog += tp->t_rextslog_delta; |
| 1136 | whole = 1; |
| 1137 | } |
| 1138 | |
| 1139 | if (whole) |
| 1140 | /* |
| 1141 | * Log the whole thing, the fields are noncontiguous. |
| 1142 | */ |
| 1143 | xfs_trans_log_buf(tp, bp, 0, sizeof(xfs_dsb_t) - 1); |
| 1144 | else |
| 1145 | /* |
| 1146 | * Since all the modifiable fields are contiguous, we |
| 1147 | * can get away with this. |
| 1148 | */ |
| 1149 | xfs_trans_log_buf(tp, bp, offsetof(xfs_dsb_t, sb_icount), |
| 1150 | offsetof(xfs_dsb_t, sb_frextents) + |
| 1151 | sizeof(sbp->sb_frextents) - 1); |
| 1152 | } |
| 1153 | |
| 1154 | /* |
| 1155 | * xfs_trans_unreserve_and_mod_sb() is called to release unused reservations |
| 1156 | * and apply superblock counter changes to the in-core superblock. The |
| 1157 | * t_res_fdblocks_delta and t_res_frextents_delta fields are explicitly NOT |
| 1158 | * applied to the in-core superblock. The idea is that that has already been |
| 1159 | * done. |
| 1160 | * |
| 1161 | * This is done efficiently with a single call to xfs_mod_incore_sb_batch(). |
| 1162 | * However, we have to ensure that we only modify each superblock field only |
| 1163 | * once because the application of the delta values may not be atomic. That can |
| 1164 | * lead to ENOSPC races occurring if we have two separate modifcations of the |
| 1165 | * free space counter to put back the entire reservation and then take away |
| 1166 | * what we used. |
| 1167 | * |
| 1168 | * If we are not logging superblock counters, then the inode allocated/free and |
| 1169 | * used block counts are not updated in the on disk superblock. In this case, |
| 1170 | * XFS_TRANS_SB_DIRTY will not be set when the transaction is updated but we |
| 1171 | * still need to update the incore superblock with the changes. |
| 1172 | */ |
| 1173 | void |
| 1174 | xfs_trans_unreserve_and_mod_sb( |
| 1175 | xfs_trans_t *tp) |
| 1176 | { |
| 1177 | xfs_mod_sb_t msb[9]; /* If you add cases, add entries */ |
| 1178 | xfs_mod_sb_t *msbp; |
| 1179 | xfs_mount_t *mp = tp->t_mountp; |
| 1180 | /* REFERENCED */ |
| 1181 | int error; |
| 1182 | int rsvd; |
| 1183 | int64_t blkdelta = 0; |
| 1184 | int64_t rtxdelta = 0; |
| 1185 | int64_t idelta = 0; |
| 1186 | int64_t ifreedelta = 0; |
| 1187 | |
| 1188 | msbp = msb; |
| 1189 | rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0; |
| 1190 | |
| 1191 | /* calculate deltas */ |
| 1192 | if (tp->t_blk_res > 0) |
| 1193 | blkdelta = tp->t_blk_res; |
| 1194 | if ((tp->t_fdblocks_delta != 0) && |
| 1195 | (xfs_sb_version_haslazysbcount(&mp->m_sb) || |
| 1196 | (tp->t_flags & XFS_TRANS_SB_DIRTY))) |
| 1197 | blkdelta += tp->t_fdblocks_delta; |
| 1198 | |
| 1199 | if (tp->t_rtx_res > 0) |
| 1200 | rtxdelta = tp->t_rtx_res; |
| 1201 | if ((tp->t_frextents_delta != 0) && |
| 1202 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) |
| 1203 | rtxdelta += tp->t_frextents_delta; |
| 1204 | |
| 1205 | if (xfs_sb_version_haslazysbcount(&mp->m_sb) || |
| 1206 | (tp->t_flags & XFS_TRANS_SB_DIRTY)) { |
| 1207 | idelta = tp->t_icount_delta; |
| 1208 | ifreedelta = tp->t_ifree_delta; |
| 1209 | } |
| 1210 | |
| 1211 | /* apply the per-cpu counters */ |
| 1212 | if (blkdelta) { |
| 1213 | error = xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, |
| 1214 | blkdelta, rsvd); |
| 1215 | if (error) |
| 1216 | goto out; |
| 1217 | } |
| 1218 | |
| 1219 | if (idelta) { |
| 1220 | error = xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, |
| 1221 | idelta, rsvd); |
| 1222 | if (error) |
| 1223 | goto out_undo_fdblocks; |
| 1224 | } |
| 1225 | |
| 1226 | if (ifreedelta) { |
| 1227 | error = xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, |
| 1228 | ifreedelta, rsvd); |
| 1229 | if (error) |
| 1230 | goto out_undo_icount; |
| 1231 | } |
| 1232 | |
| 1233 | /* apply remaining deltas */ |
| 1234 | if (rtxdelta != 0) { |
| 1235 | msbp->msb_field = XFS_SBS_FREXTENTS; |
| 1236 | msbp->msb_delta = rtxdelta; |
| 1237 | msbp++; |
| 1238 | } |
| 1239 | |
| 1240 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) { |
| 1241 | if (tp->t_dblocks_delta != 0) { |
| 1242 | msbp->msb_field = XFS_SBS_DBLOCKS; |
| 1243 | msbp->msb_delta = tp->t_dblocks_delta; |
| 1244 | msbp++; |
| 1245 | } |
| 1246 | if (tp->t_agcount_delta != 0) { |
| 1247 | msbp->msb_field = XFS_SBS_AGCOUNT; |
| 1248 | msbp->msb_delta = tp->t_agcount_delta; |
| 1249 | msbp++; |
| 1250 | } |
| 1251 | if (tp->t_imaxpct_delta != 0) { |
| 1252 | msbp->msb_field = XFS_SBS_IMAX_PCT; |
| 1253 | msbp->msb_delta = tp->t_imaxpct_delta; |
| 1254 | msbp++; |
| 1255 | } |
| 1256 | if (tp->t_rextsize_delta != 0) { |
| 1257 | msbp->msb_field = XFS_SBS_REXTSIZE; |
| 1258 | msbp->msb_delta = tp->t_rextsize_delta; |
| 1259 | msbp++; |
| 1260 | } |
| 1261 | if (tp->t_rbmblocks_delta != 0) { |
| 1262 | msbp->msb_field = XFS_SBS_RBMBLOCKS; |
| 1263 | msbp->msb_delta = tp->t_rbmblocks_delta; |
| 1264 | msbp++; |
| 1265 | } |
| 1266 | if (tp->t_rblocks_delta != 0) { |
| 1267 | msbp->msb_field = XFS_SBS_RBLOCKS; |
| 1268 | msbp->msb_delta = tp->t_rblocks_delta; |
| 1269 | msbp++; |
| 1270 | } |
| 1271 | if (tp->t_rextents_delta != 0) { |
| 1272 | msbp->msb_field = XFS_SBS_REXTENTS; |
| 1273 | msbp->msb_delta = tp->t_rextents_delta; |
| 1274 | msbp++; |
| 1275 | } |
| 1276 | if (tp->t_rextslog_delta != 0) { |
| 1277 | msbp->msb_field = XFS_SBS_REXTSLOG; |
| 1278 | msbp->msb_delta = tp->t_rextslog_delta; |
| 1279 | msbp++; |
| 1280 | } |
| 1281 | } |
| 1282 | |
| 1283 | /* |
| 1284 | * If we need to change anything, do it. |
| 1285 | */ |
| 1286 | if (msbp > msb) { |
| 1287 | error = xfs_mod_incore_sb_batch(tp->t_mountp, msb, |
| 1288 | (uint)(msbp - msb), rsvd); |
| 1289 | if (error) |
| 1290 | goto out_undo_ifreecount; |
| 1291 | } |
| 1292 | |
| 1293 | return; |
| 1294 | |
| 1295 | out_undo_ifreecount: |
| 1296 | if (ifreedelta) |
| 1297 | xfs_icsb_modify_counters(mp, XFS_SBS_IFREE, -ifreedelta, rsvd); |
| 1298 | out_undo_icount: |
| 1299 | if (idelta) |
| 1300 | xfs_icsb_modify_counters(mp, XFS_SBS_ICOUNT, -idelta, rsvd); |
| 1301 | out_undo_fdblocks: |
| 1302 | if (blkdelta) |
| 1303 | xfs_icsb_modify_counters(mp, XFS_SBS_FDBLOCKS, -blkdelta, rsvd); |
| 1304 | out: |
| 1305 | ASSERT(error == 0); |
| 1306 | return; |
| 1307 | } |
| 1308 | |
| 1309 | /* |
| 1310 | * Add the given log item to the transaction's list of log items. |
| 1311 | * |
| 1312 | * The log item will now point to its new descriptor with its li_desc field. |
| 1313 | */ |
| 1314 | void |
| 1315 | xfs_trans_add_item( |
| 1316 | struct xfs_trans *tp, |
| 1317 | struct xfs_log_item *lip) |
| 1318 | { |
| 1319 | struct xfs_log_item_desc *lidp; |
| 1320 | |
| 1321 | ASSERT(lip->li_mountp == tp->t_mountp); |
| 1322 | ASSERT(lip->li_ailp == tp->t_mountp->m_ail); |
| 1323 | |
| 1324 | lidp = kmem_zone_zalloc(xfs_log_item_desc_zone, KM_SLEEP | KM_NOFS); |
| 1325 | |
| 1326 | lidp->lid_item = lip; |
| 1327 | lidp->lid_flags = 0; |
| 1328 | list_add_tail(&lidp->lid_trans, &tp->t_items); |
| 1329 | |
| 1330 | lip->li_desc = lidp; |
| 1331 | } |
| 1332 | |
/*
 * Unlink a log item descriptor from the transaction item list it sits on
 * and return it to xfs_log_item_desc_zone.
 *
 * The owning log item's li_desc pointer is not touched here; the callers
 * in this file clear it themselves.
 */
STATIC void
xfs_trans_free_item_desc(
	struct xfs_log_item_desc *lidp)
{
	list_del_init(&lidp->lid_trans);
	kmem_zone_free(xfs_log_item_desc_zone, lidp);
}
| 1340 | |
| 1341 | /* |
| 1342 | * Unlink and free the given descriptor. |
| 1343 | */ |
| 1344 | void |
| 1345 | xfs_trans_del_item( |
| 1346 | struct xfs_log_item *lip) |
| 1347 | { |
| 1348 | xfs_trans_free_item_desc(lip->li_desc); |
| 1349 | lip->li_desc = NULL; |
| 1350 | } |
| 1351 | |
| 1352 | /* |
| 1353 | * Unlock all of the items of a transaction and free all the descriptors |
| 1354 | * of that transaction. |
| 1355 | */ |
| 1356 | void |
| 1357 | xfs_trans_free_items( |
| 1358 | struct xfs_trans *tp, |
| 1359 | xfs_lsn_t commit_lsn, |
| 1360 | int flags) |
| 1361 | { |
| 1362 | struct xfs_log_item_desc *lidp, *next; |
| 1363 | |
| 1364 | list_for_each_entry_safe(lidp, next, &tp->t_items, lid_trans) { |
| 1365 | struct xfs_log_item *lip = lidp->lid_item; |
| 1366 | |
| 1367 | lip->li_desc = NULL; |
| 1368 | |
| 1369 | if (commit_lsn != NULLCOMMITLSN) |
| 1370 | IOP_COMMITTING(lip, commit_lsn); |
| 1371 | if (flags & XFS_TRANS_ABORT) |
| 1372 | lip->li_flags |= XFS_LI_ABORTED; |
| 1373 | IOP_UNLOCK(lip); |
| 1374 | |
| 1375 | xfs_trans_free_item_desc(lidp); |
| 1376 | } |
| 1377 | } |
| 1378 | |
| 1379 | static inline void |
| 1380 | xfs_log_item_batch_insert( |
| 1381 | struct xfs_ail *ailp, |
| 1382 | struct xfs_ail_cursor *cur, |
| 1383 | struct xfs_log_item **log_items, |
| 1384 | int nr_items, |
| 1385 | xfs_lsn_t commit_lsn) |
| 1386 | { |
| 1387 | int i; |
| 1388 | |
| 1389 | spin_lock(&ailp->xa_lock); |
| 1390 | /* xfs_trans_ail_update_bulk drops ailp->xa_lock */ |
| 1391 | xfs_trans_ail_update_bulk(ailp, cur, log_items, nr_items, commit_lsn); |
| 1392 | |
| 1393 | for (i = 0; i < nr_items; i++) |
| 1394 | IOP_UNPIN(log_items[i], 0); |
| 1395 | } |
| 1396 | |
| 1397 | /* |
| 1398 | * Bulk operation version of xfs_trans_committed that takes a log vector of |
| 1399 | * items to insert into the AIL. This uses bulk AIL insertion techniques to |
| 1400 | * minimise lock traffic. |
| 1401 | * |
| 1402 | * If we are called with the aborted flag set, it is because a log write during |
| 1403 | * a CIL checkpoint commit has failed. In this case, all the items in the |
| 1404 | * checkpoint have already gone through IOP_COMMITED and IOP_UNLOCK, which |
| 1405 | * means that checkpoint commit abort handling is treated exactly the same |
| 1406 | * as an iclog write error even though we haven't started any IO yet. Hence in |
| 1407 | * this case all we need to do is IOP_COMMITTED processing, followed by an |
| 1408 | * IOP_UNPIN(aborted) call. |
| 1409 | * |
| 1410 | * The AIL cursor is used to optimise the insert process. If commit_lsn is not |
| 1411 | * at the end of the AIL, the insert cursor avoids the need to walk |
| 1412 | * the AIL to find the insertion point on every xfs_log_item_batch_insert() |
| 1413 | * call. This saves a lot of needless list walking and is a net win, even |
| 1414 | * though it slightly increases that amount of AIL lock traffic to set it up |
| 1415 | * and tear it down. |
| 1416 | */ |
| 1417 | void |
| 1418 | xfs_trans_committed_bulk( |
| 1419 | struct xfs_ail *ailp, |
| 1420 | struct xfs_log_vec *log_vector, |
| 1421 | xfs_lsn_t commit_lsn, |
| 1422 | int aborted) |
| 1423 | { |
| 1424 | #define LOG_ITEM_BATCH_SIZE 32 |
| 1425 | struct xfs_log_item *log_items[LOG_ITEM_BATCH_SIZE]; |
| 1426 | struct xfs_log_vec *lv; |
| 1427 | struct xfs_ail_cursor cur; |
| 1428 | int i = 0; |
| 1429 | |
| 1430 | spin_lock(&ailp->xa_lock); |
| 1431 | xfs_trans_ail_cursor_last(ailp, &cur, commit_lsn); |
| 1432 | spin_unlock(&ailp->xa_lock); |
| 1433 | |
| 1434 | /* unpin all the log items */ |
| 1435 | for (lv = log_vector; lv; lv = lv->lv_next ) { |
| 1436 | struct xfs_log_item *lip = lv->lv_item; |
| 1437 | xfs_lsn_t item_lsn; |
| 1438 | |
| 1439 | if (aborted) |
| 1440 | lip->li_flags |= XFS_LI_ABORTED; |
| 1441 | item_lsn = IOP_COMMITTED(lip, commit_lsn); |
| 1442 | |
| 1443 | /* item_lsn of -1 means the item needs no further processing */ |
| 1444 | if (XFS_LSN_CMP(item_lsn, (xfs_lsn_t)-1) == 0) |
| 1445 | continue; |
| 1446 | |
| 1447 | /* |
| 1448 | * if we are aborting the operation, no point in inserting the |
| 1449 | * object into the AIL as we are in a shutdown situation. |
| 1450 | */ |
| 1451 | if (aborted) { |
| 1452 | ASSERT(XFS_FORCED_SHUTDOWN(ailp->xa_mount)); |
| 1453 | IOP_UNPIN(lip, 1); |
| 1454 | continue; |
| 1455 | } |
| 1456 | |
| 1457 | if (item_lsn != commit_lsn) { |
| 1458 | |
| 1459 | /* |
| 1460 | * Not a bulk update option due to unusual item_lsn. |
| 1461 | * Push into AIL immediately, rechecking the lsn once |
| 1462 | * we have the ail lock. Then unpin the item. This does |
| 1463 | * not affect the AIL cursor the bulk insert path is |
| 1464 | * using. |
| 1465 | */ |
| 1466 | spin_lock(&ailp->xa_lock); |
| 1467 | if (XFS_LSN_CMP(item_lsn, lip->li_lsn) > 0) |
| 1468 | xfs_trans_ail_update(ailp, lip, item_lsn); |
| 1469 | else |
| 1470 | spin_unlock(&ailp->xa_lock); |
| 1471 | IOP_UNPIN(lip, 0); |
| 1472 | continue; |
| 1473 | } |
| 1474 | |
| 1475 | /* Item is a candidate for bulk AIL insert. */ |
| 1476 | log_items[i++] = lv->lv_item; |
| 1477 | if (i >= LOG_ITEM_BATCH_SIZE) { |
| 1478 | xfs_log_item_batch_insert(ailp, &cur, log_items, |
| 1479 | LOG_ITEM_BATCH_SIZE, commit_lsn); |
| 1480 | i = 0; |
| 1481 | } |
| 1482 | } |
| 1483 | |
| 1484 | /* make sure we insert the remainder! */ |
| 1485 | if (i) |
| 1486 | xfs_log_item_batch_insert(ailp, &cur, log_items, i, commit_lsn); |
| 1487 | |
| 1488 | spin_lock(&ailp->xa_lock); |
| 1489 | xfs_trans_ail_cursor_done(ailp, &cur); |
| 1490 | spin_unlock(&ailp->xa_lock); |
| 1491 | } |
| 1492 | |
| 1493 | /* |
| 1494 | * Commit the given transaction to the log. |
| 1495 | * |
| 1496 | * XFS disk error handling mechanism is not based on a typical |
| 1497 | * transaction abort mechanism. Logically after the filesystem |
| 1498 | * gets marked 'SHUTDOWN', we can't let any new transactions |
| 1499 | * be durable - ie. committed to disk - because some metadata might |
| 1500 | * be inconsistent. In such cases, this returns an error, and the |
| 1501 | * caller may assume that all locked objects joined to the transaction |
| 1502 | * have already been unlocked as if the commit had succeeded. |
| 1503 | * Do not reference the transaction structure after this call. |
| 1504 | */ |
| 1505 | int |
| 1506 | xfs_trans_commit( |
| 1507 | struct xfs_trans *tp, |
| 1508 | uint flags) |
| 1509 | { |
| 1510 | struct xfs_mount *mp = tp->t_mountp; |
| 1511 | xfs_lsn_t commit_lsn = -1; |
| 1512 | int error = 0; |
| 1513 | int log_flags = 0; |
| 1514 | int sync = tp->t_flags & XFS_TRANS_SYNC; |
| 1515 | |
| 1516 | /* |
| 1517 | * Determine whether this commit is releasing a permanent |
| 1518 | * log reservation or not. |
| 1519 | */ |
| 1520 | if (flags & XFS_TRANS_RELEASE_LOG_RES) { |
| 1521 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
| 1522 | log_flags = XFS_LOG_REL_PERM_RESERV; |
| 1523 | } |
| 1524 | |
| 1525 | /* |
| 1526 | * If there is nothing to be logged by the transaction, |
| 1527 | * then unlock all of the items associated with the |
| 1528 | * transaction and free the transaction structure. |
| 1529 | * Also make sure to return any reserved blocks to |
| 1530 | * the free pool. |
| 1531 | */ |
| 1532 | if (!(tp->t_flags & XFS_TRANS_DIRTY)) |
| 1533 | goto out_unreserve; |
| 1534 | |
| 1535 | if (XFS_FORCED_SHUTDOWN(mp)) { |
| 1536 | error = XFS_ERROR(EIO); |
| 1537 | goto out_unreserve; |
| 1538 | } |
| 1539 | |
| 1540 | ASSERT(tp->t_ticket != NULL); |
| 1541 | |
| 1542 | /* |
| 1543 | * If we need to update the superblock, then do it now. |
| 1544 | */ |
| 1545 | if (tp->t_flags & XFS_TRANS_SB_DIRTY) |
| 1546 | xfs_trans_apply_sb_deltas(tp); |
| 1547 | xfs_trans_apply_dquot_deltas(tp); |
| 1548 | |
| 1549 | error = xfs_log_commit_cil(mp, tp, &commit_lsn, flags); |
| 1550 | if (error == ENOMEM) { |
| 1551 | xfs_force_shutdown(mp, SHUTDOWN_LOG_IO_ERROR); |
| 1552 | error = XFS_ERROR(EIO); |
| 1553 | goto out_unreserve; |
| 1554 | } |
| 1555 | |
| 1556 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
| 1557 | xfs_trans_free(tp); |
| 1558 | |
| 1559 | /* |
| 1560 | * If the transaction needs to be synchronous, then force the |
| 1561 | * log out now and wait for it. |
| 1562 | */ |
| 1563 | if (sync) { |
| 1564 | if (!error) { |
| 1565 | error = _xfs_log_force_lsn(mp, commit_lsn, |
| 1566 | XFS_LOG_SYNC, NULL); |
| 1567 | } |
| 1568 | XFS_STATS_INC(xs_trans_sync); |
| 1569 | } else { |
| 1570 | XFS_STATS_INC(xs_trans_async); |
| 1571 | } |
| 1572 | |
| 1573 | return error; |
| 1574 | |
| 1575 | out_unreserve: |
| 1576 | xfs_trans_unreserve_and_mod_sb(tp); |
| 1577 | |
| 1578 | /* |
| 1579 | * It is indeed possible for the transaction to be not dirty but |
| 1580 | * the dqinfo portion to be. All that means is that we have some |
| 1581 | * (non-persistent) quota reservations that need to be unreserved. |
| 1582 | */ |
| 1583 | xfs_trans_unreserve_and_mod_dquots(tp); |
| 1584 | if (tp->t_ticket) { |
| 1585 | commit_lsn = xfs_log_done(mp, tp->t_ticket, NULL, log_flags); |
| 1586 | if (commit_lsn == -1 && !error) |
| 1587 | error = XFS_ERROR(EIO); |
| 1588 | } |
| 1589 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
| 1590 | xfs_trans_free_items(tp, NULLCOMMITLSN, error ? XFS_TRANS_ABORT : 0); |
| 1591 | xfs_trans_free(tp); |
| 1592 | |
| 1593 | XFS_STATS_INC(xs_trans_empty); |
| 1594 | return error; |
| 1595 | } |
| 1596 | |
| 1597 | /* |
| 1598 | * Unlock all of the transaction's items and free the transaction. |
| 1599 | * The transaction must not have modified any of its items, because |
| 1600 | * there is no way to restore them to their previous state. |
| 1601 | * |
| 1602 | * If the transaction has made a log reservation, make sure to release |
| 1603 | * it as well. |
| 1604 | */ |
| 1605 | void |
| 1606 | xfs_trans_cancel( |
| 1607 | xfs_trans_t *tp, |
| 1608 | int flags) |
| 1609 | { |
| 1610 | int log_flags; |
| 1611 | xfs_mount_t *mp = tp->t_mountp; |
| 1612 | |
| 1613 | /* |
| 1614 | * See if the caller is being too lazy to figure out if |
| 1615 | * the transaction really needs an abort. |
| 1616 | */ |
| 1617 | if ((flags & XFS_TRANS_ABORT) && !(tp->t_flags & XFS_TRANS_DIRTY)) |
| 1618 | flags &= ~XFS_TRANS_ABORT; |
| 1619 | /* |
| 1620 | * See if the caller is relying on us to shut down the |
| 1621 | * filesystem. This happens in paths where we detect |
| 1622 | * corruption and decide to give up. |
| 1623 | */ |
| 1624 | if ((tp->t_flags & XFS_TRANS_DIRTY) && !XFS_FORCED_SHUTDOWN(mp)) { |
| 1625 | XFS_ERROR_REPORT("xfs_trans_cancel", XFS_ERRLEVEL_LOW, mp); |
| 1626 | xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); |
| 1627 | } |
| 1628 | #ifdef DEBUG |
| 1629 | if (!(flags & XFS_TRANS_ABORT) && !XFS_FORCED_SHUTDOWN(mp)) { |
| 1630 | struct xfs_log_item_desc *lidp; |
| 1631 | |
| 1632 | list_for_each_entry(lidp, &tp->t_items, lid_trans) |
| 1633 | ASSERT(!(lidp->lid_item->li_type == XFS_LI_EFD)); |
| 1634 | } |
| 1635 | #endif |
| 1636 | xfs_trans_unreserve_and_mod_sb(tp); |
| 1637 | xfs_trans_unreserve_and_mod_dquots(tp); |
| 1638 | |
| 1639 | if (tp->t_ticket) { |
| 1640 | if (flags & XFS_TRANS_RELEASE_LOG_RES) { |
| 1641 | ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); |
| 1642 | log_flags = XFS_LOG_REL_PERM_RESERV; |
| 1643 | } else { |
| 1644 | log_flags = 0; |
| 1645 | } |
| 1646 | xfs_log_done(mp, tp->t_ticket, NULL, log_flags); |
| 1647 | } |
| 1648 | |
| 1649 | /* mark this thread as no longer being in a transaction */ |
| 1650 | current_restore_flags_nested(&tp->t_pflags, PF_FSTRANS); |
| 1651 | |
| 1652 | xfs_trans_free_items(tp, NULLCOMMITLSN, flags); |
| 1653 | xfs_trans_free(tp); |
| 1654 | } |
| 1655 | |
| 1656 | /* |
| 1657 | * Roll from one trans in the sequence of PERMANENT transactions to |
| 1658 | * the next: permanent transactions are only flushed out when |
| 1659 | * committed with XFS_TRANS_RELEASE_LOG_RES, but we still want as soon |
| 1660 | * as possible to let chunks of it go to the log. So we commit the |
| 1661 | * chunk we've been working on and get a new transaction to continue. |
| 1662 | */ |
| 1663 | int |
| 1664 | xfs_trans_roll( |
| 1665 | struct xfs_trans **tpp, |
| 1666 | struct xfs_inode *dp) |
| 1667 | { |
| 1668 | struct xfs_trans *trans; |
| 1669 | unsigned int logres, count; |
| 1670 | int error; |
| 1671 | |
| 1672 | /* |
| 1673 | * Ensure that the inode is always logged. |
| 1674 | */ |
| 1675 | trans = *tpp; |
| 1676 | xfs_trans_log_inode(trans, dp, XFS_ILOG_CORE); |
| 1677 | |
| 1678 | /* |
| 1679 | * Copy the critical parameters from one trans to the next. |
| 1680 | */ |
| 1681 | logres = trans->t_log_res; |
| 1682 | count = trans->t_log_count; |
| 1683 | *tpp = xfs_trans_dup(trans); |
| 1684 | |
| 1685 | /* |
| 1686 | * Commit the current transaction. |
| 1687 | * If this commit failed, then it'd just unlock those items that |
| 1688 | * are not marked ihold. That also means that a filesystem shutdown |
| 1689 | * is in progress. The caller takes the responsibility to cancel |
| 1690 | * the duplicate transaction that gets returned. |
| 1691 | */ |
| 1692 | error = xfs_trans_commit(trans, 0); |
| 1693 | if (error) |
| 1694 | return (error); |
| 1695 | |
| 1696 | trans = *tpp; |
| 1697 | |
| 1698 | /* |
| 1699 | * transaction commit worked ok so we can drop the extra ticket |
| 1700 | * reference that we gained in xfs_trans_dup() |
| 1701 | */ |
| 1702 | xfs_log_ticket_put(trans->t_ticket); |
| 1703 | |
| 1704 | |
| 1705 | /* |
| 1706 | * Reserve space in the log for th next transaction. |
| 1707 | * This also pushes items in the "AIL", the list of logged items, |
| 1708 | * out to disk if they are taking up space at the tail of the log |
| 1709 | * that we want to use. This requires that either nothing be locked |
| 1710 | * across this call, or that anything that is locked be logged in |
| 1711 | * the prior and the next transactions. |
| 1712 | */ |
| 1713 | error = xfs_trans_reserve(trans, 0, logres, 0, |
| 1714 | XFS_TRANS_PERM_LOG_RES, count); |
| 1715 | /* |
| 1716 | * Ensure that the inode is in the new transaction and locked. |
| 1717 | */ |
| 1718 | if (error) |
| 1719 | return error; |
| 1720 | |
| 1721 | xfs_trans_ijoin(trans, dp, 0); |
| 1722 | return 0; |
| 1723 | } |