jbd: check for error returned by kthread_create on creating journal thread
[deliverable/linux.git] / fs / ocfs2 / suballoc.c
CommitLineData
ccd979bd
MF
1/* -*- mode: c; c-basic-offset: 8; -*-
2 * vim: noexpandtab sw=8 ts=8 sts=0:
3 *
4 * suballoc.c
5 *
6 * metadata alloc and free
7 * Inspired by ext3 block groups.
8 *
9 * Copyright (C) 2002, 2004 Oracle. All rights reserved.
10 *
11 * This program is free software; you can redistribute it and/or
12 * modify it under the terms of the GNU General Public
13 * License as published by the Free Software Foundation; either
14 * version 2 of the License, or (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 * General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public
22 * License along with this program; if not, write to the
23 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
24 * Boston, MA 021110-1307, USA.
25 */
26
27#include <linux/fs.h>
28#include <linux/types.h>
29#include <linux/slab.h>
30#include <linux/highmem.h>
31
32#define MLOG_MASK_PREFIX ML_DISK_ALLOC
33#include <cluster/masklog.h>
34
35#include "ocfs2.h"
36
37#include "alloc.h"
38#include "dlmglue.h"
39#include "inode.h"
40#include "journal.h"
41#include "localalloc.h"
42#include "suballoc.h"
43#include "super.h"
44#include "sysfile.h"
45#include "uptodate.h"
46
47#include "buffer_head_io.h"
48
49static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg);
50static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe);
51static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl);
1fabe148 52static int ocfs2_block_group_fill(handle_t *handle,
ccd979bd
MF
53 struct inode *alloc_inode,
54 struct buffer_head *bg_bh,
55 u64 group_blkno,
56 u16 my_chain,
57 struct ocfs2_chain_list *cl);
58static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
59 struct inode *alloc_inode,
60 struct buffer_head *bh);
61
ccd979bd
MF
62static int ocfs2_cluster_group_search(struct inode *inode,
63 struct buffer_head *group_bh,
64 u32 bits_wanted, u32 min_bits,
65 u16 *bit_off, u16 *bits_found);
66static int ocfs2_block_group_search(struct inode *inode,
67 struct buffer_head *group_bh,
68 u32 bits_wanted, u32 min_bits,
69 u16 *bit_off, u16 *bits_found);
ccd979bd
MF
70static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
71 struct ocfs2_alloc_context *ac,
1fabe148 72 handle_t *handle,
ccd979bd
MF
73 u32 bits_wanted,
74 u32 min_bits,
75 u16 *bit_off,
76 unsigned int *num_bits,
77 u64 *bg_blkno);
78static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
79 int nr);
1fabe148 80static inline int ocfs2_block_group_set_bits(handle_t *handle,
ccd979bd
MF
81 struct inode *alloc_inode,
82 struct ocfs2_group_desc *bg,
83 struct buffer_head *group_bh,
84 unsigned int bit_off,
85 unsigned int num_bits);
1fabe148 86static inline int ocfs2_block_group_clear_bits(handle_t *handle,
ccd979bd
MF
87 struct inode *alloc_inode,
88 struct ocfs2_group_desc *bg,
89 struct buffer_head *group_bh,
90 unsigned int bit_off,
91 unsigned int num_bits);
92
1fabe148 93static int ocfs2_relink_block_group(handle_t *handle,
ccd979bd
MF
94 struct inode *alloc_inode,
95 struct buffer_head *fe_bh,
96 struct buffer_head *bg_bh,
97 struct buffer_head *prev_bg_bh,
98 u16 chain);
99static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
100 u32 wanted);
1fabe148 101static int ocfs2_free_suballoc_bits(handle_t *handle,
ccd979bd
MF
102 struct inode *alloc_inode,
103 struct buffer_head *alloc_bh,
104 unsigned int start_bit,
105 u64 bg_blkno,
106 unsigned int count);
107static inline u64 ocfs2_which_suballoc_group(u64 block,
108 unsigned int bit);
109static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
110 u64 bg_blkno,
111 u16 bg_bit_off);
112static inline u64 ocfs2_which_cluster_group(struct inode *inode,
113 u32 cluster);
114static inline void ocfs2_block_to_cluster_group(struct inode *inode,
115 u64 data_blkno,
116 u64 *bg_blkno,
117 u16 *bg_bit_off);
118
119void ocfs2_free_alloc_context(struct ocfs2_alloc_context *ac)
120{
da5cbf2f
MF
121 struct inode *inode = ac->ac_inode;
122
123 if (inode) {
124 if (ac->ac_which != OCFS2_AC_USE_LOCAL)
125 ocfs2_meta_unlock(inode, 1);
126
127 mutex_unlock(&inode->i_mutex);
128
129 iput(inode);
130 }
ccd979bd
MF
131 if (ac->ac_bh)
132 brelse(ac->ac_bh);
133 kfree(ac);
134}
135
136static u32 ocfs2_bits_per_group(struct ocfs2_chain_list *cl)
137{
138 return (u32)le16_to_cpu(cl->cl_cpg) * (u32)le16_to_cpu(cl->cl_bpc);
139}
140
7bf72ede
MF
141/* somewhat more expensive than our other checks, so use sparingly. */
142static int ocfs2_check_group_descriptor(struct super_block *sb,
143 struct ocfs2_dinode *di,
144 struct ocfs2_group_desc *gd)
145{
146 unsigned int max_bits;
147
148 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
149 OCFS2_RO_ON_INVALID_GROUP_DESC(sb, gd);
150 return -EIO;
151 }
152
153 if (di->i_blkno != gd->bg_parent_dinode) {
154 ocfs2_error(sb, "Group descriptor # %llu has bad parent "
155 "pointer (%llu, expected %llu)",
156 (unsigned long long)le64_to_cpu(gd->bg_blkno),
157 (unsigned long long)le64_to_cpu(gd->bg_parent_dinode),
158 (unsigned long long)le64_to_cpu(di->i_blkno));
159 return -EIO;
160 }
161
162 max_bits = le16_to_cpu(di->id2.i_chain.cl_cpg) * le16_to_cpu(di->id2.i_chain.cl_bpc);
163 if (le16_to_cpu(gd->bg_bits) > max_bits) {
164 ocfs2_error(sb, "Group descriptor # %llu has bit count of %u",
165 (unsigned long long)le64_to_cpu(gd->bg_blkno),
166 le16_to_cpu(gd->bg_bits));
167 return -EIO;
168 }
169
170 if (le16_to_cpu(gd->bg_chain) >=
171 le16_to_cpu(di->id2.i_chain.cl_next_free_rec)) {
172 ocfs2_error(sb, "Group descriptor # %llu has bad chain %u",
173 (unsigned long long)le64_to_cpu(gd->bg_blkno),
174 le16_to_cpu(gd->bg_chain));
175 return -EIO;
176 }
177
178 if (le16_to_cpu(gd->bg_free_bits_count) > le16_to_cpu(gd->bg_bits)) {
179 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
180 "claims that %u are free",
181 (unsigned long long)le64_to_cpu(gd->bg_blkno),
182 le16_to_cpu(gd->bg_bits),
183 le16_to_cpu(gd->bg_free_bits_count));
184 return -EIO;
185 }
186
187 if (le16_to_cpu(gd->bg_bits) > (8 * le16_to_cpu(gd->bg_size))) {
188 ocfs2_error(sb, "Group descriptor # %llu has bit count %u but "
189 "max bitmap bits of %u",
190 (unsigned long long)le64_to_cpu(gd->bg_blkno),
191 le16_to_cpu(gd->bg_bits),
192 8 * le16_to_cpu(gd->bg_size));
193 return -EIO;
194 }
195
196 return 0;
197}
198
1fabe148 199static int ocfs2_block_group_fill(handle_t *handle,
ccd979bd
MF
200 struct inode *alloc_inode,
201 struct buffer_head *bg_bh,
202 u64 group_blkno,
203 u16 my_chain,
204 struct ocfs2_chain_list *cl)
205{
206 int status = 0;
207 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
208 struct super_block * sb = alloc_inode->i_sb;
209
210 mlog_entry_void();
211
212 if (((unsigned long long) bg_bh->b_blocknr) != group_blkno) {
b0697053
MF
213 ocfs2_error(alloc_inode->i_sb, "group block (%llu) != "
214 "b_blocknr (%llu)",
215 (unsigned long long)group_blkno,
ccd979bd
MF
216 (unsigned long long) bg_bh->b_blocknr);
217 status = -EIO;
218 goto bail;
219 }
220
221 status = ocfs2_journal_access(handle,
222 alloc_inode,
223 bg_bh,
224 OCFS2_JOURNAL_ACCESS_CREATE);
225 if (status < 0) {
226 mlog_errno(status);
227 goto bail;
228 }
229
230 memset(bg, 0, sb->s_blocksize);
231 strcpy(bg->bg_signature, OCFS2_GROUP_DESC_SIGNATURE);
232 bg->bg_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
233 bg->bg_size = cpu_to_le16(ocfs2_group_bitmap_size(sb));
234 bg->bg_bits = cpu_to_le16(ocfs2_bits_per_group(cl));
235 bg->bg_chain = cpu_to_le16(my_chain);
236 bg->bg_next_group = cl->cl_recs[my_chain].c_blkno;
237 bg->bg_parent_dinode = cpu_to_le64(OCFS2_I(alloc_inode)->ip_blkno);
238 bg->bg_blkno = cpu_to_le64(group_blkno);
239 /* set the 1st bit in the bitmap to account for the descriptor block */
240 ocfs2_set_bit(0, (unsigned long *)bg->bg_bitmap);
241 bg->bg_free_bits_count = cpu_to_le16(le16_to_cpu(bg->bg_bits) - 1);
242
243 status = ocfs2_journal_dirty(handle, bg_bh);
244 if (status < 0)
245 mlog_errno(status);
246
247 /* There is no need to zero out or otherwise initialize the
248 * other blocks in a group - All valid FS metadata in a block
249 * group stores the superblock fs_generation value at
250 * allocation time. */
251
252bail:
253 mlog_exit(status);
254 return status;
255}
256
257static inline u16 ocfs2_find_smallest_chain(struct ocfs2_chain_list *cl)
258{
259 u16 curr, best;
260
261 best = curr = 0;
262 while (curr < le16_to_cpu(cl->cl_count)) {
263 if (le32_to_cpu(cl->cl_recs[best].c_total) >
264 le32_to_cpu(cl->cl_recs[curr].c_total))
265 best = curr;
266 curr++;
267 }
268 return best;
269}
270
271/*
272 * We expect the block group allocator to already be locked.
273 */
274static int ocfs2_block_group_alloc(struct ocfs2_super *osb,
275 struct inode *alloc_inode,
276 struct buffer_head *bh)
277{
278 int status, credits;
279 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) bh->b_data;
280 struct ocfs2_chain_list *cl;
281 struct ocfs2_alloc_context *ac = NULL;
1fabe148 282 handle_t *handle = NULL;
ccd979bd
MF
283 u32 bit_off, num_bits;
284 u16 alloc_rec;
285 u64 bg_blkno;
286 struct buffer_head *bg_bh = NULL;
287 struct ocfs2_group_desc *bg;
288
289 BUG_ON(ocfs2_is_cluster_bitmap(alloc_inode));
290
291 mlog_entry_void();
292
ccd979bd
MF
293 cl = &fe->id2.i_chain;
294 status = ocfs2_reserve_clusters(osb,
ccd979bd
MF
295 le16_to_cpu(cl->cl_cpg),
296 &ac);
297 if (status < 0) {
298 if (status != -ENOSPC)
299 mlog_errno(status);
300 goto bail;
301 }
302
303 credits = ocfs2_calc_group_alloc_credits(osb->sb,
304 le16_to_cpu(cl->cl_cpg));
65eff9cc 305 handle = ocfs2_start_trans(osb, credits);
ccd979bd
MF
306 if (IS_ERR(handle)) {
307 status = PTR_ERR(handle);
308 handle = NULL;
309 mlog_errno(status);
310 goto bail;
311 }
312
313 status = ocfs2_claim_clusters(osb,
314 handle,
315 ac,
316 le16_to_cpu(cl->cl_cpg),
317 &bit_off,
318 &num_bits);
319 if (status < 0) {
320 if (status != -ENOSPC)
321 mlog_errno(status);
322 goto bail;
323 }
324
325 alloc_rec = ocfs2_find_smallest_chain(cl);
326
327 /* setup the group */
328 bg_blkno = ocfs2_clusters_to_blocks(osb->sb, bit_off);
b0697053
MF
329 mlog(0, "new descriptor, record %u, at block %llu\n",
330 alloc_rec, (unsigned long long)bg_blkno);
ccd979bd
MF
331
332 bg_bh = sb_getblk(osb->sb, bg_blkno);
333 if (!bg_bh) {
334 status = -EIO;
335 mlog_errno(status);
336 goto bail;
337 }
338 ocfs2_set_new_buffer_uptodate(alloc_inode, bg_bh);
339
340 status = ocfs2_block_group_fill(handle,
341 alloc_inode,
342 bg_bh,
343 bg_blkno,
344 alloc_rec,
345 cl);
346 if (status < 0) {
347 mlog_errno(status);
348 goto bail;
349 }
350
351 bg = (struct ocfs2_group_desc *) bg_bh->b_data;
352
353 status = ocfs2_journal_access(handle, alloc_inode,
354 bh, OCFS2_JOURNAL_ACCESS_WRITE);
355 if (status < 0) {
356 mlog_errno(status);
357 goto bail;
358 }
359
360 le32_add_cpu(&cl->cl_recs[alloc_rec].c_free,
361 le16_to_cpu(bg->bg_free_bits_count));
362 le32_add_cpu(&cl->cl_recs[alloc_rec].c_total, le16_to_cpu(bg->bg_bits));
363 cl->cl_recs[alloc_rec].c_blkno = cpu_to_le64(bg_blkno);
364 if (le16_to_cpu(cl->cl_next_free_rec) < le16_to_cpu(cl->cl_count))
365 le16_add_cpu(&cl->cl_next_free_rec, 1);
366
367 le32_add_cpu(&fe->id1.bitmap1.i_used, le16_to_cpu(bg->bg_bits) -
368 le16_to_cpu(bg->bg_free_bits_count));
369 le32_add_cpu(&fe->id1.bitmap1.i_total, le16_to_cpu(bg->bg_bits));
370 le32_add_cpu(&fe->i_clusters, le16_to_cpu(cl->cl_cpg));
371
372 status = ocfs2_journal_dirty(handle, bh);
373 if (status < 0) {
374 mlog_errno(status);
375 goto bail;
376 }
377
378 spin_lock(&OCFS2_I(alloc_inode)->ip_lock);
379 OCFS2_I(alloc_inode)->ip_clusters = le32_to_cpu(fe->i_clusters);
380 fe->i_size = cpu_to_le64(ocfs2_clusters_to_bytes(alloc_inode->i_sb,
381 le32_to_cpu(fe->i_clusters)));
382 spin_unlock(&OCFS2_I(alloc_inode)->ip_lock);
383 i_size_write(alloc_inode, le64_to_cpu(fe->i_size));
8110b073 384 alloc_inode->i_blocks = ocfs2_inode_sector_count(alloc_inode);
ccd979bd
MF
385
386 status = 0;
387bail:
388 if (handle)
02dc1af4 389 ocfs2_commit_trans(osb, handle);
ccd979bd
MF
390
391 if (ac)
392 ocfs2_free_alloc_context(ac);
393
394 if (bg_bh)
395 brelse(bg_bh);
396
397 mlog_exit(status);
398 return status;
399}
400
401static int ocfs2_reserve_suballoc_bits(struct ocfs2_super *osb,
da5cbf2f
MF
402 struct ocfs2_alloc_context *ac,
403 int type,
404 u32 slot)
ccd979bd
MF
405{
406 int status;
407 u32 bits_wanted = ac->ac_bits_wanted;
da5cbf2f 408 struct inode *alloc_inode;
ccd979bd 409 struct buffer_head *bh = NULL;
ccd979bd
MF
410 struct ocfs2_dinode *fe;
411 u32 free_bits;
412
413 mlog_entry_void();
414
da5cbf2f
MF
415 alloc_inode = ocfs2_get_system_file_inode(osb, type, slot);
416 if (!alloc_inode) {
417 mlog_errno(-EINVAL);
418 return -EINVAL;
419 }
ccd979bd 420
da5cbf2f
MF
421 mutex_lock(&alloc_inode->i_mutex);
422
4bcec184 423 status = ocfs2_meta_lock(alloc_inode, &bh, 1);
ccd979bd 424 if (status < 0) {
da5cbf2f
MF
425 mutex_unlock(&alloc_inode->i_mutex);
426 iput(alloc_inode);
427
ccd979bd 428 mlog_errno(status);
da5cbf2f 429 return status;
ccd979bd
MF
430 }
431
da5cbf2f
MF
432 ac->ac_inode = alloc_inode;
433
ccd979bd
MF
434 fe = (struct ocfs2_dinode *) bh->b_data;
435 if (!OCFS2_IS_VALID_DINODE(fe)) {
436 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
437 status = -EIO;
438 goto bail;
439 }
440 if (!(fe->i_flags & cpu_to_le32(OCFS2_CHAIN_FL))) {
b0697053
MF
441 ocfs2_error(alloc_inode->i_sb, "Invalid chain allocator %llu",
442 (unsigned long long)le64_to_cpu(fe->i_blkno));
ccd979bd
MF
443 status = -EIO;
444 goto bail;
445 }
446
447 free_bits = le32_to_cpu(fe->id1.bitmap1.i_total) -
448 le32_to_cpu(fe->id1.bitmap1.i_used);
449
450 if (bits_wanted > free_bits) {
451 /* cluster bitmap never grows */
452 if (ocfs2_is_cluster_bitmap(alloc_inode)) {
453 mlog(0, "Disk Full: wanted=%u, free_bits=%u\n",
454 bits_wanted, free_bits);
455 status = -ENOSPC;
456 goto bail;
457 }
458
459 status = ocfs2_block_group_alloc(osb, alloc_inode, bh);
460 if (status < 0) {
461 if (status != -ENOSPC)
462 mlog_errno(status);
463 goto bail;
464 }
465 atomic_inc(&osb->alloc_stats.bg_extends);
466
467 /* You should never ask for this much metadata */
468 BUG_ON(bits_wanted >
469 (le32_to_cpu(fe->id1.bitmap1.i_total)
470 - le32_to_cpu(fe->id1.bitmap1.i_used)));
471 }
472
473 get_bh(bh);
474 ac->ac_bh = bh;
475bail:
476 if (bh)
477 brelse(bh);
478
479 mlog_exit(status);
480 return status;
481}
482
483int ocfs2_reserve_new_metadata(struct ocfs2_super *osb,
ccd979bd
MF
484 struct ocfs2_dinode *fe,
485 struct ocfs2_alloc_context **ac)
486{
487 int status;
da5cbf2f 488 u32 slot;
ccd979bd 489
cd861280 490 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
ccd979bd
MF
491 if (!(*ac)) {
492 status = -ENOMEM;
493 mlog_errno(status);
494 goto bail;
495 }
496
497 (*ac)->ac_bits_wanted = ocfs2_extend_meta_needed(fe);
ccd979bd
MF
498 (*ac)->ac_which = OCFS2_AC_USE_META;
499
500#ifndef OCFS2_USE_ALL_METADATA_SUBALLOCATORS
da5cbf2f 501 slot = 0;
ccd979bd 502#else
da5cbf2f 503 slot = osb->slot_num;
ccd979bd 504#endif
ccd979bd 505
ccd979bd
MF
506 (*ac)->ac_group_search = ocfs2_block_group_search;
507
da5cbf2f
MF
508 status = ocfs2_reserve_suballoc_bits(osb, (*ac),
509 EXTENT_ALLOC_SYSTEM_INODE, slot);
ccd979bd
MF
510 if (status < 0) {
511 if (status != -ENOSPC)
512 mlog_errno(status);
513 goto bail;
514 }
515
516 status = 0;
517bail:
518 if ((status < 0) && *ac) {
519 ocfs2_free_alloc_context(*ac);
520 *ac = NULL;
521 }
522
ccd979bd
MF
523 mlog_exit(status);
524 return status;
525}
526
527int ocfs2_reserve_new_inode(struct ocfs2_super *osb,
ccd979bd
MF
528 struct ocfs2_alloc_context **ac)
529{
530 int status;
ccd979bd 531
cd861280 532 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
ccd979bd
MF
533 if (!(*ac)) {
534 status = -ENOMEM;
535 mlog_errno(status);
536 goto bail;
537 }
538
539 (*ac)->ac_bits_wanted = 1;
ccd979bd
MF
540 (*ac)->ac_which = OCFS2_AC_USE_INODE;
541
ccd979bd
MF
542 (*ac)->ac_group_search = ocfs2_block_group_search;
543
da5cbf2f
MF
544 status = ocfs2_reserve_suballoc_bits(osb, *ac,
545 INODE_ALLOC_SYSTEM_INODE,
546 osb->slot_num);
ccd979bd
MF
547 if (status < 0) {
548 if (status != -ENOSPC)
549 mlog_errno(status);
550 goto bail;
551 }
552
553 status = 0;
554bail:
555 if ((status < 0) && *ac) {
556 ocfs2_free_alloc_context(*ac);
557 *ac = NULL;
558 }
559
ccd979bd
MF
560 mlog_exit(status);
561 return status;
562}
563
564/* local alloc code has to do the same thing, so rather than do this
565 * twice.. */
566int ocfs2_reserve_cluster_bitmap_bits(struct ocfs2_super *osb,
567 struct ocfs2_alloc_context *ac)
568{
569 int status;
570
ccd979bd
MF
571 ac->ac_which = OCFS2_AC_USE_MAIN;
572 ac->ac_group_search = ocfs2_cluster_group_search;
573
da5cbf2f
MF
574 status = ocfs2_reserve_suballoc_bits(osb, ac,
575 GLOBAL_BITMAP_SYSTEM_INODE,
576 OCFS2_INVALID_SLOT);
577 if (status < 0 && status != -ENOSPC) {
ccd979bd 578 mlog_errno(status);
da5cbf2f
MF
579 goto bail;
580 }
581
ccd979bd
MF
582bail:
583 return status;
584}
585
586/* Callers don't need to care which bitmap (local alloc or main) to
587 * use so we figure it out for them, but unfortunately this clutters
588 * things a bit. */
589int ocfs2_reserve_clusters(struct ocfs2_super *osb,
ccd979bd
MF
590 u32 bits_wanted,
591 struct ocfs2_alloc_context **ac)
592{
593 int status;
594
595 mlog_entry_void();
596
cd861280 597 *ac = kzalloc(sizeof(struct ocfs2_alloc_context), GFP_KERNEL);
ccd979bd
MF
598 if (!(*ac)) {
599 status = -ENOMEM;
600 mlog_errno(status);
601 goto bail;
602 }
603
604 (*ac)->ac_bits_wanted = bits_wanted;
ccd979bd
MF
605
606 status = -ENOSPC;
607 if (ocfs2_alloc_should_use_local(osb, bits_wanted)) {
608 status = ocfs2_reserve_local_alloc_bits(osb,
ccd979bd
MF
609 bits_wanted,
610 *ac);
611 if ((status < 0) && (status != -ENOSPC)) {
612 mlog_errno(status);
613 goto bail;
614 } else if (status == -ENOSPC) {
615 /* reserve_local_bits will return enospc with
616 * the local alloc inode still locked, so we
617 * can change this safely here. */
618 mlog(0, "Disabling local alloc\n");
619 /* We set to OCFS2_LA_DISABLED so that umount
620 * can clean up what's left of the local
621 * allocation */
622 osb->local_alloc_state = OCFS2_LA_DISABLED;
623 }
624 }
625
626 if (status == -ENOSPC) {
627 status = ocfs2_reserve_cluster_bitmap_bits(osb, *ac);
628 if (status < 0) {
629 if (status != -ENOSPC)
630 mlog_errno(status);
631 goto bail;
632 }
633 }
634
635 status = 0;
636bail:
637 if ((status < 0) && *ac) {
638 ocfs2_free_alloc_context(*ac);
639 *ac = NULL;
640 }
641
642 mlog_exit(status);
643 return status;
644}
645
646/*
647 * More or less lifted from ext3. I'll leave their description below:
648 *
649 * "For ext3 allocations, we must not reuse any blocks which are
650 * allocated in the bitmap buffer's "last committed data" copy. This
651 * prevents deletes from freeing up the page for reuse until we have
652 * committed the delete transaction.
653 *
654 * If we didn't do this, then deleting something and reallocating it as
655 * data would allow the old block to be overwritten before the
656 * transaction committed (because we force data to disk before commit).
657 * This would lead to corruption if we crashed between overwriting the
658 * data and committing the delete.
659 *
660 * @@@ We may want to make this allocation behaviour conditional on
661 * data-writes at some point, and disable it for metadata allocations or
662 * sync-data inodes."
663 *
664 * Note: OCFS2 already does this differently for metadata vs data
665 * allocations, as those bitmaps are seperate and undo access is never
666 * called on a metadata group descriptor.
667 */
668static int ocfs2_test_bg_bit_allocatable(struct buffer_head *bg_bh,
669 int nr)
670{
671 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
672
673 if (ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap))
674 return 0;
675 if (!buffer_jbd(bg_bh) || !bh2jh(bg_bh)->b_committed_data)
676 return 1;
677
678 bg = (struct ocfs2_group_desc *) bh2jh(bg_bh)->b_committed_data;
679 return !ocfs2_test_bit(nr, (unsigned long *)bg->bg_bitmap);
680}
681
682static int ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb,
683 struct buffer_head *bg_bh,
684 unsigned int bits_wanted,
7bf72ede 685 unsigned int total_bits,
ccd979bd
MF
686 u16 *bit_off,
687 u16 *bits_found)
688{
689 void *bitmap;
690 u16 best_offset, best_size;
691 int offset, start, found, status = 0;
692 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
693
694 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
695 OCFS2_RO_ON_INVALID_GROUP_DESC(osb->sb, bg);
696 return -EIO;
697 }
698
699 found = start = best_offset = best_size = 0;
700 bitmap = bg->bg_bitmap;
701
7bf72ede
MF
702 while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) {
703 if (offset == total_bits)
ccd979bd
MF
704 break;
705
706 if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) {
707 /* We found a zero, but we can't use it as it
708 * hasn't been put to disk yet! */
709 found = 0;
710 start = offset + 1;
711 } else if (offset == start) {
712 /* we found a zero */
713 found++;
714 /* move start to the next bit to test */
715 start++;
716 } else {
717 /* got a zero after some ones */
718 found = 1;
719 start = offset + 1;
720 }
721 if (found > best_size) {
722 best_size = found;
723 best_offset = start - found;
724 }
725 /* we got everything we needed */
726 if (found == bits_wanted) {
727 /* mlog(0, "Found it all!\n"); */
728 break;
729 }
730 }
731
732 /* XXX: I think the first clause is equivalent to the second
733 * - jlbec */
734 if (found == bits_wanted) {
735 *bit_off = start - found;
736 *bits_found = found;
737 } else if (best_size) {
738 *bit_off = best_offset;
739 *bits_found = best_size;
740 } else {
741 status = -ENOSPC;
742 /* No error log here -- see the comment above
743 * ocfs2_test_bg_bit_allocatable */
744 }
745
746 return status;
747}
748
1fabe148 749static inline int ocfs2_block_group_set_bits(handle_t *handle,
ccd979bd
MF
750 struct inode *alloc_inode,
751 struct ocfs2_group_desc *bg,
752 struct buffer_head *group_bh,
753 unsigned int bit_off,
754 unsigned int num_bits)
755{
756 int status;
757 void *bitmap = bg->bg_bitmap;
758 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
759
760 mlog_entry_void();
761
762 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
763 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
764 status = -EIO;
765 goto bail;
766 }
767 BUG_ON(le16_to_cpu(bg->bg_free_bits_count) < num_bits);
768
769 mlog(0, "block_group_set_bits: off = %u, num = %u\n", bit_off,
770 num_bits);
771
772 if (ocfs2_is_cluster_bitmap(alloc_inode))
773 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
774
775 status = ocfs2_journal_access(handle,
776 alloc_inode,
777 group_bh,
778 journal_type);
779 if (status < 0) {
780 mlog_errno(status);
781 goto bail;
782 }
783
784 le16_add_cpu(&bg->bg_free_bits_count, -num_bits);
785
786 while(num_bits--)
787 ocfs2_set_bit(bit_off++, bitmap);
788
789 status = ocfs2_journal_dirty(handle,
790 group_bh);
791 if (status < 0) {
792 mlog_errno(status);
793 goto bail;
794 }
795
796bail:
797 mlog_exit(status);
798 return status;
799}
800
801/* find the one with the most empty bits */
802static inline u16 ocfs2_find_victim_chain(struct ocfs2_chain_list *cl)
803{
804 u16 curr, best;
805
806 BUG_ON(!cl->cl_next_free_rec);
807
808 best = curr = 0;
809 while (curr < le16_to_cpu(cl->cl_next_free_rec)) {
810 if (le32_to_cpu(cl->cl_recs[curr].c_free) >
811 le32_to_cpu(cl->cl_recs[best].c_free))
812 best = curr;
813 curr++;
814 }
815
816 BUG_ON(best >= le16_to_cpu(cl->cl_next_free_rec));
817 return best;
818}
819
1fabe148 820static int ocfs2_relink_block_group(handle_t *handle,
ccd979bd
MF
821 struct inode *alloc_inode,
822 struct buffer_head *fe_bh,
823 struct buffer_head *bg_bh,
824 struct buffer_head *prev_bg_bh,
825 u16 chain)
826{
827 int status;
828 /* there is a really tiny chance the journal calls could fail,
829 * but we wouldn't want inconsistent blocks in *any* case. */
830 u64 fe_ptr, bg_ptr, prev_bg_ptr;
831 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) fe_bh->b_data;
832 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) bg_bh->b_data;
833 struct ocfs2_group_desc *prev_bg = (struct ocfs2_group_desc *) prev_bg_bh->b_data;
834
835 if (!OCFS2_IS_VALID_DINODE(fe)) {
836 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
837 status = -EIO;
838 goto out;
839 }
840 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
841 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
842 status = -EIO;
843 goto out;
844 }
845 if (!OCFS2_IS_VALID_GROUP_DESC(prev_bg)) {
846 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, prev_bg);
847 status = -EIO;
848 goto out;
849 }
850
b0697053 851 mlog(0, "Suballoc %llu, chain %u, move group %llu to top, prev = %llu\n",
1ca1a111
MF
852 (unsigned long long)le64_to_cpu(fe->i_blkno), chain,
853 (unsigned long long)le64_to_cpu(bg->bg_blkno),
854 (unsigned long long)le64_to_cpu(prev_bg->bg_blkno));
ccd979bd
MF
855
856 fe_ptr = le64_to_cpu(fe->id2.i_chain.cl_recs[chain].c_blkno);
857 bg_ptr = le64_to_cpu(bg->bg_next_group);
858 prev_bg_ptr = le64_to_cpu(prev_bg->bg_next_group);
859
860 status = ocfs2_journal_access(handle, alloc_inode, prev_bg_bh,
861 OCFS2_JOURNAL_ACCESS_WRITE);
862 if (status < 0) {
863 mlog_errno(status);
864 goto out_rollback;
865 }
866
867 prev_bg->bg_next_group = bg->bg_next_group;
868
869 status = ocfs2_journal_dirty(handle, prev_bg_bh);
870 if (status < 0) {
871 mlog_errno(status);
872 goto out_rollback;
873 }
874
875 status = ocfs2_journal_access(handle, alloc_inode, bg_bh,
876 OCFS2_JOURNAL_ACCESS_WRITE);
877 if (status < 0) {
878 mlog_errno(status);
879 goto out_rollback;
880 }
881
882 bg->bg_next_group = fe->id2.i_chain.cl_recs[chain].c_blkno;
883
884 status = ocfs2_journal_dirty(handle, bg_bh);
885 if (status < 0) {
886 mlog_errno(status);
887 goto out_rollback;
888 }
889
890 status = ocfs2_journal_access(handle, alloc_inode, fe_bh,
891 OCFS2_JOURNAL_ACCESS_WRITE);
892 if (status < 0) {
893 mlog_errno(status);
894 goto out_rollback;
895 }
896
897 fe->id2.i_chain.cl_recs[chain].c_blkno = bg->bg_blkno;
898
899 status = ocfs2_journal_dirty(handle, fe_bh);
900 if (status < 0) {
901 mlog_errno(status);
902 goto out_rollback;
903 }
904
905 status = 0;
906out_rollback:
907 if (status < 0) {
908 fe->id2.i_chain.cl_recs[chain].c_blkno = cpu_to_le64(fe_ptr);
909 bg->bg_next_group = cpu_to_le64(bg_ptr);
910 prev_bg->bg_next_group = cpu_to_le64(prev_bg_ptr);
911 }
912out:
913 mlog_exit(status);
914 return status;
915}
916
917static inline int ocfs2_block_group_reasonably_empty(struct ocfs2_group_desc *bg,
918 u32 wanted)
919{
920 return le16_to_cpu(bg->bg_free_bits_count) > wanted;
921}
922
923/* return 0 on success, -ENOSPC to keep searching and any other < 0
924 * value on error. */
925static int ocfs2_cluster_group_search(struct inode *inode,
926 struct buffer_head *group_bh,
927 u32 bits_wanted, u32 min_bits,
928 u16 *bit_off, u16 *bits_found)
929{
930 int search = -ENOSPC;
931 int ret;
7bf72ede 932 struct ocfs2_group_desc *gd = (struct ocfs2_group_desc *) group_bh->b_data;
ccd979bd 933 u16 tmp_off, tmp_found;
7bf72ede 934 unsigned int max_bits, gd_cluster_off;
ccd979bd
MF
935
936 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
937
7bf72ede
MF
938 if (gd->bg_free_bits_count) {
939 max_bits = le16_to_cpu(gd->bg_bits);
940
941 /* Tail groups in cluster bitmaps which aren't cpg
942 * aligned are prone to partial extention by a failed
943 * fs resize. If the file system resize never got to
944 * update the dinode cluster count, then we don't want
945 * to trust any clusters past it, regardless of what
946 * the group descriptor says. */
947 gd_cluster_off = ocfs2_blocks_to_clusters(inode->i_sb,
948 le64_to_cpu(gd->bg_blkno));
949 if ((gd_cluster_off + max_bits) >
950 OCFS2_I(inode)->ip_clusters) {
951 max_bits = OCFS2_I(inode)->ip_clusters - gd_cluster_off;
952 mlog(0, "Desc %llu, bg_bits %u, clusters %u, use %u\n",
953 (unsigned long long)le64_to_cpu(gd->bg_blkno),
954 le16_to_cpu(gd->bg_bits),
955 OCFS2_I(inode)->ip_clusters, max_bits);
956 }
957
ccd979bd
MF
958 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
959 group_bh, bits_wanted,
7bf72ede 960 max_bits,
ccd979bd
MF
961 &tmp_off, &tmp_found);
962 if (ret)
963 return ret;
964
965 /* ocfs2_block_group_find_clear_bits() might
966 * return success, but we still want to return
967 * -ENOSPC unless it found the minimum number
968 * of bits. */
969 if (min_bits <= tmp_found) {
970 *bit_off = tmp_off;
971 *bits_found = tmp_found;
972 search = 0; /* success */
973 }
974 }
975
976 return search;
977}
978
979static int ocfs2_block_group_search(struct inode *inode,
980 struct buffer_head *group_bh,
981 u32 bits_wanted, u32 min_bits,
982 u16 *bit_off, u16 *bits_found)
983{
984 int ret = -ENOSPC;
985 struct ocfs2_group_desc *bg = (struct ocfs2_group_desc *) group_bh->b_data;
986
987 BUG_ON(min_bits != 1);
988 BUG_ON(ocfs2_is_cluster_bitmap(inode));
989
990 if (bg->bg_free_bits_count)
991 ret = ocfs2_block_group_find_clear_bits(OCFS2_SB(inode->i_sb),
992 group_bh, bits_wanted,
7bf72ede 993 le16_to_cpu(bg->bg_bits),
ccd979bd
MF
994 bit_off, bits_found);
995
996 return ret;
997}
998
883d4cae 999static int ocfs2_alloc_dinode_update_counts(struct inode *inode,
1fabe148 1000 handle_t *handle,
883d4cae
MF
1001 struct buffer_head *di_bh,
1002 u32 num_bits,
1003 u16 chain)
1004{
1005 int ret;
1006 u32 tmp_used;
1007 struct ocfs2_dinode *di = (struct ocfs2_dinode *) di_bh->b_data;
1008 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &di->id2.i_chain;
1009
1010 ret = ocfs2_journal_access(handle, inode, di_bh,
1011 OCFS2_JOURNAL_ACCESS_WRITE);
1012 if (ret < 0) {
1013 mlog_errno(ret);
1014 goto out;
1015 }
1016
1017 tmp_used = le32_to_cpu(di->id1.bitmap1.i_used);
1018 di->id1.bitmap1.i_used = cpu_to_le32(num_bits + tmp_used);
1019 le32_add_cpu(&cl->cl_recs[chain].c_free, -num_bits);
1020
1021 ret = ocfs2_journal_dirty(handle, di_bh);
1022 if (ret < 0)
1023 mlog_errno(ret);
1024
1025out:
1026 return ret;
1027}
1028
1029static int ocfs2_search_one_group(struct ocfs2_alloc_context *ac,
1fabe148 1030 handle_t *handle,
883d4cae
MF
1031 u32 bits_wanted,
1032 u32 min_bits,
1033 u16 *bit_off,
1034 unsigned int *num_bits,
1035 u64 gd_blkno,
1036 u16 *bits_left)
1037{
1038 int ret;
1039 u16 found;
1040 struct buffer_head *group_bh = NULL;
1041 struct ocfs2_group_desc *gd;
1042 struct inode *alloc_inode = ac->ac_inode;
883d4cae
MF
1043
1044 ret = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb), gd_blkno,
1045 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1046 if (ret < 0) {
1047 mlog_errno(ret);
1048 return ret;
1049 }
1050
1051 gd = (struct ocfs2_group_desc *) group_bh->b_data;
1052 if (!OCFS2_IS_VALID_GROUP_DESC(gd)) {
1053 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, gd);
1054 ret = -EIO;
1055 goto out;
1056 }
1057
1058 ret = ac->ac_group_search(alloc_inode, group_bh, bits_wanted, min_bits,
1059 bit_off, &found);
1060 if (ret < 0) {
1061 if (ret != -ENOSPC)
1062 mlog_errno(ret);
1063 goto out;
1064 }
1065
1066 *num_bits = found;
1067
1068 ret = ocfs2_alloc_dinode_update_counts(alloc_inode, handle, ac->ac_bh,
1069 *num_bits,
1070 le16_to_cpu(gd->bg_chain));
1071 if (ret < 0) {
1072 mlog_errno(ret);
1073 goto out;
1074 }
1075
1076 ret = ocfs2_block_group_set_bits(handle, alloc_inode, gd, group_bh,
1077 *bit_off, *num_bits);
1078 if (ret < 0)
1079 mlog_errno(ret);
1080
1081 *bits_left = le16_to_cpu(gd->bg_free_bits_count);
1082
1083out:
1084 brelse(group_bh);
1085
1086 return ret;
1087}
1088
ccd979bd 1089static int ocfs2_search_chain(struct ocfs2_alloc_context *ac,
1fabe148 1090 handle_t *handle,
ccd979bd
MF
1091 u32 bits_wanted,
1092 u32 min_bits,
1093 u16 *bit_off,
1094 unsigned int *num_bits,
883d4cae
MF
1095 u64 *bg_blkno,
1096 u16 *bits_left)
ccd979bd
MF
1097{
1098 int status;
1099 u16 chain, tmp_bits;
1100 u32 tmp_used;
1101 u64 next_group;
ccd979bd
MF
1102 struct inode *alloc_inode = ac->ac_inode;
1103 struct buffer_head *group_bh = NULL;
1104 struct buffer_head *prev_group_bh = NULL;
1105 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1106 struct ocfs2_chain_list *cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1107 struct ocfs2_group_desc *bg;
1108
1109 chain = ac->ac_chain;
b0697053
MF
1110 mlog(0, "trying to alloc %u bits from chain %u, inode %llu\n",
1111 bits_wanted, chain,
1112 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno);
ccd979bd
MF
1113
1114 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
1115 le64_to_cpu(cl->cl_recs[chain].c_blkno),
1116 &group_bh, OCFS2_BH_CACHED, alloc_inode);
1117 if (status < 0) {
1118 mlog_errno(status);
1119 goto bail;
1120 }
1121 bg = (struct ocfs2_group_desc *) group_bh->b_data;
7bf72ede
MF
1122 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1123 if (status) {
1124 mlog_errno(status);
ccd979bd
MF
1125 goto bail;
1126 }
1127
1128 status = -ENOSPC;
1129 /* for now, the chain search is a bit simplistic. We just use
1130 * the 1st group with any empty bits. */
1131 while ((status = ac->ac_group_search(alloc_inode, group_bh,
1132 bits_wanted, min_bits, bit_off,
1133 &tmp_bits)) == -ENOSPC) {
1134 if (!bg->bg_next_group)
1135 break;
1136
1137 if (prev_group_bh) {
1138 brelse(prev_group_bh);
1139 prev_group_bh = NULL;
1140 }
1141 next_group = le64_to_cpu(bg->bg_next_group);
1142 prev_group_bh = group_bh;
1143 group_bh = NULL;
1144 status = ocfs2_read_block(OCFS2_SB(alloc_inode->i_sb),
1145 next_group, &group_bh,
1146 OCFS2_BH_CACHED, alloc_inode);
1147 if (status < 0) {
1148 mlog_errno(status);
1149 goto bail;
1150 }
1151 bg = (struct ocfs2_group_desc *) group_bh->b_data;
7bf72ede
MF
1152 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, bg);
1153 if (status) {
1154 mlog_errno(status);
ccd979bd
MF
1155 goto bail;
1156 }
1157 }
1158 if (status < 0) {
1159 if (status != -ENOSPC)
1160 mlog_errno(status);
1161 goto bail;
1162 }
1163
b0697053 1164 mlog(0, "alloc succeeds: we give %u bits from block group %llu\n",
1ca1a111 1165 tmp_bits, (unsigned long long)le64_to_cpu(bg->bg_blkno));
ccd979bd
MF
1166
1167 *num_bits = tmp_bits;
1168
1169 BUG_ON(*num_bits == 0);
1170
1171 /*
1172 * Keep track of previous block descriptor read. When
1173 * we find a target, if we have read more than X
1174 * number of descriptors, and the target is reasonably
1175 * empty, relink him to top of his chain.
1176 *
1177 * We've read 0 extra blocks and only send one more to
1178 * the transaction, yet the next guy to search has a
1179 * much easier time.
1180 *
1181 * Do this *after* figuring out how many bits we're taking out
1182 * of our target group.
1183 */
1184 if (ac->ac_allow_chain_relink &&
1185 (prev_group_bh) &&
1186 (ocfs2_block_group_reasonably_empty(bg, *num_bits))) {
1187 status = ocfs2_relink_block_group(handle, alloc_inode,
1188 ac->ac_bh, group_bh,
1189 prev_group_bh, chain);
1190 if (status < 0) {
1191 mlog_errno(status);
1192 goto bail;
1193 }
1194 }
1195
1196 /* Ok, claim our bits now: set the info on dinode, chainlist
1197 * and then the group */
1198 status = ocfs2_journal_access(handle,
1199 alloc_inode,
1200 ac->ac_bh,
1201 OCFS2_JOURNAL_ACCESS_WRITE);
1202 if (status < 0) {
1203 mlog_errno(status);
1204 goto bail;
1205 }
1206
1207 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1208 fe->id1.bitmap1.i_used = cpu_to_le32(*num_bits + tmp_used);
1209 le32_add_cpu(&cl->cl_recs[chain].c_free, -(*num_bits));
1210
1211 status = ocfs2_journal_dirty(handle,
1212 ac->ac_bh);
1213 if (status < 0) {
1214 mlog_errno(status);
1215 goto bail;
1216 }
1217
1218 status = ocfs2_block_group_set_bits(handle,
1219 alloc_inode,
1220 bg,
1221 group_bh,
1222 *bit_off,
1223 *num_bits);
1224 if (status < 0) {
1225 mlog_errno(status);
1226 goto bail;
1227 }
1228
b0697053 1229 mlog(0, "Allocated %u bits from suballocator %llu\n", *num_bits,
1ca1a111 1230 (unsigned long long)le64_to_cpu(fe->i_blkno));
ccd979bd
MF
1231
1232 *bg_blkno = le64_to_cpu(bg->bg_blkno);
883d4cae 1233 *bits_left = le16_to_cpu(bg->bg_free_bits_count);
ccd979bd
MF
1234bail:
1235 if (group_bh)
1236 brelse(group_bh);
1237 if (prev_group_bh)
1238 brelse(prev_group_bh);
1239
1240 mlog_exit(status);
1241 return status;
1242}
1243
1244/* will give out up to bits_wanted contiguous bits. */
1245static int ocfs2_claim_suballoc_bits(struct ocfs2_super *osb,
1246 struct ocfs2_alloc_context *ac,
1fabe148 1247 handle_t *handle,
ccd979bd
MF
1248 u32 bits_wanted,
1249 u32 min_bits,
1250 u16 *bit_off,
1251 unsigned int *num_bits,
1252 u64 *bg_blkno)
1253{
1254 int status;
1255 u16 victim, i;
883d4cae
MF
1256 u16 bits_left = 0;
1257 u64 hint_blkno = ac->ac_last_group;
ccd979bd
MF
1258 struct ocfs2_chain_list *cl;
1259 struct ocfs2_dinode *fe;
1260
1261 mlog_entry_void();
1262
1263 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1264 BUG_ON(bits_wanted > (ac->ac_bits_wanted - ac->ac_bits_given));
1265 BUG_ON(!ac->ac_bh);
1266
1267 fe = (struct ocfs2_dinode *) ac->ac_bh->b_data;
1268 if (!OCFS2_IS_VALID_DINODE(fe)) {
1269 OCFS2_RO_ON_INVALID_DINODE(osb->sb, fe);
1270 status = -EIO;
1271 goto bail;
1272 }
1273 if (le32_to_cpu(fe->id1.bitmap1.i_used) >=
1274 le32_to_cpu(fe->id1.bitmap1.i_total)) {
b0697053
MF
1275 ocfs2_error(osb->sb, "Chain allocator dinode %llu has %u used "
1276 "bits but only %u total.",
1277 (unsigned long long)le64_to_cpu(fe->i_blkno),
ccd979bd
MF
1278 le32_to_cpu(fe->id1.bitmap1.i_used),
1279 le32_to_cpu(fe->id1.bitmap1.i_total));
1280 status = -EIO;
1281 goto bail;
1282 }
1283
883d4cae
MF
1284 if (hint_blkno) {
1285 /* Attempt to short-circuit the usual search mechanism
1286 * by jumping straight to the most recently used
1287 * allocation group. This helps us mantain some
1288 * contiguousness across allocations. */
da5cbf2f
MF
1289 status = ocfs2_search_one_group(ac, handle, bits_wanted,
1290 min_bits, bit_off, num_bits,
883d4cae
MF
1291 hint_blkno, &bits_left);
1292 if (!status) {
1293 /* Be careful to update *bg_blkno here as the
1294 * caller is expecting it to be filled in, and
1295 * ocfs2_search_one_group() won't do that for
1296 * us. */
1297 *bg_blkno = hint_blkno;
1298 goto set_hint;
1299 }
1300 if (status < 0 && status != -ENOSPC) {
1301 mlog_errno(status);
1302 goto bail;
1303 }
1304 }
1305
ccd979bd
MF
1306 cl = (struct ocfs2_chain_list *) &fe->id2.i_chain;
1307
1308 victim = ocfs2_find_victim_chain(cl);
1309 ac->ac_chain = victim;
1310 ac->ac_allow_chain_relink = 1;
1311
da5cbf2f 1312 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits, bit_off,
883d4cae 1313 num_bits, bg_blkno, &bits_left);
ccd979bd 1314 if (!status)
883d4cae 1315 goto set_hint;
ccd979bd
MF
1316 if (status < 0 && status != -ENOSPC) {
1317 mlog_errno(status);
1318 goto bail;
1319 }
1320
1321 mlog(0, "Search of victim chain %u came up with nothing, "
1322 "trying all chains now.\n", victim);
1323
1324 /* If we didn't pick a good victim, then just default to
1325 * searching each chain in order. Don't allow chain relinking
1326 * because we only calculate enough journal credits for one
1327 * relink per alloc. */
1328 ac->ac_allow_chain_relink = 0;
1329 for (i = 0; i < le16_to_cpu(cl->cl_next_free_rec); i ++) {
1330 if (i == victim)
1331 continue;
1332 if (!cl->cl_recs[i].c_free)
1333 continue;
1334
1335 ac->ac_chain = i;
da5cbf2f 1336 status = ocfs2_search_chain(ac, handle, bits_wanted, min_bits,
883d4cae
MF
1337 bit_off, num_bits, bg_blkno,
1338 &bits_left);
ccd979bd
MF
1339 if (!status)
1340 break;
1341 if (status < 0 && status != -ENOSPC) {
1342 mlog_errno(status);
1343 goto bail;
1344 }
1345 }
ccd979bd 1346
883d4cae
MF
1347set_hint:
1348 if (status != -ENOSPC) {
1349 /* If the next search of this group is not likely to
1350 * yield a suitable extent, then we reset the last
1351 * group hint so as to not waste a disk read */
1352 if (bits_left < min_bits)
1353 ac->ac_last_group = 0;
1354 else
1355 ac->ac_last_group = *bg_blkno;
1356 }
1357
1358bail:
ccd979bd
MF
1359 mlog_exit(status);
1360 return status;
1361}
1362
1363int ocfs2_claim_metadata(struct ocfs2_super *osb,
1fabe148 1364 handle_t *handle,
ccd979bd
MF
1365 struct ocfs2_alloc_context *ac,
1366 u32 bits_wanted,
1367 u16 *suballoc_bit_start,
1368 unsigned int *num_bits,
1369 u64 *blkno_start)
1370{
1371 int status;
1372 u64 bg_blkno;
1373
1374 BUG_ON(!ac);
1375 BUG_ON(ac->ac_bits_wanted < (ac->ac_bits_given + bits_wanted));
1376 BUG_ON(ac->ac_which != OCFS2_AC_USE_META);
ccd979bd
MF
1377
1378 status = ocfs2_claim_suballoc_bits(osb,
1379 ac,
da5cbf2f 1380 handle,
ccd979bd
MF
1381 bits_wanted,
1382 1,
1383 suballoc_bit_start,
1384 num_bits,
1385 &bg_blkno);
1386 if (status < 0) {
1387 mlog_errno(status);
1388 goto bail;
1389 }
1390 atomic_inc(&osb->alloc_stats.bg_allocs);
1391
1392 *blkno_start = bg_blkno + (u64) *suballoc_bit_start;
1393 ac->ac_bits_given += (*num_bits);
1394 status = 0;
1395bail:
1396 mlog_exit(status);
1397 return status;
1398}
1399
1400int ocfs2_claim_new_inode(struct ocfs2_super *osb,
1fabe148 1401 handle_t *handle,
ccd979bd
MF
1402 struct ocfs2_alloc_context *ac,
1403 u16 *suballoc_bit,
1404 u64 *fe_blkno)
1405{
1406 int status;
1407 unsigned int num_bits;
1408 u64 bg_blkno;
1409
1410 mlog_entry_void();
1411
1412 BUG_ON(!ac);
1413 BUG_ON(ac->ac_bits_given != 0);
1414 BUG_ON(ac->ac_bits_wanted != 1);
1415 BUG_ON(ac->ac_which != OCFS2_AC_USE_INODE);
ccd979bd
MF
1416
1417 status = ocfs2_claim_suballoc_bits(osb,
1418 ac,
da5cbf2f 1419 handle,
ccd979bd
MF
1420 1,
1421 1,
1422 suballoc_bit,
1423 &num_bits,
1424 &bg_blkno);
1425 if (status < 0) {
1426 mlog_errno(status);
1427 goto bail;
1428 }
1429 atomic_inc(&osb->alloc_stats.bg_allocs);
1430
1431 BUG_ON(num_bits != 1);
1432
1433 *fe_blkno = bg_blkno + (u64) (*suballoc_bit);
1434 ac->ac_bits_given++;
1435 status = 0;
1436bail:
1437 mlog_exit(status);
1438 return status;
1439}
1440
1441/* translate a group desc. blkno and it's bitmap offset into
1442 * disk cluster offset. */
1443static inline u32 ocfs2_desc_bitmap_to_cluster_off(struct inode *inode,
1444 u64 bg_blkno,
1445 u16 bg_bit_off)
1446{
1447 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1448 u32 cluster = 0;
1449
1450 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1451
1452 if (bg_blkno != osb->first_cluster_group_blkno)
1453 cluster = ocfs2_blocks_to_clusters(inode->i_sb, bg_blkno);
1454 cluster += (u32) bg_bit_off;
1455 return cluster;
1456}
1457
1458/* given a cluster offset, calculate which block group it belongs to
1459 * and return that block offset. */
1460static inline u64 ocfs2_which_cluster_group(struct inode *inode,
1461 u32 cluster)
1462{
1463 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1464 u32 group_no;
1465
1466 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1467
1468 group_no = cluster / osb->bitmap_cpg;
1469 if (!group_no)
1470 return osb->first_cluster_group_blkno;
1471 return ocfs2_clusters_to_blocks(inode->i_sb,
1472 group_no * osb->bitmap_cpg);
1473}
1474
1475/* given the block number of a cluster start, calculate which cluster
1476 * group and descriptor bitmap offset that corresponds to. */
1477static inline void ocfs2_block_to_cluster_group(struct inode *inode,
1478 u64 data_blkno,
1479 u64 *bg_blkno,
1480 u16 *bg_bit_off)
1481{
1482 struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
1483 u32 data_cluster = ocfs2_blocks_to_clusters(osb->sb, data_blkno);
1484
1485 BUG_ON(!ocfs2_is_cluster_bitmap(inode));
1486
1487 *bg_blkno = ocfs2_which_cluster_group(inode,
1488 data_cluster);
1489
1490 if (*bg_blkno == osb->first_cluster_group_blkno)
1491 *bg_bit_off = (u16) data_cluster;
1492 else
1493 *bg_bit_off = (u16) ocfs2_blocks_to_clusters(osb->sb,
1494 data_blkno - *bg_blkno);
1495}
1496
1497/*
1498 * min_bits - minimum contiguous chunk from this total allocation we
1499 * can handle. set to what we asked for originally for a full
1500 * contig. allocation, set to '1' to indicate we can deal with extents
1501 * of any size.
1502 */
1503int ocfs2_claim_clusters(struct ocfs2_super *osb,
1fabe148 1504 handle_t *handle,
ccd979bd
MF
1505 struct ocfs2_alloc_context *ac,
1506 u32 min_clusters,
1507 u32 *cluster_start,
1508 u32 *num_clusters)
1509{
1510 int status;
1511 unsigned int bits_wanted = ac->ac_bits_wanted - ac->ac_bits_given;
883d4cae 1512 u64 bg_blkno = 0;
ccd979bd
MF
1513 u16 bg_bit_off;
1514
1515 mlog_entry_void();
1516
1517 BUG_ON(!ac);
1518 BUG_ON(ac->ac_bits_given >= ac->ac_bits_wanted);
1519
1520 BUG_ON(ac->ac_which != OCFS2_AC_USE_LOCAL
1521 && ac->ac_which != OCFS2_AC_USE_MAIN);
ccd979bd
MF
1522
1523 if (ac->ac_which == OCFS2_AC_USE_LOCAL) {
1524 status = ocfs2_claim_local_alloc_bits(osb,
1525 handle,
1526 ac,
1527 bits_wanted,
1528 cluster_start,
1529 num_clusters);
1530 if (!status)
1531 atomic_inc(&osb->alloc_stats.local_data);
1532 } else {
1533 if (min_clusters > (osb->bitmap_cpg - 1)) {
1534 /* The only paths asking for contiguousness
1535 * should know about this already. */
1536 mlog(ML_ERROR, "minimum allocation requested exceeds "
1537 "group bitmap size!");
1538 status = -ENOSPC;
1539 goto bail;
1540 }
1541 /* clamp the current request down to a realistic size. */
1542 if (bits_wanted > (osb->bitmap_cpg - 1))
1543 bits_wanted = osb->bitmap_cpg - 1;
1544
1545 status = ocfs2_claim_suballoc_bits(osb,
1546 ac,
da5cbf2f 1547 handle,
ccd979bd
MF
1548 bits_wanted,
1549 min_clusters,
1550 &bg_bit_off,
1551 num_clusters,
1552 &bg_blkno);
1553 if (!status) {
1554 *cluster_start =
1555 ocfs2_desc_bitmap_to_cluster_off(ac->ac_inode,
1556 bg_blkno,
1557 bg_bit_off);
1558 atomic_inc(&osb->alloc_stats.bitmap_data);
1559 }
1560 }
1561 if (status < 0) {
1562 if (status != -ENOSPC)
1563 mlog_errno(status);
1564 goto bail;
1565 }
1566
1567 ac->ac_bits_given += *num_clusters;
1568
1569bail:
1570 mlog_exit(status);
1571 return status;
1572}
1573
1fabe148 1574static inline int ocfs2_block_group_clear_bits(handle_t *handle,
ccd979bd
MF
1575 struct inode *alloc_inode,
1576 struct ocfs2_group_desc *bg,
1577 struct buffer_head *group_bh,
1578 unsigned int bit_off,
1579 unsigned int num_bits)
1580{
1581 int status;
1582 unsigned int tmp;
1583 int journal_type = OCFS2_JOURNAL_ACCESS_WRITE;
1584 struct ocfs2_group_desc *undo_bg = NULL;
1585
1586 mlog_entry_void();
1587
1588 if (!OCFS2_IS_VALID_GROUP_DESC(bg)) {
1589 OCFS2_RO_ON_INVALID_GROUP_DESC(alloc_inode->i_sb, bg);
1590 status = -EIO;
1591 goto bail;
1592 }
1593
1594 mlog(0, "off = %u, num = %u\n", bit_off, num_bits);
1595
1596 if (ocfs2_is_cluster_bitmap(alloc_inode))
1597 journal_type = OCFS2_JOURNAL_ACCESS_UNDO;
1598
1599 status = ocfs2_journal_access(handle, alloc_inode, group_bh,
1600 journal_type);
1601 if (status < 0) {
1602 mlog_errno(status);
1603 goto bail;
1604 }
1605
1606 if (ocfs2_is_cluster_bitmap(alloc_inode))
1607 undo_bg = (struct ocfs2_group_desc *) bh2jh(group_bh)->b_committed_data;
1608
1609 tmp = num_bits;
1610 while(tmp--) {
1611 ocfs2_clear_bit((bit_off + tmp),
1612 (unsigned long *) bg->bg_bitmap);
1613 if (ocfs2_is_cluster_bitmap(alloc_inode))
1614 ocfs2_set_bit(bit_off + tmp,
1615 (unsigned long *) undo_bg->bg_bitmap);
1616 }
1617 le16_add_cpu(&bg->bg_free_bits_count, num_bits);
1618
1619 status = ocfs2_journal_dirty(handle, group_bh);
1620 if (status < 0)
1621 mlog_errno(status);
1622bail:
1623 return status;
1624}
1625
1626/*
1627 * expects the suballoc inode to already be locked.
1628 */
1fabe148 1629static int ocfs2_free_suballoc_bits(handle_t *handle,
ccd979bd
MF
1630 struct inode *alloc_inode,
1631 struct buffer_head *alloc_bh,
1632 unsigned int start_bit,
1633 u64 bg_blkno,
1634 unsigned int count)
1635{
1636 int status = 0;
1637 u32 tmp_used;
1638 struct ocfs2_super *osb = OCFS2_SB(alloc_inode->i_sb);
1639 struct ocfs2_dinode *fe = (struct ocfs2_dinode *) alloc_bh->b_data;
1640 struct ocfs2_chain_list *cl = &fe->id2.i_chain;
1641 struct buffer_head *group_bh = NULL;
1642 struct ocfs2_group_desc *group;
1643
1644 mlog_entry_void();
1645
1646 if (!OCFS2_IS_VALID_DINODE(fe)) {
1647 OCFS2_RO_ON_INVALID_DINODE(alloc_inode->i_sb, fe);
1648 status = -EIO;
1649 goto bail;
1650 }
1651 BUG_ON((count + start_bit) > ocfs2_bits_per_group(cl));
1652
b0697053
MF
1653 mlog(0, "%llu: freeing %u bits from group %llu, starting at %u\n",
1654 (unsigned long long)OCFS2_I(alloc_inode)->ip_blkno, count,
1655 (unsigned long long)bg_blkno, start_bit);
ccd979bd
MF
1656
1657 status = ocfs2_read_block(osb, bg_blkno, &group_bh, OCFS2_BH_CACHED,
1658 alloc_inode);
1659 if (status < 0) {
1660 mlog_errno(status);
1661 goto bail;
1662 }
1663
1664 group = (struct ocfs2_group_desc *) group_bh->b_data;
7bf72ede
MF
1665 status = ocfs2_check_group_descriptor(alloc_inode->i_sb, fe, group);
1666 if (status) {
1667 mlog_errno(status);
ccd979bd
MF
1668 goto bail;
1669 }
1670 BUG_ON((count + start_bit) > le16_to_cpu(group->bg_bits));
1671
1672 status = ocfs2_block_group_clear_bits(handle, alloc_inode,
1673 group, group_bh,
1674 start_bit, count);
1675 if (status < 0) {
1676 mlog_errno(status);
1677 goto bail;
1678 }
1679
1680 status = ocfs2_journal_access(handle, alloc_inode, alloc_bh,
1681 OCFS2_JOURNAL_ACCESS_WRITE);
1682 if (status < 0) {
1683 mlog_errno(status);
1684 goto bail;
1685 }
1686
1687 le32_add_cpu(&cl->cl_recs[le16_to_cpu(group->bg_chain)].c_free,
1688 count);
1689 tmp_used = le32_to_cpu(fe->id1.bitmap1.i_used);
1690 fe->id1.bitmap1.i_used = cpu_to_le32(tmp_used - count);
1691
1692 status = ocfs2_journal_dirty(handle, alloc_bh);
1693 if (status < 0) {
1694 mlog_errno(status);
1695 goto bail;
1696 }
1697
1698bail:
1699 if (group_bh)
1700 brelse(group_bh);
1701
1702 mlog_exit(status);
1703 return status;
1704}
1705
1706static inline u64 ocfs2_which_suballoc_group(u64 block, unsigned int bit)
1707{
1708 u64 group = block - (u64) bit;
1709
1710 return group;
1711}
1712
1fabe148 1713int ocfs2_free_dinode(handle_t *handle,
ccd979bd
MF
1714 struct inode *inode_alloc_inode,
1715 struct buffer_head *inode_alloc_bh,
1716 struct ocfs2_dinode *di)
1717{
1718 u64 blk = le64_to_cpu(di->i_blkno);
1719 u16 bit = le16_to_cpu(di->i_suballoc_bit);
1720 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1721
1722 return ocfs2_free_suballoc_bits(handle, inode_alloc_inode,
1723 inode_alloc_bh, bit, bg_blkno, 1);
1724}
1725
1fabe148 1726int ocfs2_free_extent_block(handle_t *handle,
ccd979bd
MF
1727 struct inode *eb_alloc_inode,
1728 struct buffer_head *eb_alloc_bh,
1729 struct ocfs2_extent_block *eb)
1730{
1731 u64 blk = le64_to_cpu(eb->h_blkno);
1732 u16 bit = le16_to_cpu(eb->h_suballoc_bit);
1733 u64 bg_blkno = ocfs2_which_suballoc_group(blk, bit);
1734
1735 return ocfs2_free_suballoc_bits(handle, eb_alloc_inode, eb_alloc_bh,
1736 bit, bg_blkno, 1);
1737}
1738
1fabe148 1739int ocfs2_free_clusters(handle_t *handle,
ccd979bd
MF
1740 struct inode *bitmap_inode,
1741 struct buffer_head *bitmap_bh,
1742 u64 start_blk,
1743 unsigned int num_clusters)
1744{
1745 int status;
1746 u16 bg_start_bit;
1747 u64 bg_blkno;
1748 struct ocfs2_dinode *fe;
1749
1750 /* You can't ever have a contiguous set of clusters
1751 * bigger than a block group bitmap so we never have to worry
1752 * about looping on them. */
1753
1754 mlog_entry_void();
1755
1756 /* This is expensive. We can safely remove once this stuff has
1757 * gotten tested really well. */
1758 BUG_ON(start_blk != ocfs2_clusters_to_blocks(bitmap_inode->i_sb, ocfs2_blocks_to_clusters(bitmap_inode->i_sb, start_blk)));
1759
1760 fe = (struct ocfs2_dinode *) bitmap_bh->b_data;
1761
1762 ocfs2_block_to_cluster_group(bitmap_inode, start_blk, &bg_blkno,
1763 &bg_start_bit);
1764
b0697053
MF
1765 mlog(0, "want to free %u clusters starting at block %llu\n",
1766 num_clusters, (unsigned long long)start_blk);
1767 mlog(0, "bg_blkno = %llu, bg_start_bit = %u\n",
1768 (unsigned long long)bg_blkno, bg_start_bit);
ccd979bd
MF
1769
1770 status = ocfs2_free_suballoc_bits(handle, bitmap_inode, bitmap_bh,
1771 bg_start_bit, bg_blkno,
1772 num_clusters);
1773 if (status < 0)
1774 mlog_errno(status);
1775
1776 mlog_exit(status);
1777 return status;
1778}
1779
1780static inline void ocfs2_debug_bg(struct ocfs2_group_desc *bg)
1781{
1782 printk("Block Group:\n");
1783 printk("bg_signature: %s\n", bg->bg_signature);
1784 printk("bg_size: %u\n", bg->bg_size);
1785 printk("bg_bits: %u\n", bg->bg_bits);
1786 printk("bg_free_bits_count: %u\n", bg->bg_free_bits_count);
1787 printk("bg_chain: %u\n", bg->bg_chain);
1788 printk("bg_generation: %u\n", le32_to_cpu(bg->bg_generation));
b0697053
MF
1789 printk("bg_next_group: %llu\n",
1790 (unsigned long long)bg->bg_next_group);
1791 printk("bg_parent_dinode: %llu\n",
1792 (unsigned long long)bg->bg_parent_dinode);
1793 printk("bg_blkno: %llu\n",
1794 (unsigned long long)bg->bg_blkno);
ccd979bd
MF
1795}
1796
1797static inline void ocfs2_debug_suballoc_inode(struct ocfs2_dinode *fe)
1798{
1799 int i;
1800
b0697053 1801 printk("Suballoc Inode %llu:\n", (unsigned long long)fe->i_blkno);
ccd979bd 1802 printk("i_signature: %s\n", fe->i_signature);
b0697053
MF
1803 printk("i_size: %llu\n",
1804 (unsigned long long)fe->i_size);
ccd979bd
MF
1805 printk("i_clusters: %u\n", fe->i_clusters);
1806 printk("i_generation: %u\n",
1807 le32_to_cpu(fe->i_generation));
1808 printk("id1.bitmap1.i_used: %u\n",
1809 le32_to_cpu(fe->id1.bitmap1.i_used));
1810 printk("id1.bitmap1.i_total: %u\n",
1811 le32_to_cpu(fe->id1.bitmap1.i_total));
1812 printk("id2.i_chain.cl_cpg: %u\n", fe->id2.i_chain.cl_cpg);
1813 printk("id2.i_chain.cl_bpc: %u\n", fe->id2.i_chain.cl_bpc);
1814 printk("id2.i_chain.cl_count: %u\n", fe->id2.i_chain.cl_count);
1815 printk("id2.i_chain.cl_next_free_rec: %u\n",
1816 fe->id2.i_chain.cl_next_free_rec);
1817 for(i = 0; i < fe->id2.i_chain.cl_next_free_rec; i++) {
1818 printk("fe->id2.i_chain.cl_recs[%d].c_free: %u\n", i,
1819 fe->id2.i_chain.cl_recs[i].c_free);
1820 printk("fe->id2.i_chain.cl_recs[%d].c_total: %u\n", i,
1821 fe->id2.i_chain.cl_recs[i].c_total);
b0697053
MF
1822 printk("fe->id2.i_chain.cl_recs[%d].c_blkno: %llu\n", i,
1823 (unsigned long long)fe->id2.i_chain.cl_recs[i].c_blkno);
ccd979bd
MF
1824 }
1825}
This page took 0.25827 seconds and 5 git commands to generate.