return err;
}
+/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen whild holding the buddy cache
+ * lock
+ */
+static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group)
+{
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ int groups_per_page;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+
+ groups_per_page = blocks_per_page >> 1;
+ if (groups_per_page == 0)
+ groups_per_page = 1;
+ /* read all groups the page covers into the cache */
+ for (i = 0; i < groups_per_page; i++) {
+
+ if ((first_group + i) >= ngroups)
+ break;
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ down_write_nested(&grp->alloc_sem, i);
+ }
+ return i;
+}
+
+static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group, int locked_group)
+{
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+ /* release locks on all the groups */
+ for (i = 0; i < locked_group; i++) {
+
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ up_write(&grp->alloc_sem);
+ }
+
+}
+
/*
* Locking note: This routine calls ext4_mb_init_cache(), which takes the
* block group lock of all groups for this page; do not hold the BG lock when
return 0;
}
-/*
- * lock the group_info alloc_sem of all the groups
- * belonging to the same buddy cache page. This
- * make sure other parallel operation on the buddy
- * cache doesn't happen whild holding the buddy cache
- * lock
- */
-int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
-{
- int i;
- int block, pnum;
- int blocks_per_page;
- int groups_per_page;
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
-
- groups_per_page = blocks_per_page >> 1;
- if (groups_per_page == 0)
- groups_per_page = 1;
- /* read all groups the page covers into the cache */
- for (i = 0; i < groups_per_page; i++) {
-
- if ((first_group + i) >= ngroups)
- break;
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- down_write_nested(&grp->alloc_sem, i);
- }
- return i;
-}
-
-void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
- ext4_group_t group, int locked_group)
-{
- int i;
- int block, pnum;
- int blocks_per_page;
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
- /* release locks on all the groups */
- for (i = 0; i < locked_group; i++) {
-
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- up_write(&grp->alloc_sem);
- }
-
-}
-
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
printk(KERN_ERR "EXT4-fs: can't get new inode\n");
goto err_freesgi;
}
+ sbi->s_buddy_cache->i_ino = get_next_ino();
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
for (i = 0; i < ngroups; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
return 0;
}
-static inline void ext4_issue_discard(struct super_block *sb,
+static inline int ext4_issue_discard(struct super_block *sb,
ext4_group_t block_group, ext4_grpblk_t block, int count)
{
int ret;
discard_block = block + ext4_group_first_block_no(sb, block_group);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- ret = sb_issue_discard(sb, discard_block, count);
+ ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
if (ret == -EOPNOTSUPP) {
ext4_warning(sb, "discard not supported, disabling");
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
}
+ return ret;
}
/*
#endif
-int __init init_ext4_mballoc(void)
+int __init ext4_init_mballoc(void)
{
- ext4_pspace_cachep =
- kmem_cache_create("ext4_prealloc_space",
- sizeof(struct ext4_prealloc_space),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_pspace_cachep == NULL)
return -ENOMEM;
- ext4_ac_cachep =
- kmem_cache_create("ext4_alloc_context",
- sizeof(struct ext4_allocation_context),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_ac_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
return -ENOMEM;
}
- ext4_free_ext_cachep =
- kmem_cache_create("ext4_free_block_extents",
- sizeof(struct ext4_free_data),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_free_ext_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
return 0;
}
-void exit_ext4_mballoc(void)
+void ext4_exit_mballoc(void)
{
int i;
/*
*/
static noinline_for_stack int
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = pa->pa_inode;
- }
-
while (bit < end) {
bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
if (bit >= end)
(unsigned) next - bit, (unsigned) group);
free += next - bit;
- if (ac) {
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = next - bit;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
-
- trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
- next - bit);
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
+ trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
+ grp_blk_start + bit, next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
static noinline_for_stack int
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
ext4_group_t group;
ext4_grpblk_t bit;
- trace_ext4_mb_release_group_pa(sb, ac, pa);
+ trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = NULL;
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = pa->pa_len;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
return 0;
}
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
struct list_head list;
struct ext4_buddy e4b;
int err;
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
spin_unlock(pa->pa_obj_lock);
if (pa->pa_type == MB_GROUP_PA)
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
else
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
out:
ext4_unlock_group(sb, group);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
ext4_mb_unload_buddy(&e4b);
put_bh(bitmap_bh);
return free;
struct super_block *sb = inode->i_sb;
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
ext4_group_t group = 0;
struct list_head list;
struct ext4_buddy e4b;
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = inode;
- }
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
struct ext4_buddy e4b;
struct list_head discard_list;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
mb_debug(1, "discard locality group preallocation\n");
INIT_LIST_HEAD(&discard_list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
}
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
- struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
unsigned long freed = 0;
unsigned int overflow;
if (!bh)
tbh = sb_find_get_block(inode->i_sb,
block + i);
+ if (unlikely(!tbh))
+ continue;
ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
inode, tbh, block + i);
}
if (!ext4_should_writeback_data(inode))
flags |= EXT4_FREE_BLOCKS_METADATA;
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_inode = inode;
- ac->ac_sb = sb;
- }
-
do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
}
#endif
- if (ac) {
- ac->ac_b_ex.fe_group = block_group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = count;
- trace_ext4_mballoc_free(ac);
- }
+ trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
ext4_mb_return_to_preallocation(inode, &e4b, block, count);
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, block_group, bit, count);
}
ret = ext4_free_blks_count(sb, gdp) + count;
dquot_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
return;
}
+
+/**
+ * ext4_trim_extent -- function to TRIM one single free extent in the group
+ * @sb: super block for the file system
+ * @start: starting block of the free extent in the alloc. group
+ * @count: number of blocks to TRIM
+ * @group: alloc. group we are working with
+ * @e4b: ext4 buddy for the group
+ *
+ * Trim "count" blocks starting at "start" in the "group". To assure that no
+ * one will allocate those blocks, mark it as used in buddy bitmap. This must
+ * be called with under the group lock.
+ */
+static int ext4_trim_extent(struct super_block *sb, int start, int count,
+ ext4_group_t group, struct ext4_buddy *e4b)
+{
+ struct ext4_free_extent ex;
+ int ret = 0;
+
+ assert_spin_locked(ext4_group_lock_ptr(sb, group));
+
+ ex.fe_start = start;
+ ex.fe_group = group;
+ ex.fe_len = count;
+
+ /*
+ * Mark blocks used, so no one can reuse them while
+ * being trimmed.
+ */
+ mb_mark_used(e4b, &ex);
+ ext4_unlock_group(sb, group);
+
+ ret = ext4_issue_discard(sb, group, start, count);
+ if (ret)
+ ext4_std_error(sb, ret);
+
+ ext4_lock_group(sb, group);
+ mb_free_blocks(NULL, e4b, start, ex.fe_len);
+ return ret;
+}
+
+/**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb: super block for file system
+ * @e4b: ext4 buddy
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @minblocks: minimum extent block count
+ *
+ * ext4_trim_all_free walks through group's buddy bitmap searching for free
+ * extents. When the free block is found, ext4_trim_extent is called to TRIM
+ * the extent.
+ *
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+ * bitmap. Then issue a TRIM command on this extent and free the extent in
+ * the group buddy bitmap. This is done until whole group is scanned.
+ */
+ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+ ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
+{
+ void *bitmap;
+ ext4_grpblk_t next, count = 0;
+ ext4_group_t group;
+ int ret = 0;
+
+ BUG_ON(e4b == NULL);
+
+ bitmap = e4b->bd_bitmap;
+ group = e4b->bd_group;
+ start = (e4b->bd_info->bb_first_free > start) ?
+ e4b->bd_info->bb_first_free : start;
+ ext4_lock_group(sb, group);
+
+ while (start < max) {
+ start = mb_find_next_zero_bit(bitmap, max, start);
+ if (start >= max)
+ break;
+ next = mb_find_next_bit(bitmap, max, start);
+
+ if ((next - start) >= minblocks) {
+ ret = ext4_trim_extent(sb, start,
+ next - start, group, e4b);
+ if (ret < 0)
+ break;
+ count += next - start;
+ }
+ start = next + 1;
+
+ if (fatal_signal_pending(current)) {
+ count = -ERESTARTSYS;
+ break;
+ }
+
+ if (need_resched()) {
+ ext4_unlock_group(sb, group);
+ cond_resched();
+ ext4_lock_group(sb, group);
+ }
+
+ if ((e4b->bd_info->bb_free - count) < minblocks)
+ break;
+ }
+ ext4_unlock_group(sb, group);
+
+ ext4_debug("trimmed %d blocks in the group %d\n",
+ count, group);
+
+ if (ret < 0)
+ count = ret;
+
+ return count;
+}
+
+/**
+ * ext4_trim_fs() -- trim ioctl handle function
+ * @sb: superblock for filesystem
+ * @range: fstrim_range structure
+ *
+ * start: First Byte to trim
+ * len: number of Bytes to trim from start
+ * minlen: minimum extent length in Bytes
+ * ext4_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext4_trim_all_free function
+ * is invoked to trim all free space.
+ */
+int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+{
+ struct ext4_buddy e4b;
+ ext4_group_t first_group, last_group;
+ ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+ ext4_grpblk_t cnt = 0, first_block, last_block;
+ uint64_t start, len, minlen, trimmed;
+ int ret = 0;
+
+ start = range->start >> sb->s_blocksize_bits;
+ len = range->len >> sb->s_blocksize_bits;
+ minlen = range->minlen >> sb->s_blocksize_bits;
+ trimmed = 0;
+
+ if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+ return -EINVAL;
+
+ /* Determine first and last group to examine based on start and len */
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
+ &first_group, &first_block);
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+ &last_group, &last_block);
+ last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+ last_block = EXT4_BLOCKS_PER_GROUP(sb);
+
+ if (first_group > last_group)
+ return -EINVAL;
+
+ for (group = first_group; group <= last_group; group++) {
+ ret = ext4_mb_load_buddy(sb, group, &e4b);
+ if (ret) {
+ ext4_error(sb, "Error in loading buddy "
+ "information for %u", group);
+ break;
+ }
+
+ if (len >= EXT4_BLOCKS_PER_GROUP(sb))
+ len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
+ else
+ last_block = len;
+
+ if (e4b.bd_info->bb_free >= minlen) {
+ cnt = ext4_trim_all_free(sb, &e4b, first_block,
+ last_block, minlen);
+ if (cnt < 0) {
+ ret = cnt;
+ ext4_mb_unload_buddy(&e4b);
+ break;
+ }
+ }
+ ext4_mb_unload_buddy(&e4b);
+ trimmed += cnt;
+ first_block = 0;
+ }
+ range->len = trimmed * sb->s_blocksize;
+
+ return ret;
+}