]> bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
ext4: Use EXT4_GROUP_INFO_NEED_INIT_BIT during resize
authorAneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Tue, 6 Jan 2009 02:36:19 +0000 (21:36 -0500)
committerTheodore Ts'o <tytso@mit.edu>
Tue, 6 Jan 2009 02:36:19 +0000 (21:36 -0500)
The new groups added during resize are flagged as
need_init group. Make sure we properly initialize these
groups. When we have block size < page size and we are adding
new groups the page may still be marked uptodate even though
we haven't initialized the group. While forcing the init
of buddy cache we need to make sure other groups part of the
same page of buddy cache is not using the cache.
group_info->alloc_sem is added to ensure the same.

Signed-off-by: Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
cc: stable@kernel.org

fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/mballoc.c
fs/ext4/mballoc.h
fs/ext4/resize.c

index c54192e2384ef4d8d217acee4c2f2c55465d5a42..404d81cc9157c7429a6ce46f242d7b2988331af7 100644 (file)
@@ -381,6 +381,7 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
        ext4_debug("Adding block(s) %llu-%llu\n", block, block + count - 1);
 
        ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
+       grp = ext4_get_group_info(sb, block_group);
        /*
         * Check to see if we are freeing blocks across a group
         * boundary.
@@ -425,7 +426,11 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
        err = ext4_journal_get_write_access(handle, gd_bh);
        if (err)
                goto error_return;
-
+       /*
+        * make sure we don't allow a parallel init on other groups in the
+        * same buddy cache
+        */
+       down_write(&grp->alloc_sem);
        for (i = 0, blocks_freed = 0; i < count; i++) {
                BUFFER_TRACE(bitmap_bh, "clear bit");
                if (!ext4_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
@@ -450,6 +455,13 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
                sbi->s_flex_groups[flex_group].free_blocks += blocks_freed;
                spin_unlock(sb_bgl_lock(sbi, flex_group));
        }
+       /*
+        * request to reload the buddy with the
+        * new bitmap information
+        */
+       set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
+       ext4_mb_update_group_info(grp, blocks_freed);
+       up_write(&grp->alloc_sem);
 
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -461,13 +473,6 @@ void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
        if (!err)
                err = ret;
        sb->s_dirt = 1;
-       /*
-        * request to reload the buddy with the
-        * new bitmap information
-        */
-       grp = ext4_get_group_info(sb, block_group);
-       set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT, &(grp->bb_state));
-       ext4_mb_update_group_info(grp, blocks_freed);
 
 error_return:
        brelse(bitmap_bh);
index 8021bf558d1e1e7e05da7bfe5ab5fe516cc65094..8152b5603f0a7b1e2d20a5ec0ca1d61b418b0e33 100644 (file)
@@ -1060,12 +1060,13 @@ extern int __init init_ext4_mballoc(void);
 extern void exit_ext4_mballoc(void);
 extern void ext4_mb_free_blocks(handle_t *, struct inode *,
                unsigned long, unsigned long, int, unsigned long *);
-extern int ext4_mb_add_more_groupinfo(struct super_block *sb,
+extern int ext4_mb_add_groupinfo(struct super_block *sb,
                ext4_group_t i, struct ext4_group_desc *desc);
 extern void ext4_mb_update_group_info(struct ext4_group_info *grp,
                ext4_grpblk_t add);
-
-
+extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
+extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
+                                               ext4_group_t, int);
 /* inode.c */
 int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
                struct buffer_head *bh, ext4_fsblk_t blocknr);
index edf9730ba72e77af3a2e78978fc7cab1e8a68ba9..d2b1bcaf88ecb994ef8be9c1e91b1ca204fbbd94 100644 (file)
@@ -886,18 +886,20 @@ static noinline_for_stack int
 ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
                                        struct ext4_buddy *e4b)
 {
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct inode *inode = sbi->s_buddy_cache;
        int blocks_per_page;
        int block;
        int pnum;
        int poff;
        struct page *page;
        int ret;
+       struct ext4_group_info *grp;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct inode *inode = sbi->s_buddy_cache;
 
        mb_debug("load group %u\n", group);
 
        blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       grp = ext4_get_group_info(sb, group);
 
        e4b->bd_blkbits = sb->s_blocksize_bits;
        e4b->bd_info = ext4_get_group_info(sb, group);
@@ -905,6 +907,15 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
        e4b->bd_group = group;
        e4b->bd_buddy_page = NULL;
        e4b->bd_bitmap_page = NULL;
+       e4b->alloc_semp = &grp->alloc_sem;
+
+       /* Take the read lock on the group alloc
+        * sem. This would make sure a parallel
+        * ext4_mb_init_group happening on other
+        * groups mapped by the page is blocked
+        * till we are done with allocation
+        */
+       down_read(e4b->alloc_semp);
 
        /*
         * the buddy cache inode stores the block bitmap
@@ -920,6 +931,14 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
        page = find_get_page(inode->i_mapping, pnum);
        if (page == NULL || !PageUptodate(page)) {
                if (page)
+                       /*
+                        * drop the page reference and try
+                        * to get the page with lock. If we
+                        * are not uptodate that implies
+                        * somebody just created the page but
+                        * is yet to initialize the same. So
+                        * wait for it to initialize.
+                        */
                        page_cache_release(page);
                page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
                if (page) {
@@ -985,6 +1004,9 @@ err:
                page_cache_release(e4b->bd_buddy_page);
        e4b->bd_buddy = NULL;
        e4b->bd_bitmap = NULL;
+
+       /* Done with the buddy cache */
+       up_read(e4b->alloc_semp);
        return ret;
 }
 
@@ -994,6 +1016,8 @@ static void ext4_mb_release_desc(struct ext4_buddy *e4b)
                page_cache_release(e4b->bd_bitmap_page);
        if (e4b->bd_buddy_page)
                page_cache_release(e4b->bd_buddy_page);
+       /* Done with the buddy cache */
+       up_read(e4b->alloc_semp);
 }
 
 
@@ -1696,6 +1720,173 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
        return 0;
 }
 
+/*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen  whild holding the buddy cache
+ * lock
+ */
+int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
+{
+       int i;
+       int block, pnum;
+       int blocks_per_page;
+       int groups_per_page;
+       ext4_group_t first_group;
+       struct ext4_group_info *grp;
+
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       first_group = pnum * blocks_per_page / 2;
+
+       groups_per_page = blocks_per_page >> 1;
+       if (groups_per_page == 0)
+               groups_per_page = 1;
+       /* read all groups the page covers into the cache */
+       for (i = 0; i < groups_per_page; i++) {
+
+               if ((first_group + i) >= EXT4_SB(sb)->s_groups_count)
+                       break;
+               grp = ext4_get_group_info(sb, first_group + i);
+               /* take all groups write allocation
+                * semaphore. This make sure there is
+                * no block allocation going on in any
+                * of that groups
+                */
+               down_write(&grp->alloc_sem);
+       }
+       return i;
+}
+
+void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+                                       ext4_group_t group, int locked_group)
+{
+       int i;
+       int block, pnum;
+       int blocks_per_page;
+       ext4_group_t first_group;
+       struct ext4_group_info *grp;
+
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       first_group = pnum * blocks_per_page / 2;
+       /* release locks on all the groups */
+       for (i = 0; i < locked_group; i++) {
+
+               grp = ext4_get_group_info(sb, first_group + i);
+               /* take all groups write allocation
+                * semaphore. This make sure there is
+                * no block allocation going on in any
+                * of that groups
+                */
+               up_write(&grp->alloc_sem);
+       }
+
+}
+
+static int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+{
+
+       int ret;
+       void *bitmap;
+       int blocks_per_page;
+       int block, pnum, poff;
+       int num_grp_locked = 0;
+       struct ext4_group_info *this_grp;
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+       struct inode *inode = sbi->s_buddy_cache;
+       struct page *page = NULL, *bitmap_page = NULL;
+
+       mb_debug("init group %lu\n", group);
+       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+       this_grp = ext4_get_group_info(sb, group);
+       /*
+        * This ensures we don't add group
+        * to this buddy cache via resize
+        */
+       num_grp_locked =  ext4_mb_get_buddy_cache_lock(sb, group);
+       if (!EXT4_MB_GRP_NEED_INIT(this_grp)) {
+               /*
+                * somebody initialized the group
+                * return without doing anything
+                */
+               ret = 0;
+               goto err;
+       }
+       /*
+        * the buddy cache inode stores the block bitmap
+        * and buddy information in consecutive blocks.
+        * So for each group we need two blocks.
+        */
+       block = group * 2;
+       pnum = block / blocks_per_page;
+       poff = block % blocks_per_page;
+       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       if (page) {
+               BUG_ON(page->mapping != inode->i_mapping);
+               ret = ext4_mb_init_cache(page, NULL);
+               if (ret) {
+                       unlock_page(page);
+                       goto err;
+               }
+               unlock_page(page);
+       }
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
+               goto err;
+       }
+       mark_page_accessed(page);
+       bitmap_page = page;
+       bitmap = page_address(page) + (poff * sb->s_blocksize);
+
+       /* init buddy cache */
+       block++;
+       pnum = block / blocks_per_page;
+       poff = block % blocks_per_page;
+       page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+       if (page == bitmap_page) {
+               /*
+                * If both the bitmap and buddy are in
+                * the same page we don't need to force
+                * init the buddy
+                */
+               unlock_page(page);
+       } else if (page) {
+               BUG_ON(page->mapping != inode->i_mapping);
+               ret = ext4_mb_init_cache(page, bitmap);
+               if (ret) {
+                       unlock_page(page);
+                       goto err;
+               }
+               unlock_page(page);
+       }
+       if (page == NULL || !PageUptodate(page)) {
+               ret = -EIO;
+               goto err;
+       }
+       mark_page_accessed(page);
+err:
+       ext4_mb_put_buddy_cache_lock(sb, group, num_grp_locked);
+       if (bitmap_page)
+               page_cache_release(bitmap_page);
+       if (page)
+               page_cache_release(page);
+       return ret;
+}
+
 static noinline_for_stack int
 ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
 {
@@ -1779,7 +1970,7 @@ repeat:
                                group = 0;
 
                        /* quick check to skip empty groups */
-                       grp = ext4_get_group_info(ac->ac_sb, group);
+                       grp = ext4_get_group_info(sb, group);
                        if (grp->bb_free == 0)
                                continue;
 
@@ -1792,10 +1983,9 @@ repeat:
                                 * we need full data about the group
                                 * to make a good selection
                                 */
-                               err = ext4_mb_load_buddy(sb, group, &e4b);
+                               err = ext4_mb_init_group(sb, group);
                                if (err)
                                        goto out;
-                               ext4_mb_release_desc(&e4b);
                        }
 
                        /*
@@ -2246,7 +2436,7 @@ ext4_mb_store_history(struct ext4_allocation_context *ac)
 
 
 /* Create and initialize ext4_group_info data for the given group. */
-static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
+int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
                          struct ext4_group_desc *desc)
 {
        int i, len;
@@ -2304,6 +2494,7 @@ static int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
        }
 
        INIT_LIST_HEAD(&meta_group_info[i]->bb_prealloc_list);
+       init_rwsem(&meta_group_info[i]->alloc_sem);
        meta_group_info[i]->bb_free_root.rb_node = NULL;;
 
 #ifdef DOUBLE_CHECK
@@ -2330,54 +2521,6 @@ exit_meta_group_info:
        return -ENOMEM;
 } /* ext4_mb_add_groupinfo */
 
-/*
- * Add a group to the existing groups.
- * This function is used for online resize
- */
-int ext4_mb_add_more_groupinfo(struct super_block *sb, ext4_group_t group,
-                              struct ext4_group_desc *desc)
-{
-       struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct inode *inode = sbi->s_buddy_cache;
-       int blocks_per_page;
-       int block;
-       int pnum;
-       struct page *page;
-       int err;
-
-       /* Add group based on group descriptor*/
-       err = ext4_mb_add_groupinfo(sb, group, desc);
-       if (err)
-               return err;
-
-       /*
-        * Cache pages containing dynamic mb_alloc datas (buddy and bitmap
-        * datas) are set not up to date so that they will be re-initilaized
-        * during the next call to ext4_mb_load_buddy
-        */
-
-       /* Set buddy page as not up to date */
-       blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-       block = group * 2;
-       pnum = block / blocks_per_page;
-       page = find_get_page(inode->i_mapping, pnum);
-       if (page != NULL) {
-               ClearPageUptodate(page);
-               page_cache_release(page);
-       }
-
-       /* Set bitmap page as not up to date */
-       block++;
-       pnum = block / blocks_per_page;
-       page = find_get_page(inode->i_mapping, pnum);
-       if (page != NULL) {
-               ClearPageUptodate(page);
-               page_cache_release(page);
-       }
-
-       return 0;
-}
-
 /*
  * Update an existing group.
  * This function is used for online resize
@@ -4588,11 +4731,6 @@ do_more:
        err = ext4_journal_get_write_access(handle, gd_bh);
        if (err)
                goto error_return;
-
-       err = ext4_mb_load_buddy(sb, block_group, &e4b);
-       if (err)
-               goto error_return;
-
 #ifdef AGGRESSIVE_CHECK
        {
                int i;
@@ -4606,6 +4744,8 @@ do_more:
        /* We dirtied the bitmap block */
        BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
        err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
+       if (err)
+               goto error_return;
 
        if (ac) {
                ac->ac_b_ex.fe_group = block_group;
@@ -4614,6 +4754,9 @@ do_more:
                ext4_mb_store_history(ac);
        }
 
+       err = ext4_mb_load_buddy(sb, block_group, &e4b);
+       if (err)
+               goto error_return;
        if (metadata && ext4_handle_valid(handle)) {
                /* blocks being freed are metadata. these blocks shouldn't
                 * be used until this transaction is committed */
index b5dff1fff1e5b33af1c583f026b883fae74eca82..a931b6b4f6ada89d9b095bd5a022ed9839669b9b 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/version.h>
 #include <linux/blkdev.h>
 #include <linux/marker.h>
+#include <linux/mutex.h>
 #include "ext4_jbd2.h"
 #include "ext4.h"
 #include "group.h"
@@ -130,6 +131,7 @@ struct ext4_group_info {
 #ifdef DOUBLE_CHECK
        void            *bb_bitmap;
 #endif
+       struct rw_semaphore alloc_sem;
        unsigned short  bb_counters[];
 };
 
@@ -250,6 +252,7 @@ struct ext4_buddy {
        struct super_block *bd_sb;
        __u16 bd_blkbits;
        ext4_group_t bd_group;
+       struct rw_semaphore *alloc_semp;
 };
 #define EXT4_MB_BITMAP(e4b)    ((e4b)->bd_bitmap)
 #define EXT4_MB_BUDDY(e4b)     ((e4b)->bd_buddy)
index 526db73701b4b5c08d17b7f7241bda95673a41b9..92034d2c8a73e1437781c4e06aebafba7a6fb6de 100644 (file)
@@ -748,6 +748,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
        struct inode *inode = NULL;
        handle_t *handle;
        int gdb_off, gdb_num;
+       int num_grp_locked = 0;
        int err, err2;
 
        gdb_num = input->group / EXT4_DESC_PER_BLOCK(sb);
@@ -788,6 +789,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
                }
        }
 
+
        if ((err = verify_group_input(sb, input)))
                goto exit_put;
 
@@ -856,6 +858,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
          * using the new disk blocks.
          */
 
+       num_grp_locked = ext4_mb_get_buddy_cache_lock(sb, input->group);
        /* Update group descriptor block for new group */
        gdp = (struct ext4_group_desc *)((char *)primary->b_data +
                                         gdb_off * EXT4_DESC_SIZE(sb));
@@ -872,9 +875,11 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
         * We can allocate memory for mb_alloc based on the new group
         * descriptor
         */
-       err = ext4_mb_add_more_groupinfo(sb, input->group, gdp);
-       if (err)
+       err = ext4_mb_add_groupinfo(sb, input->group, gdp);
+       if (err) {
+               ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
                goto exit_journal;
+       }
 
        /*
         * Make the new blocks and inodes valid next.  We do this before
@@ -916,6 +921,7 @@ int ext4_group_add(struct super_block *sb, struct ext4_new_group_data *input)
 
        /* Update the global fs size fields */
        sbi->s_groups_count++;
+       ext4_mb_put_buddy_cache_lock(sb, input->group, num_grp_locked);
 
        ext4_handle_dirty_metadata(handle, NULL, primary);
 
@@ -1082,45 +1088,6 @@ int ext4_group_extend(struct super_block *sb, struct ext4_super_block *es,
        if ((err = ext4_journal_stop(handle)))
                goto exit_put;
 
-       /*
-        * Mark mballoc pages as not up to date so that they will be updated
-        * next time they are loaded by ext4_mb_load_buddy.
-        *
-        * XXX Bad, Bad, BAD!!!  We should not be overloading the
-        * Uptodate flag, particularly on thte bitmap bh, as way of
-        * hinting to ext4_mb_load_buddy() that it needs to be
-        * overloaded.  A user could take a LVM snapshot, then do an
-        * on-line fsck, and clear the uptodate flag, and this would
-        * not be a bug in userspace, but a bug in the kernel.  FIXME!!!
-        */
-       {
-               struct ext4_sb_info *sbi = EXT4_SB(sb);
-               struct inode *inode = sbi->s_buddy_cache;
-               int blocks_per_page;
-               int block;
-               int pnum;
-               struct page *page;
-
-               /* Set buddy page as not up to date */
-               blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
-               block = group * 2;
-               pnum = block / blocks_per_page;
-               page = find_get_page(inode->i_mapping, pnum);
-               if (page != NULL) {
-                       ClearPageUptodate(page);
-                       page_cache_release(page);
-               }
-
-               /* Set bitmap page as not up to date */
-               block++;
-               pnum = block / blocks_per_page;
-               page = find_get_page(inode->i_mapping, pnum);
-               if (page != NULL) {
-                       ClearPageUptodate(page);
-                       page_cache_release(page);
-               }
-       }
-
        if (test_opt(sb, DEBUG))
                printk(KERN_DEBUG "EXT4-fs: extended group to %llu blocks\n",
                       ext4_blocks_count(es));