bbs.cooldavid.org Git - net-next-2.6.git/commitdiff
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
author Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Nov 2010 19:54:53 +0000 (11:54 -0800)
committer Linus Torvalds <torvalds@linux-foundation.org>
Mon, 8 Nov 2010 19:54:53 +0000 (11:54 -0800)
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4:
  ext4: Add new ext4 inode tracepoints
  ext4: Don't call sb_issue_discard() in ext4_free_blocks()
  ext4: do not try to grab the s_umount semaphore in ext4_quota_off
  ext4: fix potential race when freeing ext4_io_page structures
  ext4: handle writeback of inodes which are being freed
  ext4: initialize the percpu counters before replaying the journal
  ext4: "ret" may be used uninitialized in ext4_lazyinit_thread()
  ext4: fix lazyinit hang after removing request

fs/ext4/ext4.h
fs/ext4/inode.c
fs/ext4/mballoc.c
fs/ext4/page-io.c
fs/ext4/super.c
include/trace/events/ext4.h

diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 8b5dd6369f82c19d5ba28dea07018e4fb13a025f..6a5edea2d70b3ac7c56e8b272686b5a799eabbcf 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -177,7 +177,7 @@ struct mpage_da_data {
 
 struct ext4_io_page {
        struct page     *p_page;
-       int             p_count;
+       atomic_t        p_count;
 };
 
 #define MAX_IO_PAGES 128
@@ -858,6 +858,7 @@ struct ext4_inode_info {
        spinlock_t i_completed_io_lock;
        /* current io_end structure for async DIO write*/
        ext4_io_end_t *cur_aio_dio;
+       atomic_t i_ioend_count; /* Number of outstanding io_end structs */
 
        /*
         * Transactions that contain inode's metadata needed to complete
@@ -2060,6 +2061,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
 /* page-io.c */
 extern int __init ext4_init_pageio(void);
 extern void ext4_exit_pageio(void);
+extern void ext4_ioend_wait(struct inode *);
 extern void ext4_free_io_end(ext4_io_end_t *io);
 extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
 extern int ext4_end_io_nolock(ext4_io_end_t *io);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4d78342f3bf07ec61dbe8346eef88706aa5abebb..bdbe69902207c151df025d98690c4c016cf0b0e9 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -53,6 +53,7 @@
 static inline int ext4_begin_ordered_truncate(struct inode *inode,
                                              loff_t new_size)
 {
+       trace_ext4_begin_ordered_truncate(inode, new_size);
        return jbd2_journal_begin_ordered_truncate(
                                        EXT4_SB(inode->i_sb)->s_journal,
                                        &EXT4_I(inode)->jinode,
@@ -178,6 +179,7 @@ void ext4_evict_inode(struct inode *inode)
        handle_t *handle;
        int err;
 
+       trace_ext4_evict_inode(inode);
        if (inode->i_nlink) {
                truncate_inode_pages(&inode->i_data, 0);
                goto no_delete;
@@ -5647,6 +5649,7 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
        int err, ret;
 
        might_sleep();
+       trace_ext4_mark_inode_dirty(inode, _RET_IP_);
        err = ext4_reserve_inode_write(handle, inode, &iloc);
        if (ext4_handle_valid(handle) &&
            EXT4_I(inode)->i_extra_isize < sbi->s_want_extra_isize &&
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index c58eba34724a4281f1cb8bb405ab607ef09b49ff..5b4d4e3a4d58e3506c15e891e7b8df394bc0838f 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -4640,8 +4640,6 @@ do_more:
                 * with group lock held. generate_buddy look at
                 * them with group lock_held
                 */
-               if (test_opt(sb, DISCARD))
-                       ext4_issue_discard(sb, block_group, bit, count);
                ext4_lock_group(sb, block_group);
                mb_clear_bits(bitmap_bh->b_data, bit, count);
                mb_free_blocks(inode, &e4b, bit, count);
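diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 46a7d6a9d9764b82075071c94f661cc65c3675fc..7f5451cd1d38bff2399471750618fc7429ce5c5e 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -32,8 +32,14 @@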
 
 static struct kmem_cache *io_page_cachep, *io_end_cachep;
 
+#define WQ_HASH_SZ             37
+#define to_ioend_wq(v) (&ioend_wq[((unsigned long)v) % WQ_HASH_SZ])
+static wait_queue_head_t ioend_wq[WQ_HASH_SZ];
+
 int __init ext4_init_pageio(void)
 {
+       int i;
+
        io_page_cachep = KMEM_CACHE(ext4_io_page, SLAB_RECLAIM_ACCOUNT);
        if (io_page_cachep == NULL)
                return -ENOMEM;
@@ -42,6 +48,8 @@ int __init ext4_init_pageio(void)
                kmem_cache_destroy(io_page_cachep);
                return -ENOMEM;
        }
+       for (i = 0; i < WQ_HASH_SZ; i++)
+               init_waitqueue_head(&ioend_wq[i]);
 
        return 0;
 }
@@ -52,24 +60,37 @@ void ext4_exit_pageio(void)
        kmem_cache_destroy(io_page_cachep);
 }
 
+void ext4_ioend_wait(struct inode *inode)
+{
+       wait_queue_head_t *wq = to_ioend_wq(inode);
+
+       wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
+}
+
+static void put_io_page(struct ext4_io_page *io_page)
+{
+       if (atomic_dec_and_test(&io_page->p_count)) {
+               end_page_writeback(io_page->p_page);
+               put_page(io_page->p_page);
+               kmem_cache_free(io_page_cachep, io_page);
+       }
+}
+
 void ext4_free_io_end(ext4_io_end_t *io)
 {
        int i;
+       wait_queue_head_t *wq;
 
        BUG_ON(!io);
        if (io->page)
                put_page(io->page);
-       for (i = 0; i < io->num_io_pages; i++) {
-               if (--io->pages[i]->p_count == 0) {
-                       struct page *page = io->pages[i]->p_page;
-
-                       end_page_writeback(page);
-                       put_page(page);
-                       kmem_cache_free(io_page_cachep, io->pages[i]);
-               }
-       }
+       for (i = 0; i < io->num_io_pages; i++)
+               put_io_page(io->pages[i]);
        io->num_io_pages = 0;
-       iput(io->inode);
+       wq = to_ioend_wq(io->inode);
+       if (atomic_dec_and_test(&EXT4_I(io->inode)->i_ioend_count) &&
+           waitqueue_active(wq))
+               wake_up_all(wq);
        kmem_cache_free(io_end_cachep, io);
 }
 
@@ -142,8 +163,8 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
        io = kmem_cache_alloc(io_end_cachep, flags);
        if (io) {
                memset(io, 0, sizeof(*io));
-               io->inode = igrab(inode);
-               BUG_ON(!io->inode);
+               atomic_inc(&EXT4_I(inode)->i_ioend_count);
+               io->inode = inode;
                INIT_WORK(&io->work, ext4_end_io_work);
                INIT_LIST_HEAD(&io->list);
        }
@@ -171,35 +192,15 @@ static void ext4_end_bio(struct bio *bio, int error)
        struct workqueue_struct *wq;
        struct inode *inode;
        unsigned long flags;
-       ext4_fsblk_t err_block;
        int i;
 
        BUG_ON(!io_end);
-       inode = io_end->inode;
        bio->bi_private = NULL;
        bio->bi_end_io = NULL;
        if (test_bit(BIO_UPTODATE, &bio->bi_flags))
                error = 0;
-       err_block = bio->bi_sector >> (inode->i_blkbits - 9);
        bio_put(bio);
 
-       if (!(inode->i_sb->s_flags & MS_ACTIVE)) {
-               pr_err("sb umounted, discard end_io request for inode %lu\n",
-                       io_end->inode->i_ino);
-               ext4_free_io_end(io_end);
-               return;
-       }
-
-       if (error) {
-               io_end->flag |= EXT4_IO_END_ERROR;
-               ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
-                            "(offset %llu size %ld starting block %llu)",
-                            inode->i_ino,
-                            (unsigned long long) io_end->offset,
-                            (long) io_end->size,
-                            (unsigned long long) err_block);
-       }
-
        for (i = 0; i < io_end->num_io_pages; i++) {
                struct page *page = io_end->pages[i]->p_page;
                struct buffer_head *bh, *head;
@@ -236,13 +237,7 @@ static void ext4_end_bio(struct bio *bio, int error)
                        } while (bh != head);
                }
 
-               if (--io_end->pages[i]->p_count == 0) {
-                       struct page *page = io_end->pages[i]->p_page;
-
-                       end_page_writeback(page);
-                       put_page(page);
-                       kmem_cache_free(io_page_cachep, io_end->pages[i]);
-               }
+               put_io_page(io_end->pages[i]);
 
                /*
                 * If this is a partial write which happened to make
@@ -254,8 +249,19 @@ static void ext4_end_bio(struct bio *bio, int error)
                if (!partial_write)
                        SetPageUptodate(page);
        }
-
        io_end->num_io_pages = 0;
+       inode = io_end->inode;
+
+       if (error) {
+               io_end->flag |= EXT4_IO_END_ERROR;
+               ext4_warning(inode->i_sb, "I/O error writing to inode %lu "
+                            "(offset %llu size %ld starting block %llu)",
+                            inode->i_ino,
+                            (unsigned long long) io_end->offset,
+                            (long) io_end->size,
+                            (unsigned long long)
+                            bio->bi_sector >> (inode->i_blkbits - 9));
+       }
 
        /* Add the io_end to per-inode completed io list*/
        spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
@@ -305,7 +311,6 @@ static int io_submit_init(struct ext4_io_submit *io,
        bio->bi_private = io->io_end = io_end;
        bio->bi_end_io = ext4_end_bio;
 
-       io_end->inode = inode;
        io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh);
 
        io->io_bio = bio;
@@ -360,7 +365,7 @@ submit_and_retry:
        if ((io_end->num_io_pages == 0) ||
            (io_end->pages[io_end->num_io_pages-1] != io_page)) {
                io_end->pages[io_end->num_io_pages++] = io_page;
-               io_page->p_count++;
+               atomic_inc(&io_page->p_count);
        }
        return 0;
 }
@@ -389,7 +394,7 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
                return -ENOMEM;
        }
        io_page->p_page = page;
-       io_page->p_count = 0;
+       atomic_set(&io_page->p_count, 1);
        get_page(page);
 
        for (bh = head = page_buffers(page), block_start = 0;
@@ -421,10 +426,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
         * PageWriteback bit from the page to prevent the system from
         * wedging later on.
         */
-       if (io_page->p_count == 0) {
-               put_page(page);
-               end_page_writeback(page);
-               kmem_cache_free(io_page_cachep, io_page);
-       }
+       put_io_page(io_page);
        return ret;
 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 40131b777af66c3112bb1df16265faadac435d0f..61182fe6254e94ad606a6c67fed1b30ef289ad38 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -828,12 +828,22 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
        ei->cur_aio_dio = NULL;
        ei->i_sync_tid = 0;
        ei->i_datasync_tid = 0;
+       atomic_set(&ei->i_ioend_count, 0);
 
        return &ei->vfs_inode;
 }
 
+static int ext4_drop_inode(struct inode *inode)
+{
+       int drop = generic_drop_inode(inode);
+
+       trace_ext4_drop_inode(inode, drop);
+       return drop;
+}
+
 static void ext4_destroy_inode(struct inode *inode)
 {
+       ext4_ioend_wait(inode);
        if (!list_empty(&(EXT4_I(inode)->i_orphan))) {
                ext4_msg(inode->i_sb, KERN_ERR,
                         "Inode %lu (%p): orphan list check failed!",
@@ -1173,6 +1183,7 @@ static const struct super_operations ext4_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+       .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .put_super      = ext4_put_super,
        .sync_fs        = ext4_sync_fs,
@@ -1194,6 +1205,7 @@ static const struct super_operations ext4_nojournal_sops = {
        .destroy_inode  = ext4_destroy_inode,
        .write_inode    = ext4_write_inode,
        .dirty_inode    = ext4_dirty_inode,
+       .drop_inode     = ext4_drop_inode,
        .evict_inode    = ext4_evict_inode,
        .write_super    = ext4_write_super,
        .put_super      = ext4_put_super,
@@ -2699,7 +2711,6 @@ static int ext4_lazyinit_thread(void *arg)
        struct ext4_li_request *elr;
        unsigned long next_wakeup;
        DEFINE_WAIT(wait);
-       int ret;
 
        BUG_ON(NULL == eli);
 
@@ -2723,13 +2734,12 @@ cont_thread:
                        elr = list_entry(pos, struct ext4_li_request,
                                         lr_request);
 
-                       if (time_after_eq(jiffies, elr->lr_next_sched))
-                               ret = ext4_run_li_request(elr);
-
-                       if (ret) {
-                               ret = 0;
-                               ext4_remove_li_request(elr);
-                               continue;
+                       if (time_after_eq(jiffies, elr->lr_next_sched)) {
+                               if (ext4_run_li_request(elr) != 0) {
+                                       /* error, remove the lazy_init job */
+                                       ext4_remove_li_request(elr);
+                                       continue;
+                               }
                        }
 
                        if (time_before(elr->lr_next_sched, next_wakeup))
@@ -2740,7 +2750,8 @@ cont_thread:
                if (freezing(current))
                        refrigerator();
 
-               if (time_after_eq(jiffies, next_wakeup)) {
+               if ((time_after_eq(jiffies, next_wakeup)) ||
+                   (MAX_JIFFY_OFFSET == next_wakeup)) {
                        cond_resched();
                        continue;
                }
@@ -3348,6 +3359,24 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        get_random_bytes(&sbi->s_next_generation, sizeof(u32));
        spin_lock_init(&sbi->s_next_gen_lock);
 
+       err = percpu_counter_init(&sbi->s_freeblocks_counter,
+                       ext4_count_free_blocks(sb));
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_freeinodes_counter,
+                               ext4_count_free_inodes(sb));
+       }
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_dirs_counter,
+                               ext4_count_dirs(sb));
+       }
+       if (!err) {
+               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
+       }
+       if (err) {
+               ext4_msg(sb, KERN_ERR, "insufficient memory");
+               goto failed_mount3;
+       }
+
        sbi->s_stripe = ext4_get_stripe_size(sbi);
        sbi->s_max_writeback_mb_bump = 128;
 
@@ -3446,22 +3475,19 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
        }
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
-no_journal:
-       err = percpu_counter_init(&sbi->s_freeblocks_counter,
-                                 ext4_count_free_blocks(sb));
-       if (!err)
-               err = percpu_counter_init(&sbi->s_freeinodes_counter,
-                                         ext4_count_free_inodes(sb));
-       if (!err)
-               err = percpu_counter_init(&sbi->s_dirs_counter,
-                                         ext4_count_dirs(sb));
-       if (!err)
-               err = percpu_counter_init(&sbi->s_dirtyblocks_counter, 0);
-       if (err) {
-               ext4_msg(sb, KERN_ERR, "insufficient memory");
-               goto failed_mount_wq;
-       }
+       /*
+        * The journal may have updated the bg summary counts, so we
+        * need to update the global counters.
+        */
+       percpu_counter_set(&sbi->s_freeblocks_counter,
+                          ext4_count_free_blocks(sb));
+       percpu_counter_set(&sbi->s_freeinodes_counter,
+                          ext4_count_free_inodes(sb));
+       percpu_counter_set(&sbi->s_dirs_counter,
+                          ext4_count_dirs(sb));
+       percpu_counter_set(&sbi->s_dirtyblocks_counter, 0);
 
+no_journal:
        EXT4_SB(sb)->dio_unwritten_wq = create_workqueue("ext4-dio-unwritten");
        if (!EXT4_SB(sb)->dio_unwritten_wq) {
                printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n");
@@ -3611,10 +3637,6 @@ failed_mount_wq:
                jbd2_journal_destroy(sbi->s_journal);
                sbi->s_journal = NULL;
        }
-       percpu_counter_destroy(&sbi->s_freeblocks_counter);
-       percpu_counter_destroy(&sbi->s_freeinodes_counter);
-       percpu_counter_destroy(&sbi->s_dirs_counter);
-       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount3:
        if (sbi->s_flex_groups) {
                if (is_vmalloc_addr(sbi->s_flex_groups))
@@ -3622,6 +3644,10 @@ failed_mount3:
                else
                        kfree(sbi->s_flex_groups);
        }
+       percpu_counter_destroy(&sbi->s_freeblocks_counter);
+       percpu_counter_destroy(&sbi->s_freeinodes_counter);
+       percpu_counter_destroy(&sbi->s_dirs_counter);
+       percpu_counter_destroy(&sbi->s_dirtyblocks_counter);
 failed_mount2:
        for (i = 0; i < db_count; i++)
                brelse(sbi->s_group_desc[i]);
@@ -3949,13 +3975,11 @@ static int ext4_commit_super(struct super_block *sb, int sync)
        else
                es->s_kbytes_written =
                        cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
-       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
-               ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
-                                       &EXT4_SB(sb)->s_freeblocks_counter));
-       if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
-               es->s_free_inodes_count =
-                       cpu_to_le32(percpu_counter_sum_positive(
-                                       &EXT4_SB(sb)->s_freeinodes_counter));
+       ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+                                          &EXT4_SB(sb)->s_freeblocks_counter));
+       es->s_free_inodes_count =
+               cpu_to_le32(percpu_counter_sum_positive(
+                               &EXT4_SB(sb)->s_freeinodes_counter));
        sb->s_dirt = 0;
        BUFFER_TRACE(sbh, "marking dirty");
        mark_buffer_dirty(sbh);
@@ -4556,12 +4580,10 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id,
 
 static int ext4_quota_off(struct super_block *sb, int type)
 {
-       /* Force all delayed allocation blocks to be allocated */
-       if (test_opt(sb, DELALLOC)) {
-               down_read(&sb->s_umount);
+       /* Force all delayed allocation blocks to be allocated.
+        * Caller already holds s_umount sem */
+       if (test_opt(sb, DELALLOC))
                sync_filesystem(sb);
-               up_read(&sb->s_umount);
-       }
 
        return dquot_quota_off(sb, type);
 }
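diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 289010d3270bfa99191cfbdfa95a02564b6638bf..e5e345fb2a5c37db45de06bcdf02f3796aec64c4 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h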
@@ -98,6 +98,103 @@ TRACE_EVENT(ext4_allocate_inode,
                  (unsigned long) __entry->dir, __entry->mode)
 );
 
+TRACE_EVENT(ext4_evict_inode,
+       TP_PROTO(struct inode *inode),
+
+       TP_ARGS(inode),
+
+       TP_STRUCT__entry(
+               __field(        int,   dev_major                )
+               __field(        int,   dev_minor                )
+               __field(        ino_t,  ino                     )
+               __field(        int,    nlink                   )
+       ),
+
+       TP_fast_assign(
+               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->ino    = inode->i_ino;
+               __entry->nlink  = inode->i_nlink;
+       ),
+
+       TP_printk("dev %d,%d ino %lu nlink %d",
+                 __entry->dev_major, __entry->dev_minor,
+                 (unsigned long) __entry->ino, __entry->nlink)
+);
+
+TRACE_EVENT(ext4_drop_inode,
+       TP_PROTO(struct inode *inode, int drop),
+
+       TP_ARGS(inode, drop),
+
+       TP_STRUCT__entry(
+               __field(        int,    dev_major               )
+               __field(        int,    dev_minor               )
+               __field(        ino_t,  ino                     )
+               __field(        int,    drop                    )
+       ),
+
+       TP_fast_assign(
+               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->ino    = inode->i_ino;
+               __entry->drop   = drop;
+       ),
+
+       TP_printk("dev %d,%d ino %lu drop %d",
+                 __entry->dev_major, __entry->dev_minor,
+                 (unsigned long) __entry->ino, __entry->drop)
+);
+
+TRACE_EVENT(ext4_mark_inode_dirty,
+       TP_PROTO(struct inode *inode, unsigned long IP),
+
+       TP_ARGS(inode, IP),
+
+       TP_STRUCT__entry(
+               __field(        int,    dev_major               )
+               __field(        int,    dev_minor               )
+               __field(        ino_t,  ino                     )
+               __field(unsigned long,  ip                      )
+       ),
+
+       TP_fast_assign(
+               __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+               __entry->dev_minor = MINOR(inode->i_sb->s_dev);
+               __entry->ino    = inode->i_ino;
+               __entry->ip     = IP;
+       ),
+
+       TP_printk("dev %d,%d ino %lu caller %pF",
+                 __entry->dev_major, __entry->dev_minor,
+                 (unsigned long) __entry->ino, (void *)__entry->ip)
+);
+
+TRACE_EVENT(ext4_begin_ordered_truncate,
+       TP_PROTO(struct inode *inode, loff_t new_size),
+
+       TP_ARGS(inode, new_size),
+
+       TP_STRUCT__entry(
+               __field(        int,    dev_major               )
+               __field(        int,    dev_minor               )
+               __field(        ino_t,  ino                     )
+               __field(        loff_t, new_size                )
+       ),
+
+       TP_fast_assign(
+               __entry->dev_major      = MAJOR(inode->i_sb->s_dev);
+               __entry->dev_minor      = MINOR(inode->i_sb->s_dev);
+               __entry->ino            = inode->i_ino;
+               __entry->new_size       = new_size;
+       ),
+
+       TP_printk("dev %d,%d ino %lu new_size %lld",
+                 __entry->dev_major, __entry->dev_minor,
+                 (unsigned long) __entry->ino,
+                 (long long) __entry->new_size)
+);
+
 DECLARE_EVENT_CLASS(ext4__write_begin,
 
        TP_PROTO(struct inode *inode, loff_t pos, unsigned int len,