}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
+static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
/*
* Test whether an inode is a fast symlink.
* parent to disk.
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+ if (unlikely(!bh)) {
+ err = -EIO;
+ goto failed;
+ }
+
branch[n].bh = bh;
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
break;
idx++;
num++;
- if (num >= max_pages)
+ if (num >= max_pages) {
+ done = 1;
break;
+ }
}
pagevec_release(&pvec);
}
*
* As pages are already locked by write_cache_pages(), we can't use it
*/
-static int mpage_da_submit_io(struct mpage_da_data *mpd)
+static int mpage_da_submit_io(struct mpage_da_data *mpd,
+ struct ext4_map_blocks *map)
{
- long pages_skipped;
struct pagevec pvec;
unsigned long index, end;
int ret = 0, err, nr_pages, i;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
+ loff_t size = i_size_read(inode);
+ unsigned int len, block_start;
+ struct buffer_head *bh, *page_bufs = NULL;
+ int journal_data = ext4_should_journal_data(inode);
+ sector_t pblock = 0, cur_logical = 0;
+ struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page);
+ memset(&io_submit, 0, sizeof(io_submit));
/*
* We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads.
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
+ int commit_write = 0, redirty_page = 0;
struct page *page = pvec.pages[i];
index = page->index;
if (index > end)
break;
+
+ if (index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+ if (map) {
+ cur_logical = index << (PAGE_CACHE_SHIFT -
+ inode->i_blkbits);
+ pblock = map->m_pblk + (cur_logical -
+ map->m_lblk);
+ }
index++;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
- pages_skipped = mpd->wbc->pages_skipped;
- err = mapping->a_ops->writepage(page, mpd->wbc);
- if (!err && (pages_skipped == mpd->wbc->pages_skipped))
- /*
- * have successfully written the page
- * without skipping the same
- */
- mpd->pages_written++;
/*
- * In error case, we have to continue because
- * remaining pages are still locked
- * XXX: unlock and re-dirty them?
+ * If the page does not have buffers (for
+ * whatever reason), try to create them using
+ * block_prepare_write. If this fails,
+ * redirty the page and move on.
*/
- if (ret == 0)
- ret = err;
- }
- pagevec_release(&pvec);
- }
- return ret;
-}
-
-/*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
-static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
- struct ext4_map_blocks *map)
-{
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
- int blocks = map->m_len;
- sector_t pblock = map->m_pblk, cur_logical;
- struct buffer_head *head, *bh;
- pgoff_t index, end;
- struct pagevec pvec;
- int nr_pages, i;
-
- index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- pagevec_init(&pvec, 0);
-
- while (index <= end) {
- /* XXX: optimize tail */
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- index = page->index;
- if (index > end)
- break;
- index++;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- BUG_ON(!page_has_buffers(page));
-
- bh = page_buffers(page);
- head = bh;
-
- /* skip blocks out of the range */
- do {
- if (cur_logical >= map->m_lblk)
- break;
- cur_logical++;
- } while ((bh = bh->b_this_page) != head);
+ if (!page_has_buffers(page)) {
+ if (block_prepare_write(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
+ redirty_page_for_writepage(mpd->wbc,
+ page);
+ unlock_page(page);
+ continue;
+ }
+ commit_write = 1;
+ }
+ bh = page_bufs = page_buffers(page);
+ block_start = 0;
do {
- if (cur_logical >= map->m_lblk + blocks)
- break;
-
- if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
- BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
+ if (!bh)
+ goto redirty_page;
+ if (map && (cur_logical >= map->m_lblk) &&
+ (cur_logical <= (map->m_lblk +
+ (map->m_len - 1)))) {
if (buffer_delay(bh)) {
clear_buffer_delay(bh);
bh->b_blocknr = pblock;
- } else {
- /*
- * unwritten already should have
- * blocknr assigned. Verify that
- */
- clear_buffer_unwritten(bh);
- BUG_ON(bh->b_blocknr != pblock);
}
+ if (buffer_unwritten(bh) ||
+ buffer_mapped(bh))
+ BUG_ON(bh->b_blocknr != pblock);
+ if (map->m_flags & EXT4_MAP_UNINIT)
+ set_buffer_uninit(bh);
+ clear_buffer_unwritten(bh);
+ }
- } else if (buffer_mapped(bh))
- BUG_ON(bh->b_blocknr != pblock);
-
- if (map->m_flags & EXT4_MAP_UNINIT)
- set_buffer_uninit(bh);
+ /* redirty page if block allocation undone */
+ if (buffer_delay(bh) || buffer_unwritten(bh))
+ redirty_page = 1;
+ bh = bh->b_this_page;
+ block_start += bh->b_size;
cur_logical++;
pblock++;
- } while ((bh = bh->b_this_page) != head);
+ } while (bh != page_bufs);
+
+ if (redirty_page)
+ goto redirty_page;
+
+ if (commit_write)
+ /* mark the buffer_heads as dirty & uptodate */
+ block_commit_write(page, 0, len);
+
+ /*
+ * Delalloc doesn't support data journalling,
+ * but eventually maybe we'll lift this
+ * restriction.
+ */
+ if (unlikely(journal_data && PageChecked(page)))
+ err = __ext4_journalled_writepage(page, len);
+ else
+ err = ext4_bio_write_page(&io_submit, page,
+ len, mpd->wbc);
+
+ if (!err)
+ mpd->pages_written++;
+ /*
+ * In error case, we have to continue because
+ * remaining pages are still locked
+ */
+ if (ret == 0)
+ ret = err;
}
pagevec_release(&pvec);
}
+ ext4_io_submit(&io_submit);
+ return ret;
}
-
static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
sector_t logical, long blk_cnt)
{
}
/*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ * if necessary, and then submit them for I/O
*
* @mpd - bh describing space
*
* The function skips space we know is already mapped to disk blocks.
*
*/
-static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
{
int err, blks, get_blocks_flags;
- struct ext4_map_blocks map;
+ struct ext4_map_blocks map, *mapp = NULL;
sector_t next = mpd->b_blocknr;
unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
handle_t *handle = NULL;
/*
- * We consider only non-mapped and non-allocated blocks
+ * If the blocks are mapped already, or we couldn't accumulate
+ * any blocks, then proceed immediately to the submission stage.
*/
- if ((mpd->b_state & (1 << BH_Mapped)) &&
- !(mpd->b_state & (1 << BH_Delay)) &&
- !(mpd->b_state & (1 << BH_Unwritten)))
- return 0;
-
- /*
- * If we didn't accumulate anything to write simply return
- */
- if (!mpd->b_size)
- return 0;
+ if ((mpd->b_size == 0) ||
+ ((mpd->b_state & (1 << BH_Mapped)) &&
+ !(mpd->b_state & (1 << BH_Delay)) &&
+ !(mpd->b_state & (1 << BH_Unwritten))))
+ goto submit_io;
handle = ext4_journal_current_handle();
BUG_ON(!handle);
err = blks;
/*
- * If get block returns with error we simply
- * return. Later writepage will redirty the page and
- * writepages will find the dirty page again
+ * If get block returns EAGAIN or ENOSPC and there
+ * appears to be free blocks we will call
+ * ext4_writepage() for all of the pages which will
+ * just redirty the pages.
*/
if (err == -EAGAIN)
- return 0;
+ goto submit_io;
if (err == -ENOSPC &&
ext4_count_free_blocks(sb)) {
mpd->retval = err;
- return 0;
+ goto submit_io;
}
/*
/* invalidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
mpd->b_size >> mpd->inode->i_blkbits);
- return err;
+ return;
}
BUG_ON(blks == 0);
+ mapp = ↦
if (map.m_flags & EXT4_MAP_NEW) {
struct block_device *bdev = mpd->inode->i_sb->s_bdev;
int i;
unmap_underlying_metadata(bdev, map.m_pblk + i);
}
- /*
- * If blocks are delayed marked, we need to
- * put actual blocknr and drop delayed bit
- */
- if ((mpd->b_state & (1 << BH_Delay)) ||
- (mpd->b_state & (1 << BH_Unwritten)))
- mpage_put_bnr_to_bhs(mpd, &map);
-
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err)
- return err;
+ /* This only happens if the journal is aborted */
+ return;
}
/*
disksize = i_size_read(mpd->inode);
if (disksize > EXT4_I(mpd->inode)->i_disksize) {
ext4_update_i_disksize(mpd->inode, disksize);
- return ext4_mark_inode_dirty(handle, mpd->inode);
+ err = ext4_mark_inode_dirty(handle, mpd->inode);
+ if (err)
+ ext4_error(mpd->inode->i_sb,
+ "Failed to mark inode %lu dirty",
+ mpd->inode->i_ino);
}
- return 0;
+submit_io:
+ mpage_da_submit_io(mpd, mapp);
+ mpd->io_done = 1;
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
- mpd->io_done = 1;
+ mpage_da_map_and_submit(mpd);
return;
}
if (mpd->next_page != page->index) {
/*
* Nope, we can't. So, we map non-allocated blocks
- * and start IO on them using writepage()
+ * and start IO on them
*/
if (mpd->next_page != mpd->first_page) {
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
+ mpage_da_map_and_submit(mpd);
/*
* skip rest of the page in the page_vec
*/
- mpd->io_done = 1;
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return MPAGE_DA_EXTENT_TAIL;
int ret = 0;
int err;
+ ClearPageChecked(page);
page_bufs = page_buffers(page);
BUG_ON(!page_bufs);
walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
static int ext4_writepage(struct page *page,
struct writeback_control *wbc)
{
- int ret = 0;
+ int ret = 0, commit_write = 0;
loff_t size;
unsigned int len;
struct buffer_head *page_bufs = NULL;
else
len = PAGE_CACHE_SIZE;
- if (page_has_buffers(page)) {
- page_bufs = page_buffers(page);
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- /*
- * We don't want to do block allocation
- * So redirty the page and return
- * We may reach here when we do a journal commit
- * via journal_submit_inode_data_buffers.
- * If we don't have mapping block we just ignore
- * them. We can also reach here via shrink_page_list
- */
+ /*
+ * If the page does not have buffers (for whatever reason),
+ * try to create them using block_prepare_write. If this
+ * fails, redirty the page and move on.
+ */
+ if (!page_buffers(page)) {
+ if (block_prepare_write(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
- } else {
+ commit_write = 1;
+ }
+ page_bufs = page_buffers(page);
+ if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+ ext4_bh_delay_or_unwritten)) {
/*
- * The test for page_has_buffers() is subtle:
- * We know the page is dirty but it lost buffers. That means
- * that at some moment in time after write_begin()/write_end()
- * has been called all buffers have been clean and thus they
- * must have been written at least once. So they are all
- * mapped and we can happily proceed with mapping them
- * and writing the page.
- *
- * Try to initialize the buffer_heads and check whether
- * all are mapped and non delay. We don't want to
- * do block allocation here.
+ * We don't want to do block allocation So redirty the
+ * page and return We may reach here when we do a
+ * journal commit via
+ * journal_submit_inode_data_buffers. If we don't
+ * have mapping block we just ignore them. We can also
+ * reach here via shrink_page_list
*/
- ret = block_prepare_write(page, 0, len,
- noalloc_get_block_write);
- if (!ret) {
- page_bufs = page_buffers(page);
- /* check whether all are mapped and non delay */
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
- } else {
- /*
- * We can't do block allocation here
- * so just redity the page and unlock
- * and return
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
+ goto redirty_page;
+ }
+ if (commit_write)
/* now mark the buffer_heads as dirty and uptodate */
block_commit_write(page, 0, len);
- }
- if (PageChecked(page) && ext4_should_journal_data(inode)) {
+ if (PageChecked(page) && ext4_should_journal_data(inode))
/*
* It's mmapped pagecache. Add buffers and journal it. There
* doesn't seem much point in redirtying the page here.
*/
- ClearPageChecked(page);
return __ext4_journalled_writepage(page, len);
- }
- if (page_bufs && buffer_uninit(page_bufs)) {
+ if (buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write);
* sbi->max_writeback_mb_bump whichever is smaller.
*/
max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
- if (!range_cyclic && range_whole)
- desired_nr_to_write = wbc->nr_to_write * 8;
- else
+ if (!range_cyclic && range_whole) {
+ if (wbc->nr_to_write == LONG_MAX)
+ desired_nr_to_write = wbc->nr_to_write;
+ else
+ desired_nr_to_write = wbc->nr_to_write * 8;
+ } else
desired_nr_to_write = ext4_num_dirty_pages(inode, index,
max_pages);
if (desired_nr_to_write > max_pages)
* them for I/O.
*/
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- if (mpage_da_map_blocks(&mpd) == 0)
- mpage_da_submit_io(&mpd);
- mpd.io_done = 1;
+ mpage_da_map_and_submit(&mpd);
ret = MPAGE_DA_EXTENT_TAIL;
}
trace_ext4_da_write_pages(inode, &mpd);
return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}
-static void ext4_free_io_end(ext4_io_end_t *io)
-{
- BUG_ON(!io);
- if (io->page)
- put_page(io->page);
- iput(io->inode);
- kfree(io);
-}
-
static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh;
#endif
}
-/*
- * check a range of space and convert unwritten extents to written.
- */
-static int ext4_end_io_nolock(ext4_io_end_t *io)
-{
- struct inode *inode = io->inode;
- loff_t offset = io->offset;
- ssize_t size = io->size;
- int ret = 0;
-
- ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
- "list->prev 0x%p\n",
- io, inode->i_ino, io->list.next, io->list.prev);
-
- if (list_empty(&io->list))
- return ret;
-
- if (io->flag != EXT4_IO_UNWRITTEN)
- return ret;
-
- ret = ext4_convert_unwritten_extents(inode, offset, size);
- if (ret < 0) {
- printk(KERN_EMERG "%s: failed to convert unwritten"
- "extents to written extents, error is %d"
- " io is still on inode %lu aio dio list\n",
- __func__, ret, inode->i_ino);
- return ret;
- }
-
- if (io->iocb)
- aio_complete(io->iocb, io->result, 0);
- /* clear the DIO AIO unwritten flag */
- io->flag = 0;
- return ret;
-}
-
-/*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
-static void ext4_end_io_work(struct work_struct *work)
-{
- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
- struct inode *inode = io->inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- ret = ext4_end_io_nolock(io);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- return;
- }
-
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (!list_empty(&io->list))
- list_del_init(&io->list);
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- mutex_unlock(&inode->i_mutex);
- ext4_free_io_end(io);
-}
-
/*
* This function is called from ext4_sync_file().
*
return (ret2 < 0) ? ret2 : 0;
}
-static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
-{
- ext4_io_end_t *io = NULL;
-
- io = kmalloc(sizeof(*io), flags);
-
- if (io) {
- igrab(inode);
- io->inode = inode;
- io->flag = 0;
- io->offset = 0;
- io->size = 0;
- io->page = NULL;
- io->iocb = NULL;
- io->result = 0;
- INIT_WORK(&io->work, ext4_end_io_work);
- INIT_LIST_HEAD(&io->list);
- }
-
- return io;
-}
-
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private, int ret,
bool is_async)
size);
/* if not aio dio with unwritten extents, just free io and return */
- if (io_end->flag != EXT4_IO_UNWRITTEN){
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
iocb->private = NULL;
out:
}
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
-
/* Add the io_end to per-inode completed aio dio list*/
ei = EXT4_I(io_end->inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+ /* queue the work to convert unwritten extents to written */
+ queue_work(wq, &io_end->work);
iocb->private = NULL;
}
goto out;
}
- io_end->flag = EXT4_IO_UNWRITTEN;
+ io_end->flag = EXT4_IO_END_UNWRITTEN;
inode = io_end->inode;
/* Add the io_end to per-inode completed io list*/