#include "ext4_jbd2.h"
#include "ext4_extents.h"
-
- /*
- * ext_pblock:
- * combine low and high parts of physical block number into ext4_fsblk_t
- */
- ext4_fsblk_t ext_pblock(struct ext4_extent *ex)
- {
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ex->ee_start_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ex->ee_start_hi) << 31) << 1;
- return block;
- }
-
- /*
- * idx_pblock:
- * combine low and high parts of a leaf physical block number into ext4_fsblk_t
- */
- ext4_fsblk_t idx_pblock(struct ext4_extent_idx *ix)
- {
- ext4_fsblk_t block;
-
- block = le32_to_cpu(ix->ei_leaf_lo);
- block |= ((ext4_fsblk_t) le16_to_cpu(ix->ei_leaf_hi) << 31) << 1;
- return block;
- }
-
- /*
- * ext4_ext_store_pblock:
- * stores a large physical block number into an extent struct,
- * breaking it into parts
- */
- void ext4_ext_store_pblock(struct ext4_extent *ex, ext4_fsblk_t pb)
- {
- ex->ee_start_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ex->ee_start_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
- }
-
- /*
- * ext4_idx_store_pblock:
- * stores a large physical block number into an index struct,
- * breaking it into parts
- */
- static void ext4_idx_store_pblock(struct ext4_extent_idx *ix, ext4_fsblk_t pb)
- {
- ix->ei_leaf_lo = cpu_to_le32((unsigned long) (pb & 0xffffffff));
- ix->ei_leaf_hi = cpu_to_le16((unsigned long) ((pb >> 31) >> 1) & 0xffff);
- }
-
static int ext4_ext_truncate_extend_restart(handle_t *handle,
struct inode *inode,
int needed)
/* try to predict block placement */
ex = path[depth].p_ext;
if (ex)
- return ext_pblock(ex)+(block-le32_to_cpu(ex->ee_block));
+ return (ext4_ext_pblock(ex) +
+ (block - le32_to_cpu(ex->ee_block)));
/* it looks like index is empty;
* try to find starting block from index itself */
static int ext4_valid_extent(struct inode *inode, struct ext4_extent *ext)
{
- ext4_fsblk_t block = ext_pblock(ext);
+ ext4_fsblk_t block = ext4_ext_pblock(ext);
int len = ext4_ext_get_actual_len(ext);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, len);
static int ext4_valid_extent_idx(struct inode *inode,
struct ext4_extent_idx *ext_idx)
{
- ext4_fsblk_t block = idx_pblock(ext_idx);
+ ext4_fsblk_t block = ext4_idx_pblock(ext_idx);
return ext4_data_block_valid(EXT4_SB(inode->i_sb), block, 1);
}
for (k = 0; k <= l; k++, path++) {
if (path->p_idx) {
ext_debug(" %d->%llu", le32_to_cpu(path->p_idx->ei_block),
- idx_pblock(path->p_idx));
+ ext4_idx_pblock(path->p_idx));
} else if (path->p_ext) {
ext_debug(" %d:[%d]%d:%llu ",
le32_to_cpu(path->p_ext->ee_block),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext),
- ext_pblock(path->p_ext));
+ ext4_ext_pblock(path->p_ext));
} else
ext_debug(" []");
}
for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext_pblock(ex));
+ ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
}
ext_debug("\n");
}
path->p_idx = l - 1;
ext_debug(" -> %d->%lld ", le32_to_cpu(path->p_idx->ei_block),
- idx_pblock(path->p_idx));
+ ext4_idx_pblock(path->p_idx));
#ifdef CHECK_BINSEARCH
{
path->p_ext = l - 1;
ext_debug(" -> %d:%llu:[%d]%d ",
le32_to_cpu(path->p_ext->ee_block),
- ext_pblock(path->p_ext),
+ ext4_ext_pblock(path->p_ext),
ext4_ext_is_uninitialized(path->p_ext),
ext4_ext_get_actual_len(path->p_ext));
ppos, le16_to_cpu(eh->eh_entries), le16_to_cpu(eh->eh_max));
ext4_ext_binsearch_idx(inode, path + ppos, block);
- path[ppos].p_block = idx_pblock(path[ppos].p_idx);
+ path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
path[ppos].p_depth = i;
path[ppos].p_ext = NULL;
ext4_ext_binsearch(inode, path + ppos, block);
/* if not an empty leaf */
if (path[ppos].p_ext)
- path[ppos].p_block = ext_pblock(path[ppos].p_ext);
+ path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
ext4_ext_show_path(inode, path);
* insert new index [@logical;@ptr] into the block at @curp;
* check where to insert: before @curp or after @curp
*/
- int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
- struct ext4_ext_path *curp,
- int logical, ext4_fsblk_t ptr)
+ static int ext4_ext_insert_index(handle_t *handle, struct inode *inode,
+ struct ext4_ext_path *curp,
+ int logical, ext4_fsblk_t ptr)
{
struct ext4_extent_idx *ix;
int len, err;
EXT_MAX_EXTENT(path[depth].p_hdr)) {
ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
le32_to_cpu(path[depth].p_ext->ee_block),
- ext_pblock(path[depth].p_ext),
+ ext4_ext_pblock(path[depth].p_ext),
ext4_ext_is_uninitialized(path[depth].p_ext),
ext4_ext_get_actual_len(path[depth].p_ext),
newblock);
while (path[i].p_idx <= EXT_MAX_INDEX(path[i].p_hdr)) {
ext_debug("%d: move %d:%llu in new index %llu\n", i,
le32_to_cpu(path[i].p_idx->ei_block),
- idx_pblock(path[i].p_idx),
+ ext4_idx_pblock(path[i].p_idx),
newblock);
/*memmove(++fidx, path[i].p_idx++,
sizeof(struct ext4_extent_idx));
ext_debug("new root: num %d(%d), lblock %d, ptr %llu\n",
le16_to_cpu(neh->eh_entries), le16_to_cpu(neh->eh_max),
le32_to_cpu(EXT_FIRST_INDEX(neh)->ei_block),
- idx_pblock(EXT_FIRST_INDEX(neh)));
+ ext4_idx_pblock(EXT_FIRST_INDEX(neh)));
neh->eh_depth = cpu_to_le16(path->p_depth + 1);
err = ext4_ext_dirty(handle, inode, curp);
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
- int
- ext4_ext_search_left(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys)
+ static int ext4_ext_search_left(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct ext4_extent_idx *ix;
struct ext4_extent *ex;
}
*logical = le32_to_cpu(ex->ee_block) + ee_len - 1;
- *phys = ext_pblock(ex) + ee_len - 1;
+ *phys = ext4_ext_pblock(ex) + ee_len - 1;
return 0;
}
* returns 0 at @phys
* return value contains 0 (success) or error code
*/
- int
- ext4_ext_search_right(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t *logical, ext4_fsblk_t *phys)
+ static int ext4_ext_search_right(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *logical, ext4_fsblk_t *phys)
{
struct buffer_head *bh = NULL;
struct ext4_extent_header *eh;
}
}
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
return 0;
}
/* next allocated block in this leaf */
ex++;
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
return 0;
}
* follow it and find the closest allocated
* block to the right */
ix++;
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
while (++depth < path->p_depth) {
bh = sb_bread(inode->i_sb, block);
if (bh == NULL)
return -EIO;
}
ix = EXT_FIRST_INDEX(eh);
- block = idx_pblock(ix);
+ block = ext4_idx_pblock(ix);
put_bh(bh);
}
}
ex = EXT_FIRST_EXTENT(eh);
*logical = le32_to_cpu(ex->ee_block);
- *phys = ext_pblock(ex);
+ *phys = ext4_ext_pblock(ex);
put_bh(bh);
return 0;
}
return 0;
#endif
- if (ext_pblock(ex1) + ext1_ee_len == ext_pblock(ex2))
+ if (ext4_ext_pblock(ex1) + ext1_ee_len == ext4_ext_pblock(ex2))
return 1;
return 0;
}
* Returns 0 if the extents (ex and ex+1) were _not_ merged and returns
* 1 if they got merged.
*/
- int ext4_ext_try_to_merge(struct inode *inode,
- struct ext4_ext_path *path,
- struct ext4_extent *ex)
+ static int ext4_ext_try_to_merge(struct inode *inode,
+ struct ext4_ext_path *path,
+ struct ext4_extent *ex)
{
struct ext4_extent_header *eh;
unsigned int depth, len;
* such that there will be no overlap, and then returns 1.
* If there is no overlap found, it returns 0.
*/
- unsigned int ext4_ext_check_overlap(struct inode *inode,
- struct ext4_extent *newext,
- struct ext4_ext_path *path)
+ static unsigned int ext4_ext_check_overlap(struct inode *inode,
+ struct ext4_extent *newext,
+ struct ext4_ext_path *path)
{
ext4_lblk_t b1, b2;
unsigned int depth, len1;
if (ex && !(flag & EXT4_GET_BLOCKS_PRE_IO)
&& ext4_can_extents_be_merged(inode, ex, newext)) {
ext_debug("append [%d]%d block to %d:[%d]%d (from %llu)\n",
- ext4_ext_is_uninitialized(newext),
- ext4_ext_get_actual_len(newext),
- le32_to_cpu(ex->ee_block),
- ext4_ext_is_uninitialized(ex),
- ext4_ext_get_actual_len(ex), ext_pblock(ex));
+ ext4_ext_is_uninitialized(newext),
+ ext4_ext_get_actual_len(newext),
+ le32_to_cpu(ex->ee_block),
+ ext4_ext_is_uninitialized(ex),
+ ext4_ext_get_actual_len(ex),
+ ext4_ext_pblock(ex));
err = ext4_ext_get_access(handle, inode, path + depth);
if (err)
return err;
/* there is no extent in this leaf, create first one */
ext_debug("first extent in the leaf: %d:%llu:[%d]%d\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext));
path[depth].p_ext = EXT_FIRST_EXTENT(eh);
ext_debug("insert %d:%llu:[%d]%d after: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
ext_debug("insert %d:%llu:[%d]%d before: nearest 0x%p, "
"move %d from 0x%p to 0x%p\n",
le32_to_cpu(newext->ee_block),
- ext_pblock(newext),
+ ext4_ext_pblock(newext),
ext4_ext_is_uninitialized(newext),
ext4_ext_get_actual_len(newext),
nearex, len, nearex + 1, nearex + 2);
le16_add_cpu(&eh->eh_entries, 1);
nearex = path[depth].p_ext;
nearex->ee_block = newext->ee_block;
- ext4_ext_store_pblock(nearex, ext_pblock(newext));
+ ext4_ext_store_pblock(nearex, ext4_ext_pblock(newext));
nearex->ee_len = newext->ee_len;
merge:
return err;
}
- int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
- ext4_lblk_t num, ext_prepare_callback func,
- void *cbdata)
+ static int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
+ ext4_lblk_t num, ext_prepare_callback func,
+ void *cbdata)
{
struct ext4_ext_path *path = NULL;
struct ext4_ext_cache cbex;
} else {
cbex.ec_block = le32_to_cpu(ex->ee_block);
cbex.ec_len = ext4_ext_get_actual_len(ex);
- cbex.ec_start = ext_pblock(ex);
+ cbex.ec_start = ext4_ext_pblock(ex);
cbex.ec_type = EXT4_EXT_CACHE_EXTENT;
}
/* free index block */
path--;
- leaf = idx_pblock(path->p_idx);
+ leaf = ext4_idx_pblock(path->p_idx);
if (unlikely(path->p_hdr->eh_entries == 0)) {
EXT4_ERROR_INODE(inode, "path->p_hdr->eh_entries == 0");
return -EIO;
ext4_fsblk_t start;
num = le32_to_cpu(ex->ee_block) + ee_len - from;
- start = ext_pblock(ex) + ee_len - num;
+ start = ext4_ext_pblock(ex) + ee_len - num;
ext_debug("free last %u blocks starting %llu\n", num, start);
ext4_free_blocks(handle, inode, 0, start, num, flags);
} else if (from == le32_to_cpu(ex->ee_block)
goto out;
ext_debug("new extent: %u:%u:%llu\n", block, num,
- ext_pblock(ex));
+ ext4_ext_pblock(ex));
ex--;
ex_ee_block = le32_to_cpu(ex->ee_block);
ex_ee_len = ext4_ext_get_actual_len(ex);
struct buffer_head *bh;
/* go to the next level */
ext_debug("move to level %d (block %llu)\n",
- i + 1, idx_pblock(path[i].p_idx));
+ i + 1, ext4_idx_pblock(path[i].p_idx));
memset(path + i + 1, 0, sizeof(*path));
- bh = sb_bread(sb, idx_pblock(path[i].p_idx));
+ bh = sb_bread(sb, ext4_idx_pblock(path[i].p_idx));
if (!bh) {
/* should we reset i_size? */
err = -EIO;
#endif
}
- static void bi_complete(struct bio *bio, int error)
- {
- complete((struct completion *)bio->bi_private);
- }
-
/* FIXME!! we need to try to merge to left or right after zero-out */
static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
{
+ ext4_fsblk_t ee_pblock;
+ unsigned int ee_len;
int ret;
- struct bio *bio;
- int blkbits, blocksize;
- sector_t ee_pblock;
- struct completion event;
- unsigned int ee_len, len, done, offset;
-
- blkbits = inode->i_blkbits;
- blocksize = inode->i_sb->s_blocksize;
ee_len = ext4_ext_get_actual_len(ex);
- ee_pblock = ext_pblock(ex);
-
- /* convert ee_pblock to 512 byte sectors */
- ee_pblock = ee_pblock << (blkbits - 9);
-
- while (ee_len > 0) {
-
- if (ee_len > BIO_MAX_PAGES)
- len = BIO_MAX_PAGES;
- else
- len = ee_len;
-
- bio = bio_alloc(GFP_NOIO, len);
- if (!bio)
- return -ENOMEM;
+ ee_pblock = ext4_ext_pblock(ex);
- bio->bi_sector = ee_pblock;
- bio->bi_bdev = inode->i_sb->s_bdev;
- ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len,
- GFP_NOFS, BLKDEV_IFL_WAIT);
++ ret = sb_issue_zeroout(inode->i_sb, ee_pblock, ee_len, GFP_NOFS);
+ if (ret > 0)
+ ret = 0;
- done = 0;
- offset = 0;
- while (done < len) {
- ret = bio_add_page(bio, ZERO_PAGE(0),
- blocksize, offset);
- if (ret != blocksize) {
- /*
- * We can't add any more pages because of
- * hardware limitations. Start a new bio.
- */
- break;
- }
- done++;
- offset += blocksize;
- if (offset >= PAGE_CACHE_SIZE)
- offset = 0;
- }
-
- init_completion(&event);
- bio->bi_private = &event;
- bio->bi_end_io = bi_complete;
- submit_bio(WRITE, bio);
- wait_for_completion(&event);
-
- if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
- bio_put(bio);
- return -EIO;
- }
- bio_put(bio);
- ee_len -= done;
- ee_pblock += done << (blkbits - 9);
- }
- return 0;
+ return ret;
}
#define EXT4_EXT_ZERO_LEN 7
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext_pblock(ex);
+ newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
return allocated;
ex->ee_block = orig_ex.ee_block;
ex->ee_len = cpu_to_le16(ee_len - allocated);
ext4_ext_mark_uninitialized(ex);
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
ex3 = &newex;
goto fix_extent_len;
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex,
+ ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* blocks available from map->m_lblk */
return allocated;
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
/* blocks available from map->m_lblk */
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
ee_block = le32_to_cpu(ex->ee_block);
ee_len = ext4_ext_get_actual_len(ex);
allocated = ee_len - (map->m_lblk - ee_block);
- newblock = map->m_lblk - ee_block + ext_pblock(ex);
+ newblock = map->m_lblk - ee_block + ext4_ext_pblock(ex);
ex2 = ex;
orig_ex.ee_block = ex->ee_block;
orig_ex.ee_len = cpu_to_le16(ee_len);
- ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
+ ext4_ext_store_pblock(&orig_ex, ext4_ext_pblock(ex));
/*
* It is safe to convert extent to initialized via explicit
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zeroed the full extent */
/* blocks available from map->m_lblk */
/* update the extent length and mark as initialized */
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_dirty(handle, inode, path + depth);
/* zero out the first half */
return allocated;
fix_extent_len:
ex->ee_block = orig_ex.ee_block;
ex->ee_len = orig_ex.ee_len;
- ext4_ext_store_pblock(ex, ext_pblock(&orig_ex));
+ ext4_ext_store_pblock(ex, ext4_ext_pblock(&orig_ex));
ext4_ext_mark_uninitialized(ex);
ext4_ext_dirty(handle, inode, path + depth);
return err;
unmap_underlying_metadata(bdev, block + i);
}
+ /*
+ * Handle EOFBLOCKS_FL flag, clearing it if necessary
+ */
+ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map,
+ struct ext4_ext_path *path,
+ unsigned int len)
+ {
+ int i, depth;
+ struct ext4_extent_header *eh;
+ struct ext4_extent *ex, *last_ex;
+
+ if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
+ return 0;
+
+ depth = ext_depth(inode);
+ eh = path[depth].p_hdr;
+ ex = path[depth].p_ext;
+
+ if (unlikely(!eh->eh_entries)) {
+ EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
+ "EOFBLOCKS_FL set");
+ return -EIO;
+ }
+ last_ex = EXT_LAST_EXTENT(eh);
+ /*
+ * We should clear the EOFBLOCKS_FL flag if we are writing the
+ * last block in the last extent in the file. We test this by
+ * first checking to see if the caller to
+ * ext4_ext_get_blocks() was interested in the last block (or
+ * a block beyond the last block) in the current extent. If
+ * this turns out to be false, we can bail out from this
+ * function immediately.
+ */
+ if (map->m_lblk + len < le32_to_cpu(last_ex->ee_block) +
+ ext4_ext_get_actual_len(last_ex))
+ return 0;
+ /*
+ * If the caller does appear to be planning to write at or
+ * beyond the end of the current extent, we then test to see
+ * if the current extent is the last extent in the file, by
+ * checking to make sure it was reached via the rightmost node
+ * at each level of the tree.
+ */
+ for (i = depth-1; i >= 0; i--)
+ if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
+ return 0;
+ ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
+ return ext4_mark_inode_dirty(handle, inode);
+ }
+
static int
ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
* completed
*/
if (io)
- io->flag = EXT4_IO_UNWRITTEN;
+ io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
if (ext4_should_dioread_nolock(inode))
if ((flags & EXT4_GET_BLOCKS_CONVERT)) {
ret = ext4_convert_unwritten_extents_endio(handle, inode,
path);
- if (ret >= 0)
+ if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map, path,
+ map->m_len);
+ } else
+ err = ret;
goto out2;
}
/* buffered IO case */
/* buffered write, writepage time, convert*/
ret = ext4_ext_convert_to_initialized(handle, inode, map, path);
- if (ret >= 0)
+ if (ret >= 0) {
ext4_update_inode_fsync_trans(handle, inode, 1);
+ err = check_eofblocks_fl(handle, inode, map, path, map->m_len);
+ if (err < 0)
+ goto out2;
+ }
+
out:
if (ret <= 0) {
err = ret;
}
return err ? err : allocated;
}
+
/*
* Block allocation/map/preallocation routine for extents based files
*
{
struct ext4_ext_path *path = NULL;
struct ext4_extent_header *eh;
- struct ext4_extent newex, *ex, *last_ex;
+ struct ext4_extent newex, *ex;
ext4_fsblk_t newblock;
- int i, err = 0, depth, ret, cache_type;
+ int err = 0, depth, ret, cache_type;
unsigned int allocated = 0;
struct ext4_allocation_request ar;
ext4_io_end_t *io = EXT4_I(inode)->cur_aio_dio;
/* block is already allocated */
newblock = map->m_lblk
- le32_to_cpu(newex.ee_block)
- + ext_pblock(&newex);
+ + ext4_ext_pblock(&newex);
/* number of remaining blocks in the extent */
allocated = ext4_ext_get_actual_len(&newex) -
(map->m_lblk - le32_to_cpu(newex.ee_block));
ex = path[depth].p_ext;
if (ex) {
ext4_lblk_t ee_block = le32_to_cpu(ex->ee_block);
- ext4_fsblk_t ee_start = ext_pblock(ex);
+ ext4_fsblk_t ee_start = ext4_ext_pblock(ex);
unsigned short ee_len;
/*
*/
if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
if (io)
- io->flag = EXT4_IO_UNWRITTEN;
+ io->flag = EXT4_IO_END_UNWRITTEN;
else
ext4_set_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN);
map->m_flags |= EXT4_MAP_UNINIT;
}
- if (unlikely(ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))) {
- if (unlikely(!eh->eh_entries)) {
- EXT4_ERROR_INODE(inode,
- "eh->eh_entries == 0 and "
- "EOFBLOCKS_FL set");
- err = -EIO;
- goto out2;
- }
- last_ex = EXT_LAST_EXTENT(eh);
- /*
- * If the current leaf block was reached by looking at
- * the last index block all the way down the tree, and
- * we are extending the inode beyond the last extent
- * in the current leaf block, then clear the
- * EOFBLOCKS_FL flag.
- */
- for (i = depth-1; i >= 0; i--) {
- if (path[i].p_idx != EXT_LAST_INDEX(path[i].p_hdr))
- break;
- }
- if ((i < 0) &&
- (map->m_lblk + ar.len > le32_to_cpu(last_ex->ee_block) +
- ext4_ext_get_actual_len(last_ex)))
- ext4_clear_inode_flag(inode, EXT4_INODE_EOFBLOCKS);
- }
+ err = check_eofblocks_fl(handle, inode, map, path, ar.len);
+ if (err)
+ goto out2;
+
err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
if (err) {
/* free data blocks we just allocated */
/* not a good idea to call discard here directly,
* but otherwise we'd need to call it every free() */
ext4_discard_preallocations(inode);
- ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
+ ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
ext4_ext_get_actual_len(&newex), 0);
goto out2;
}
/* previous routine could use block we allocated */
- newblock = ext_pblock(&newex);
+ newblock = ext4_ext_pblock(&newex);
allocated = ext4_ext_get_actual_len(&newex);
if (allocated > map->m_len)
allocated = map->m_len;
printk(KERN_ERR "%s: ext4_ext_map_blocks "
"returned error inode#%lu, block=%u, "
"max_blocks=%u", __func__,
- inode->i_ino, block, max_blocks);
+ inode->i_ino, map.m_lblk, max_blocks);
#endif
ext4_mark_inode_dirty(handle, inode);
ret2 = ext4_journal_stop(handle);
#include <trace/events/ext4.h>
+ static void dump_completed_IO(struct inode * inode)
+ {
+ #ifdef EXT4_DEBUG
+ struct list_head *cur, *before, *after;
+ ext4_io_end_t *io, *io0, *io1;
+ unsigned long flags;
+
+ if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
+ ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
+ return;
+ }
+
+ ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
+ spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
+ list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
+ cur = &io->list;
+ before = cur->prev;
+ io0 = container_of(before, ext4_io_end_t, list);
+ after = cur->next;
+ io1 = container_of(after, ext4_io_end_t, list);
+
+ ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
+ io, inode->i_ino, io0, io1);
+ }
+ spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
+ #endif
+ }
+
+ /*
+ * This function is called from ext4_sync_file().
+ *
+ * When IO is completed, the work to convert unwritten extents to
+ * written is queued on workqueue but may not get immediately
+ * scheduled. When fsync is called, we need to ensure the
+ * conversion is complete before fsync returns.
+ * The inode keeps track of a list of pending/completed IO that
+ * might needs to do the conversion. This function walks through
+ * the list and convert the related unwritten extents for completed IO
+ * to written.
+ * The function return the number of pending IOs on success.
+ */
+ static int flush_completed_IO(struct inode *inode)
+ {
+ ext4_io_end_t *io;
+ struct ext4_inode_info *ei = EXT4_I(inode);
+ unsigned long flags;
+ int ret = 0;
+ int ret2 = 0;
+
+ if (list_empty(&ei->i_completed_io_list))
+ return ret;
+
+ dump_completed_IO(inode);
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ while (!list_empty(&ei->i_completed_io_list)){
+ io = list_entry(ei->i_completed_io_list.next,
+ ext4_io_end_t, list);
+ /*
+ * Calling ext4_end_io_nolock() to convert completed
+ * IO to written.
+ *
+ * When ext4_sync_file() is called, run_queue() may already
+ * about to flush the work corresponding to this io structure.
+ * It will be upset if it founds the io structure related
+ * to the work-to-be schedule is freed.
+ *
+ * Thus we need to keep the io structure still valid here after
+ * convertion finished. The io structure has a flag to
+ * avoid double converting from both fsync and background work
+ * queue work.
+ */
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+ ret = ext4_end_io_nolock(io);
+ spin_lock_irqsave(&ei->i_completed_io_lock, flags);
+ if (ret < 0)
+ ret2 = ret;
+ else
+ list_del_init(&io->list);
+ }
+ spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+ return (ret2 < 0) ? ret2 : 0;
+ }
+
/*
* If we're not journaling and this is a just-created file, we have to
* sync our parent directory (if it was freshly created) since
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL,
- NULL, BLKDEV_IFL_WAIT);
+ NULL);
ret = jbd2_log_wait_commit(journal, commit_tid);
} else if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
+ blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
return ret;
}
* need to use it within a single byte (to ensure we get endianness right).
* We can use memset for the rest of the bitmap as there are no other users.
*/
- void mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
+ void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
int i;
}
/* Initializes an uninitialized inode bitmap */
- unsigned ext4_init_inode_bitmap(struct super_block *sb, struct buffer_head *bh,
- ext4_group_t block_group,
- struct ext4_group_desc *gdp)
+ static unsigned ext4_init_inode_bitmap(struct super_block *sb,
+ struct buffer_head *bh,
+ ext4_group_t block_group,
+ struct ext4_group_desc *gdp)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
}
memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+ ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
bh->b_data);
return EXT4_INODES_PER_GROUP(sb);
desc = ext4_get_group_desc(sb, block_group, NULL);
if (!desc)
return NULL;
+
bitmap_blk = ext4_inode_bitmap(sb, desc);
bh = sb_getblk(sb, bitmap_blk);
if (unlikely(!bh)) {
unlock_buffer(bh);
return bh;
}
+
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
ext4_init_inode_bitmap(sb, bh, block_group, desc);
return bh;
}
ext4_unlock_group(sb, block_group);
+
if (buffer_uptodate(bh)) {
/*
* if not uninit if bh is uptodate,
* for a particular block group or flex_bg. If flex_size is 1, then g
* is a block group number; otherwise it is flex_bg number.
*/
- void get_orlov_stats(struct super_block *sb, ext4_group_t g,
- int flex_size, struct orlov_stats *stats)
+ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
+ int flex_size, struct orlov_stats *stats)
{
struct ext4_group_desc *desc;
struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
{
int free = 0, retval = 0, count;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);
+ /*
+ * We have to be sure that new inode allocation does not race with
+ * inode table initialization, because otherwise we may end up
+ * allocating and writing new inode right before sb_issue_zeroout
+ * takes place and overwriting our new inode with zeroes. So we
+ * take alloc_sem to prevent it.
+ */
+ down_read(&grp->alloc_sem);
ext4_lock_group(sb, group);
if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
/* not a free inode */
if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
ino > EXT4_INODES_PER_GROUP(sb)) {
ext4_unlock_group(sb, group);
+ up_read(&grp->alloc_sem);
ext4_error(sb, "reserved inode or inode > inodes count - "
"block_group = %u, inode=%lu", group,
ino + group * EXT4_INODES_PER_GROUP(sb));
gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
err_ret:
ext4_unlock_group(sb, group);
+ up_read(&grp->alloc_sem);
return retval;
}
}
return count;
}
- unsigned long flags = BLKDEV_IFL_WAIT;
+
+ /*
+ * Zeroes not yet zeroed inode table - just write zeroes through the whole
+ * inode table. Must be called without any spinlock held. The only place
+ * where it is called from on active part of filesystem is ext4lazyinit
+ * thread, so we do not need any special locks, however we have to prevent
+ * inode allocation from the current group, so we take alloc_sem lock, to
+ * block ext4_claim_inode until we are finished.
+ */
+ extern int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
+ int barrier)
+ {
+ struct ext4_group_info *grp = ext4_get_group_info(sb, group);
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_group_desc *gdp = NULL;
+ struct buffer_head *group_desc_bh;
+ handle_t *handle;
+ ext4_fsblk_t blk;
+ int num, ret = 0, used_blks = 0;
- if (barrier)
- flags |= BLKDEV_IFL_BARRIER;
- ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS, flags);
+
+ /* This should not happen, but just to be sure check this */
+ if (sb->s_flags & MS_RDONLY) {
+ ret = 1;
+ goto out;
+ }
+
+ gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
+ if (!gdp)
+ goto out;
+
+ /*
+ * We do not need to lock this, because we are the only one
+ * handling this flag.
+ */
+ if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
+ goto out;
+
+ handle = ext4_journal_start_sb(sb, 1);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ goto out;
+ }
+
+ down_write(&grp->alloc_sem);
+ /*
+ * If inode bitmap was already initialized there may be some
+ * used inodes so we need to skip blocks with used inodes in
+ * inode table.
+ */
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
+ used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
+ ext4_itable_unused_count(sb, gdp)),
+ sbi->s_inodes_per_block);
+
+ if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
+ ext4_error(sb, "Something is wrong with group %u\n"
+ "Used itable blocks: %d"
+ "itable unused count: %u\n",
+ group, used_blks,
+ ext4_itable_unused_count(sb, gdp));
+ ret = 1;
+ goto out;
+ }
+
+ blk = ext4_inode_table(sb, gdp) + used_blks;
+ num = sbi->s_itb_per_group - used_blks;
+
+ BUFFER_TRACE(group_desc_bh, "get_write_access");
+ ret = ext4_journal_get_write_access(handle,
+ group_desc_bh);
+ if (ret)
+ goto err_out;
+
+ /*
+ * Skip zeroout if the inode table is full. But we set the ZEROED
+ * flag anyway, because obviously, when it is full it does not need
+ * further zeroing.
+ */
+ if (unlikely(num == 0))
+ goto skip_zeroout;
+
+ ext4_debug("going to zero out inode table in group %d\n",
+ group);
++ ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
+ if (ret < 0)
+ goto err_out;
++ if (barrier)
++ blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
+
+ skip_zeroout:
+ ext4_lock_group(sb, group);
+ gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
+ gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
+ ext4_unlock_group(sb, group);
+
+ BUFFER_TRACE(group_desc_bh,
+ "call ext4_handle_dirty_metadata");
+ ret = ext4_handle_dirty_metadata(handle, NULL,
+ group_desc_bh);
+
+ err_out:
+ up_write(&grp->alloc_sem);
+ ext4_journal_stop(handle);
+ out:
+ return ret;
+ }
}
static void ext4_invalidatepage(struct page *page, unsigned long offset);
+ static int noalloc_get_block_write(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
+ static int ext4_set_bh_endio(struct buffer_head *bh, struct inode *inode);
+ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
+ static int __ext4_journalled_writepage(struct page *page, unsigned int len);
+ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh);
/*
* Test whether an inode is a fast symlink.
* parent to disk.
*/
bh = sb_getblk(inode->i_sb, new_blocks[n-1]);
+ if (unlikely(!bh)) {
+ err = -EIO;
+ goto failed;
+ }
+
branch[n].bh = bh;
lock_buffer(bh);
BUFFER_TRACE(bh, "call get_create_access");
break;
idx++;
num++;
- if (num >= max_pages)
+ if (num >= max_pages) {
+ done = 1;
break;
+ }
}
pagevec_release(&pvec);
}
if (!buffer_mapped(bh) || buffer_freed(bh))
return 0;
/*
- * __block_prepare_write() could have dirtied some buffers. Clean
+ * __block_write_begin() could have dirtied some buffers. Clean
* the dirty bit as jbd2_journal_get_write_access() could complain
* otherwise about fs integrity issues. Setting of the dirty bit
- * by __block_prepare_write() isn't a real problem here as we clear
+ * by __block_write_begin() isn't a real problem here as we clear
* the bit before releasing a page lock and thus writeback cannot
* ever write the buffer.
*/
*
* As pages are already locked by write_cache_pages(), we can't use it
*/
- static int mpage_da_submit_io(struct mpage_da_data *mpd)
+ static int mpage_da_submit_io(struct mpage_da_data *mpd,
+ struct ext4_map_blocks *map)
{
- long pages_skipped;
struct pagevec pvec;
unsigned long index, end;
int ret = 0, err, nr_pages, i;
struct inode *inode = mpd->inode;
struct address_space *mapping = inode->i_mapping;
+ loff_t size = i_size_read(inode);
+ unsigned int len, block_start;
+ struct buffer_head *bh, *page_bufs = NULL;
+ int journal_data = ext4_should_journal_data(inode);
+ sector_t pblock = 0, cur_logical = 0;
+ struct ext4_io_submit io_submit;
BUG_ON(mpd->next_page <= mpd->first_page);
+ memset(&io_submit, 0, sizeof(io_submit));
/*
* We need to start from the first_page to the next_page - 1
* to make sure we also write the mapped dirty buffer_heads.
if (nr_pages == 0)
break;
for (i = 0; i < nr_pages; i++) {
+ int commit_write = 0, redirty_page = 0;
struct page *page = pvec.pages[i];
index = page->index;
if (index > end)
break;
+
+ if (index == size >> PAGE_CACHE_SHIFT)
+ len = size & ~PAGE_CACHE_MASK;
+ else
+ len = PAGE_CACHE_SIZE;
+ if (map) {
+ cur_logical = index << (PAGE_CACHE_SHIFT -
+ inode->i_blkbits);
+ pblock = map->m_pblk + (cur_logical -
+ map->m_lblk);
+ }
index++;
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
- pages_skipped = mpd->wbc->pages_skipped;
- err = mapping->a_ops->writepage(page, mpd->wbc);
- if (!err && (pages_skipped == mpd->wbc->pages_skipped))
- /*
- * have successfully written the page
- * without skipping the same
- */
- mpd->pages_written++;
/*
- * In error case, we have to continue because
- * remaining pages are still locked
- * XXX: unlock and re-dirty them?
+ * If the page does not have buffers (for
+ * whatever reason), try to create them using
- * block_prepare_write. If this fails,
++ * __block_write_begin. If this fails,
+ * redirty the page and move on.
*/
- if (ret == 0)
- ret = err;
- }
- pagevec_release(&pvec);
- }
- return ret;
- }
-
- /*
- * mpage_put_bnr_to_bhs - walk blocks and assign them actual numbers
- *
- * the function goes through all passed space and put actual disk
- * block numbers into buffer heads, dropping BH_Delay and BH_Unwritten
- */
- static void mpage_put_bnr_to_bhs(struct mpage_da_data *mpd,
- struct ext4_map_blocks *map)
- {
- struct inode *inode = mpd->inode;
- struct address_space *mapping = inode->i_mapping;
- int blocks = map->m_len;
- sector_t pblock = map->m_pblk, cur_logical;
- struct buffer_head *head, *bh;
- pgoff_t index, end;
- struct pagevec pvec;
- int nr_pages, i;
-
- index = map->m_lblk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- end = (map->m_lblk + blocks - 1) >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- cur_logical = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
- pagevec_init(&pvec, 0);
-
- while (index <= end) {
- /* XXX: optimize tail */
- nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
- if (nr_pages == 0)
- break;
- for (i = 0; i < nr_pages; i++) {
- struct page *page = pvec.pages[i];
-
- index = page->index;
- if (index > end)
- break;
- index++;
-
- BUG_ON(!PageLocked(page));
- BUG_ON(PageWriteback(page));
- BUG_ON(!page_has_buffers(page));
-
- bh = page_buffers(page);
- head = bh;
-
- /* skip blocks out of the range */
- do {
- if (cur_logical >= map->m_lblk)
- break;
- cur_logical++;
- } while ((bh = bh->b_this_page) != head);
+ if (!page_has_buffers(page)) {
- if (block_prepare_write(page, 0, len,
++ if (__block_write_begin(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
+ redirty_page_for_writepage(mpd->wbc,
+ page);
+ unlock_page(page);
+ continue;
+ }
+ commit_write = 1;
+ }
+ bh = page_bufs = page_buffers(page);
+ block_start = 0;
do {
- if (cur_logical >= map->m_lblk + blocks)
- break;
-
- if (buffer_delay(bh) || buffer_unwritten(bh)) {
-
- BUG_ON(bh->b_bdev != inode->i_sb->s_bdev);
-
+ if (!bh)
+ goto redirty_page;
+ if (map && (cur_logical >= map->m_lblk) &&
+ (cur_logical <= (map->m_lblk +
+ (map->m_len - 1)))) {
if (buffer_delay(bh)) {
clear_buffer_delay(bh);
bh->b_blocknr = pblock;
- } else {
- /*
- * unwritten already should have
- * blocknr assigned. Verify that
- */
- clear_buffer_unwritten(bh);
- BUG_ON(bh->b_blocknr != pblock);
}
+ if (buffer_unwritten(bh) ||
+ buffer_mapped(bh))
+ BUG_ON(bh->b_blocknr != pblock);
+ if (map->m_flags & EXT4_MAP_UNINIT)
+ set_buffer_uninit(bh);
+ clear_buffer_unwritten(bh);
+ }
- } else if (buffer_mapped(bh))
- BUG_ON(bh->b_blocknr != pblock);
-
- if (map->m_flags & EXT4_MAP_UNINIT)
- set_buffer_uninit(bh);
+ /* redirty page if block allocation undone */
+ if (buffer_delay(bh) || buffer_unwritten(bh))
+ redirty_page = 1;
+ bh = bh->b_this_page;
+ block_start += bh->b_size;
cur_logical++;
pblock++;
- } while ((bh = bh->b_this_page) != head);
+ } while (bh != page_bufs);
+
+ if (redirty_page)
+ goto redirty_page;
+
+ if (commit_write)
+ /* mark the buffer_heads as dirty & uptodate */
+ block_commit_write(page, 0, len);
+
+ /*
+ * Delalloc doesn't support data journalling,
+ * but eventually maybe we'll lift this
+ * restriction.
+ */
+ if (unlikely(journal_data && PageChecked(page)))
+ err = __ext4_journalled_writepage(page, len);
+ else
+ err = ext4_bio_write_page(&io_submit, page,
+ len, mpd->wbc);
+
+ if (!err)
+ mpd->pages_written++;
+ /*
+ * In error case, we have to continue because
+ * remaining pages are still locked
+ */
+ if (ret == 0)
+ ret = err;
}
pagevec_release(&pvec);
}
+ ext4_io_submit(&io_submit);
+ return ret;
}
-
static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
sector_t logical, long blk_cnt)
{
}
/*
- * mpage_da_map_blocks - go through given space
+ * mpage_da_map_and_submit - go through given space, map them
+ * if necessary, and then submit them for I/O
*
* @mpd - bh describing space
*
* The function skips space we know is already mapped to disk blocks.
*
*/
- static int mpage_da_map_blocks(struct mpage_da_data *mpd)
+ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
{
int err, blks, get_blocks_flags;
- struct ext4_map_blocks map;
+ struct ext4_map_blocks map, *mapp = NULL;
sector_t next = mpd->b_blocknr;
unsigned max_blocks = mpd->b_size >> mpd->inode->i_blkbits;
loff_t disksize = EXT4_I(mpd->inode)->i_disksize;
handle_t *handle = NULL;
/*
- * We consider only non-mapped and non-allocated blocks
- */
- if ((mpd->b_state & (1 << BH_Mapped)) &&
- !(mpd->b_state & (1 << BH_Delay)) &&
- !(mpd->b_state & (1 << BH_Unwritten)))
- return 0;
-
- /*
- * If we didn't accumulate anything to write simply return
+ * If the blocks are mapped already, or we couldn't accumulate
+ * any blocks, then proceed immediately to the submission stage.
*/
- if (!mpd->b_size)
- return 0;
+ if ((mpd->b_size == 0) ||
+ ((mpd->b_state & (1 << BH_Mapped)) &&
+ !(mpd->b_state & (1 << BH_Delay)) &&
+ !(mpd->b_state & (1 << BH_Unwritten))))
+ goto submit_io;
handle = ext4_journal_current_handle();
BUG_ON(!handle);
err = blks;
/*
- * If get block returns with error we simply
- * return. Later writepage will redirty the page and
- * writepages will find the dirty page again
+ * If get block returns EAGAIN or ENOSPC and there
+ * appears to be free blocks we will call
+ * ext4_writepage() for all of the pages which will
+ * just redirty the pages.
*/
if (err == -EAGAIN)
- return 0;
+ goto submit_io;
if (err == -ENOSPC &&
ext4_count_free_blocks(sb)) {
mpd->retval = err;
- return 0;
+ goto submit_io;
}
/*
/* invalidate all the pages */
ext4_da_block_invalidatepages(mpd, next,
mpd->b_size >> mpd->inode->i_blkbits);
- return err;
+ return;
}
BUG_ON(blks == 0);
+ mapp = ↦
if (map.m_flags & EXT4_MAP_NEW) {
struct block_device *bdev = mpd->inode->i_sb->s_bdev;
int i;
unmap_underlying_metadata(bdev, map.m_pblk + i);
}
- /*
- * If blocks are delayed marked, we need to
- * put actual blocknr and drop delayed bit
- */
- if ((mpd->b_state & (1 << BH_Delay)) ||
- (mpd->b_state & (1 << BH_Unwritten)))
- mpage_put_bnr_to_bhs(mpd, &map);
-
if (ext4_should_order_data(mpd->inode)) {
err = ext4_jbd2_file_inode(handle, mpd->inode);
if (err)
- return err;
+ /* This only happens if the journal is aborted */
+ return;
}
/*
disksize = i_size_read(mpd->inode);
if (disksize > EXT4_I(mpd->inode)->i_disksize) {
ext4_update_i_disksize(mpd->inode, disksize);
- return ext4_mark_inode_dirty(handle, mpd->inode);
+ err = ext4_mark_inode_dirty(handle, mpd->inode);
+ if (err)
+ ext4_error(mpd->inode->i_sb,
+ "Failed to mark inode %lu dirty",
+ mpd->inode->i_ino);
}
- return 0;
+ submit_io:
+ mpage_da_submit_io(mpd, mapp);
+ mpd->io_done = 1;
}
#define BH_FLAGS ((1 << BH_Uptodate) | (1 << BH_Mapped) | \
* We couldn't merge the block to our extent, so we
* need to flush current extent and start new one
*/
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
- mpd->io_done = 1;
+ mpage_da_map_and_submit(mpd);
return;
}
* The function finds extents of pages and scan them for all blocks.
*/
static int __mpage_da_writepage(struct page *page,
- struct writeback_control *wbc, void *data)
+ struct writeback_control *wbc,
+ struct mpage_da_data *mpd)
{
- struct mpage_da_data *mpd = data;
struct inode *inode = mpd->inode;
struct buffer_head *bh, *head;
sector_t logical;
if (mpd->next_page != page->index) {
/*
* Nope, we can't. So, we map non-allocated blocks
- * and start IO on them using writepage()
+ * and start IO on them
*/
if (mpd->next_page != mpd->first_page) {
- if (mpage_da_map_blocks(mpd) == 0)
- mpage_da_submit_io(mpd);
+ mpage_da_map_and_submit(mpd);
/*
* skip rest of the page in the page_vec
*/
- mpd->io_done = 1;
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return MPAGE_DA_EXTENT_TAIL;
if (buffer_delay(bh))
return 0; /* Not sure this could or should happen */
/*
- * XXX: __block_prepare_write() unmaps passed block,
- * is it OK?
+ * XXX: __block_write_begin() unmaps passed block, is it OK?
*/
ret = ext4_da_reserve_space(inode, iblock);
if (ret)
/*
* This function is used as a standard get_block_t calback function
* when there is no desire to allocate any blocks. It is used as a
- * callback function for block_prepare_write() and block_write_full_page().
+ * callback function for block_write_begin() and block_write_full_page().
* These functions should only try to map a single block at a time.
*
* Since this function doesn't do block allocations even if the caller
int ret = 0;
int err;
+ ClearPageChecked(page);
page_bufs = page_buffers(page);
BUG_ON(!page_bufs);
walk_page_buffers(handle, page_bufs, 0, len, NULL, bget_one);
static int ext4_writepage(struct page *page,
struct writeback_control *wbc)
{
- int ret = 0;
+ int ret = 0, commit_write = 0;
loff_t size;
unsigned int len;
struct buffer_head *page_bufs = NULL;
else
len = PAGE_CACHE_SIZE;
- if (page_has_buffers(page)) {
- page_bufs = page_buffers(page);
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- /*
- * We don't want to do block allocation
- * So redirty the page and return
- * We may reach here when we do a journal commit
- * via journal_submit_inode_data_buffers.
- * If we don't have mapping block we just ignore
- * them. We can also reach here via shrink_page_list
- */
+ /*
+ * If the page does not have buffers (for whatever reason),
- * try to create them using block_prepare_write. If this
++ * try to create them using __block_write_begin. If this
+ * fails, redirty the page and move on.
+ */
+ if (!page_buffers(page)) {
- if (block_prepare_write(page, 0, len,
++ if (__block_write_begin(page, 0, len,
+ noalloc_get_block_write)) {
+ redirty_page:
redirty_page_for_writepage(wbc, page);
unlock_page(page);
return 0;
}
- } else {
+ commit_write = 1;
+ }
+ page_bufs = page_buffers(page);
+ if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
+ ext4_bh_delay_or_unwritten)) {
/*
- * The test for page_has_buffers() is subtle:
- * We know the page is dirty but it lost buffers. That means
- * that at some moment in time after write_begin()/write_end()
- * has been called all buffers have been clean and thus they
- * must have been written at least once. So they are all
- * mapped and we can happily proceed with mapping them
- * and writing the page.
- *
- * Try to initialize the buffer_heads and check whether
- * all are mapped and non delay. We don't want to
- * do block allocation here.
+ * We don't want to do block allocation So redirty the
+ * page and return We may reach here when we do a
+ * journal commit via
+ * journal_submit_inode_data_buffers. If we don't
+ * have mapping block we just ignore them. We can also
+ * reach here via shrink_page_list
*/
- ret = __block_write_begin(page, 0, len,
- noalloc_get_block_write);
- if (!ret) {
- page_bufs = page_buffers(page);
- /* check whether all are mapped and non delay */
- if (walk_page_buffers(NULL, page_bufs, 0, len, NULL,
- ext4_bh_delay_or_unwritten)) {
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
- } else {
- /*
- * We can't do block allocation here
- * so just redity the page and unlock
- * and return
- */
- redirty_page_for_writepage(wbc, page);
- unlock_page(page);
- return 0;
- }
+ goto redirty_page;
+ }
+ if (commit_write)
/* now mark the buffer_heads as dirty and uptodate */
block_commit_write(page, 0, len);
- }
- if (PageChecked(page) && ext4_should_journal_data(inode)) {
+ if (PageChecked(page) && ext4_should_journal_data(inode))
/*
* It's mmapped pagecache. Add buffers and journal it. There
* doesn't seem much point in redirtying the page here.
*/
- ClearPageChecked(page);
return __ext4_journalled_writepage(page, len);
- }
- if (page_bufs && buffer_uninit(page_bufs)) {
+ if (buffer_uninit(page_bufs)) {
ext4_set_bh_endio(page_bufs, inode);
ret = block_write_full_page_endio(page, noalloc_get_block_write,
wbc, ext4_end_io_buffer_write);
*/
static int write_cache_pages_da(struct address_space *mapping,
struct writeback_control *wbc,
- struct mpage_da_data *mpd)
+ struct mpage_da_data *mpd,
+ pgoff_t *done_index)
{
int ret = 0;
int done = 0;
struct pagevec pvec;
- int nr_pages;
+ unsigned nr_pages;
pgoff_t index;
pgoff_t end; /* Inclusive */
long nr_to_write = wbc->nr_to_write;
+ int tag;
pagevec_init(&pvec, 0);
index = wbc->range_start >> PAGE_CACHE_SHIFT;
end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag = PAGECACHE_TAG_TOWRITE;
+ else
+ tag = PAGECACHE_TAG_DIRTY;
+
+ *done_index = index;
while (!done && (index <= end)) {
int i;
- nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
- PAGECACHE_TAG_DIRTY,
+ nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
if (nr_pages == 0)
break;
break;
}
+ *done_index = page->index + 1;
+
lock_page(page);
/*
long desired_nr_to_write, nr_to_writebump = 0;
loff_t range_start = wbc->range_start;
struct ext4_sb_info *sbi = EXT4_SB(mapping->host->i_sb);
+ pgoff_t done_index = 0;
+ pgoff_t end;
trace_ext4_da_writepages(inode, wbc);
wbc->range_start = index << PAGE_CACHE_SHIFT;
wbc->range_end = LLONG_MAX;
wbc->range_cyclic = 0;
- } else
+ end = -1;
+ } else {
index = wbc->range_start >> PAGE_CACHE_SHIFT;
+ end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ }
/*
* This works around two forms of stupidity. The first is in
* sbi->max_writeback_mb_bump whichever is smaller.
*/
max_pages = sbi->s_max_writeback_mb_bump << (20 - PAGE_CACHE_SHIFT);
- if (!range_cyclic && range_whole)
- desired_nr_to_write = wbc->nr_to_write * 8;
- else
+ if (!range_cyclic && range_whole) {
+ if (wbc->nr_to_write == LONG_MAX)
+ desired_nr_to_write = wbc->nr_to_write;
+ else
+ desired_nr_to_write = wbc->nr_to_write * 8;
+ } else
desired_nr_to_write = ext4_num_dirty_pages(inode, index,
max_pages);
if (desired_nr_to_write > max_pages)
pages_skipped = wbc->pages_skipped;
retry:
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ tag_pages_for_writeback(mapping, index, end);
+
while (!ret && wbc->nr_to_write > 0) {
/*
mpd.io_done = 0;
mpd.pages_written = 0;
mpd.retval = 0;
- ret = write_cache_pages_da(mapping, wbc, &mpd);
+ ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index);
/*
* If we have a contiguous extent of pages and we
* haven't done the I/O yet, map the blocks and submit
* them for I/O.
*/
if (!mpd.io_done && mpd.next_page != mpd.first_page) {
- if (mpage_da_map_blocks(&mpd) == 0)
- mpage_da_submit_io(&mpd);
- mpd.io_done = 1;
+ mpage_da_map_and_submit(&mpd);
ret = MPAGE_DA_EXTENT_TAIL;
}
trace_ext4_da_write_pages(inode, &mpd);
__func__, wbc->nr_to_write, ret);
/* Update index */
- index += pages_written;
wbc->range_cyclic = range_cyclic;
if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
/*
* set the writeback_index so that range_cyclic
* mode will write it back later
*/
- mapping->writeback_index = index;
+ mapping->writeback_index = done_index;
out_writepages:
wbc->nr_to_write -= nr_to_writebump;
return mpage_readpages(mapping, pages, nr_pages, ext4_get_block);
}
- static void ext4_free_io_end(ext4_io_end_t *io)
- {
- BUG_ON(!io);
- if (io->page)
- put_page(io->page);
- iput(io->inode);
- kfree(io);
- }
-
static void ext4_invalidatepage_free_endio(struct page *page, unsigned long offset)
{
struct buffer_head *head, *bh;
EXT4_GET_BLOCKS_IO_CREATE_EXT);
}
- static void dump_completed_IO(struct inode * inode)
- {
- #ifdef EXT4_DEBUG
- struct list_head *cur, *before, *after;
- ext4_io_end_t *io, *io0, *io1;
- unsigned long flags;
-
- if (list_empty(&EXT4_I(inode)->i_completed_io_list)){
- ext4_debug("inode %lu completed_io list is empty\n", inode->i_ino);
- return;
- }
-
- ext4_debug("Dump inode %lu completed_io list \n", inode->i_ino);
- spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags);
- list_for_each_entry(io, &EXT4_I(inode)->i_completed_io_list, list){
- cur = &io->list;
- before = cur->prev;
- io0 = container_of(before, ext4_io_end_t, list);
- after = cur->next;
- io1 = container_of(after, ext4_io_end_t, list);
-
- ext4_debug("io 0x%p from inode %lu,prev 0x%p,next 0x%p\n",
- io, inode->i_ino, io0, io1);
- }
- spin_unlock_irqrestore(&EXT4_I(inode)->i_completed_io_lock, flags);
- #endif
- }
-
- /*
- * check a range of space and convert unwritten extents to written.
- */
- static int ext4_end_io_nolock(ext4_io_end_t *io)
- {
- struct inode *inode = io->inode;
- loff_t offset = io->offset;
- ssize_t size = io->size;
- int ret = 0;
-
- ext4_debug("ext4_end_io_nolock: io 0x%p from inode %lu,list->next 0x%p,"
- "list->prev 0x%p\n",
- io, inode->i_ino, io->list.next, io->list.prev);
-
- if (list_empty(&io->list))
- return ret;
-
- if (io->flag != EXT4_IO_UNWRITTEN)
- return ret;
-
- ret = ext4_convert_unwritten_extents(inode, offset, size);
- if (ret < 0) {
- printk(KERN_EMERG "%s: failed to convert unwritten"
- "extents to written extents, error is %d"
- " io is still on inode %lu aio dio list\n",
- __func__, ret, inode->i_ino);
- return ret;
- }
-
- if (io->iocb)
- aio_complete(io->iocb, io->result, 0);
- /* clear the DIO AIO unwritten flag */
- io->flag = 0;
- return ret;
- }
-
- /*
- * work on completed aio dio IO, to convert unwritten extents to extents
- */
- static void ext4_end_io_work(struct work_struct *work)
- {
- ext4_io_end_t *io = container_of(work, ext4_io_end_t, work);
- struct inode *inode = io->inode;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret;
-
- mutex_lock(&inode->i_mutex);
- ret = ext4_end_io_nolock(io);
- if (ret < 0) {
- mutex_unlock(&inode->i_mutex);
- return;
- }
-
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (!list_empty(&io->list))
- list_del_init(&io->list);
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- mutex_unlock(&inode->i_mutex);
- ext4_free_io_end(io);
- }
-
- /*
- * This function is called from ext4_sync_file().
- *
- * When IO is completed, the work to convert unwritten extents to
- * written is queued on workqueue but may not get immediately
- * scheduled. When fsync is called, we need to ensure the
- * conversion is complete before fsync returns.
- * The inode keeps track of a list of pending/completed IO that
- * might needs to do the conversion. This function walks through
- * the list and convert the related unwritten extents for completed IO
- * to written.
- * The function return the number of pending IOs on success.
- */
- int flush_completed_IO(struct inode *inode)
- {
- ext4_io_end_t *io;
- struct ext4_inode_info *ei = EXT4_I(inode);
- unsigned long flags;
- int ret = 0;
- int ret2 = 0;
-
- if (list_empty(&ei->i_completed_io_list))
- return ret;
-
- dump_completed_IO(inode);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- while (!list_empty(&ei->i_completed_io_list)){
- io = list_entry(ei->i_completed_io_list.next,
- ext4_io_end_t, list);
- /*
- * Calling ext4_end_io_nolock() to convert completed
- * IO to written.
- *
- * When ext4_sync_file() is called, run_queue() may already
- * about to flush the work corresponding to this io structure.
- * It will be upset if it founds the io structure related
- * to the work-to-be schedule is freed.
- *
- * Thus we need to keep the io structure still valid here after
- * convertion finished. The io structure has a flag to
- * avoid double converting from both fsync and background work
- * queue work.
- */
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- ret = ext4_end_io_nolock(io);
- spin_lock_irqsave(&ei->i_completed_io_lock, flags);
- if (ret < 0)
- ret2 = ret;
- else
- list_del_init(&io->list);
- }
- spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
- return (ret2 < 0) ? ret2 : 0;
- }
-
- static ext4_io_end_t *ext4_init_io_end (struct inode *inode, gfp_t flags)
- {
- ext4_io_end_t *io = NULL;
-
- io = kmalloc(sizeof(*io), flags);
-
- if (io) {
- igrab(inode);
- io->inode = inode;
- io->flag = 0;
- io->offset = 0;
- io->size = 0;
- io->page = NULL;
- io->iocb = NULL;
- io->result = 0;
- INIT_WORK(&io->work, ext4_end_io_work);
- INIT_LIST_HEAD(&io->list);
- }
-
- return io;
- }
-
static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private, int ret,
bool is_async)
size);
/* if not aio dio with unwritten extents, just free io and return */
- if (io_end->flag != EXT4_IO_UNWRITTEN){
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
ext4_free_io_end(io_end);
iocb->private = NULL;
out:
}
wq = EXT4_SB(io_end->inode->i_sb)->dio_unwritten_wq;
- /* queue the work to convert unwritten extents to written */
- queue_work(wq, &io_end->work);
-
/* Add the io_end to per-inode completed aio dio list*/
ei = EXT4_I(io_end->inode);
spin_lock_irqsave(&ei->i_completed_io_lock, flags);
list_add_tail(&io_end->list, &ei->i_completed_io_list);
spin_unlock_irqrestore(&ei->i_completed_io_lock, flags);
+
+ /* queue the work to convert unwritten extents to written */
+ queue_work(wq, &io_end->work);
iocb->private = NULL;
}
goto out;
}
- io_end->flag = EXT4_IO_UNWRITTEN;
+ io_end->flag = EXT4_IO_END_UNWRITTEN;
inode = io_end->inode;
/* Add the io_end to per-inode completed io list*/
{
struct inode *inode = dentry->d_inode;
int error, rc = 0;
+ int orphan = 0;
const unsigned int ia_valid = attr->ia_valid;
error = inode_change_ok(inode, attr);
error = PTR_ERR(handle);
goto err_out;
}
-
- error = ext4_orphan_add(handle, inode);
+ if (ext4_handle_valid(handle)) {
+ error = ext4_orphan_add(handle, inode);
+ orphan = 1;
+ }
EXT4_I(inode)->i_disksize = attr->ia_size;
rc = ext4_mark_inode_dirty(handle, inode);
if (!error)
goto err_out;
}
ext4_orphan_del(handle, inode);
+ orphan = 0;
ext4_journal_stop(handle);
goto err_out;
}
* If the call to ext4_truncate failed to get a transaction handle at
* all, we need to clean up the in-core orphan list manually.
*/
- if (inode->i_nlink)
+ if (orphan && inode->i_nlink)
ext4_orphan_del(NULL, inode);
if (!rc && (ia_valid & ATTR_MODE))
*
* Also account for superblock, inode, quota and xattr blocks
*/
- int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
+ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk)
{
ext4_group_t groups, ngroups = ext4_get_groups_count(inode->i_sb);
int gdpblocks;
static struct kmem_cache *ext4_pspace_cachep;
static struct kmem_cache *ext4_ac_cachep;
static struct kmem_cache *ext4_free_ext_cachep;
+
+ /* We create slab caches for groupinfo data structures based on the
+ * superblock block size. There will be one per mounted filesystem for
+ * each unique s_blocksize_bits */
+ #define NR_GRPINFO_CACHES \
+ (EXT4_MAX_BLOCK_LOG_SIZE - EXT4_MIN_BLOCK_LOG_SIZE + 1)
+ static struct kmem_cache *ext4_groupinfo_caches[NR_GRPINFO_CACHES];
+
static void ext4_mb_generate_from_pa(struct super_block *sb, void *bitmap,
ext4_group_t group);
static void ext4_mb_generate_from_freelist(struct super_block *sb, void *bitmap,
return err;
}
+ /*
+ * lock the group_info alloc_sem of all the groups
+ * belonging to the same buddy cache page. This
+ * make sure other parallel operation on the buddy
+ * cache doesn't happen whild holding the buddy cache
+ * lock
+ */
+ static int ext4_mb_get_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group)
+ {
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ int groups_per_page;
+ ext4_group_t ngroups = ext4_get_groups_count(sb);
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+
+ groups_per_page = blocks_per_page >> 1;
+ if (groups_per_page == 0)
+ groups_per_page = 1;
+ /* read all groups the page covers into the cache */
+ for (i = 0; i < groups_per_page; i++) {
+
+ if ((first_group + i) >= ngroups)
+ break;
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ down_write_nested(&grp->alloc_sem, i);
+ }
+ return i;
+ }
+
+ static void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
+ ext4_group_t group, int locked_group)
+ {
+ int i;
+ int block, pnum;
+ int blocks_per_page;
+ ext4_group_t first_group;
+ struct ext4_group_info *grp;
+
+ blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ /*
+ * the buddy cache inode stores the block bitmap
+ * and buddy information in consecutive blocks.
+ * So for each group we need two blocks.
+ */
+ block = group * 2;
+ pnum = block / blocks_per_page;
+ first_group = pnum * blocks_per_page / 2;
+ /* release locks on all the groups */
+ for (i = 0; i < locked_group; i++) {
+
+ grp = ext4_get_group_info(sb, first_group + i);
+ /* take all groups write allocation
+ * semaphore. This make sure there is
+ * no block allocation going on in any
+ * of that groups
+ */
+ up_write(&grp->alloc_sem);
+ }
+
+ }
+
/*
* Locking note: This routine calls ext4_mb_init_cache(), which takes the
* block group lock of all groups for this page; do not hold the BG lock when
return 0;
}
- /*
- * lock the group_info alloc_sem of all the groups
- * belonging to the same buddy cache page. This
- * make sure other parallel operation on the buddy
- * cache doesn't happen whild holding the buddy cache
- * lock
- */
- int ext4_mb_get_buddy_cache_lock(struct super_block *sb, ext4_group_t group)
- {
- int i;
- int block, pnum;
- int blocks_per_page;
- int groups_per_page;
- ext4_group_t ngroups = ext4_get_groups_count(sb);
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
-
- groups_per_page = blocks_per_page >> 1;
- if (groups_per_page == 0)
- groups_per_page = 1;
- /* read all groups the page covers into the cache */
- for (i = 0; i < groups_per_page; i++) {
-
- if ((first_group + i) >= ngroups)
- break;
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- down_write_nested(&grp->alloc_sem, i);
- }
- return i;
- }
-
- void ext4_mb_put_buddy_cache_lock(struct super_block *sb,
- ext4_group_t group, int locked_group)
- {
- int i;
- int block, pnum;
- int blocks_per_page;
- ext4_group_t first_group;
- struct ext4_group_info *grp;
-
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
- /*
- * the buddy cache inode stores the block bitmap
- * and buddy information in consecutive blocks.
- * So for each group we need two blocks.
- */
- block = group * 2;
- pnum = block / blocks_per_page;
- first_group = pnum * blocks_per_page / 2;
- /* release locks on all the groups */
- for (i = 0; i < locked_group; i++) {
-
- grp = ext4_get_group_info(sb, first_group + i);
- /* take all groups write allocation
- * semaphore. This make sure there is
- * no block allocation going on in any
- * of that groups
- */
- up_write(&grp->alloc_sem);
- }
-
- }
-
static noinline_for_stack int
ext4_mb_regular_allocator(struct ext4_allocation_context *ac)
{
.release = seq_release,
};
+ static struct kmem_cache *get_groupinfo_cache(int blocksize_bits)
+ {
+ int cache_index = blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ struct kmem_cache *cachep = ext4_groupinfo_caches[cache_index];
+
+ BUG_ON(!cachep);
+ return cachep;
+ }
/* Create and initialize ext4_group_info data for the given group. */
int ext4_mb_add_groupinfo(struct super_block *sb, ext4_group_t group,
struct ext4_group_desc *desc)
{
- int i, len;
+ int i;
int metalen = 0;
struct ext4_sb_info *sbi = EXT4_SB(sb);
struct ext4_group_info **meta_group_info;
+ struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
/*
* First check if this group is the first of a reserved block.
meta_group_info;
}
- /*
- * calculate needed size. if change bb_counters size,
- * don't forget about ext4_mb_generate_buddy()
- */
- len = offsetof(typeof(**meta_group_info),
- bb_counters[sb->s_blocksize_bits + 2]);
-
meta_group_info =
sbi->s_group_info[group >> EXT4_DESC_PER_BLOCK_BITS(sb)];
i = group & (EXT4_DESC_PER_BLOCK(sb) - 1);
- meta_group_info[i] = kzalloc(len, GFP_KERNEL);
+ meta_group_info[i] = kmem_cache_alloc(cachep, GFP_KERNEL);
if (meta_group_info[i] == NULL) {
printk(KERN_ERR "EXT4-fs: can't allocate buddy mem\n");
goto exit_group_info;
}
+ memset(meta_group_info[i], 0, kmem_cache_size(cachep));
set_bit(EXT4_GROUP_INFO_NEED_INIT_BIT,
&(meta_group_info[i]->bb_state));
int num_meta_group_infos_max;
int array_size;
struct ext4_group_desc *desc;
+ struct kmem_cache *cachep;
/* This is the number of blocks used by GDT */
num_meta_group_infos = (ngroups + EXT4_DESC_PER_BLOCK(sb) -
printk(KERN_ERR "EXT4-fs: can't get new inode\n");
goto err_freesgi;
}
+ sbi->s_buddy_cache->i_ino = get_next_ino();
EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
for (i = 0; i < ngroups; i++) {
desc = ext4_get_group_desc(sb, i, NULL);
return 0;
err_freebuddy:
+ cachep = get_groupinfo_cache(sb->s_blocksize_bits);
while (i-- > 0)
- kfree(ext4_get_group_info(sb, i));
+ kmem_cache_free(cachep, ext4_get_group_info(sb, i));
i = num_meta_group_infos;
while (i-- > 0)
kfree(sbi->s_group_info[i]);
unsigned offset;
unsigned max;
int ret;
+ int cache_index;
+ struct kmem_cache *cachep;
+ char *namep = NULL;
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_offsets);
sbi->s_mb_offsets = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_offsets == NULL) {
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
i = (sb->s_blocksize_bits + 2) * sizeof(*sbi->s_mb_maxs);
sbi->s_mb_maxs = kmalloc(i, GFP_KERNEL);
if (sbi->s_mb_maxs == NULL) {
- kfree(sbi->s_mb_offsets);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ cache_index = sb->s_blocksize_bits - EXT4_MIN_BLOCK_LOG_SIZE;
+ cachep = ext4_groupinfo_caches[cache_index];
+ if (!cachep) {
+ char name[32];
+ int len = offsetof(struct ext4_group_info,
+ bb_counters[sb->s_blocksize_bits + 2]);
+
+ sprintf(name, "ext4_groupinfo_%d", sb->s_blocksize_bits);
+ namep = kstrdup(name, GFP_KERNEL);
+ if (!namep) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ /* Need to free the kmem_cache_name() when we
+ * destroy the slab */
+ cachep = kmem_cache_create(namep, len, 0,
+ SLAB_RECLAIM_ACCOUNT, NULL);
+ if (!cachep) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ ext4_groupinfo_caches[cache_index] = cachep;
}
/* order 0 is regular bitmap */
/* init file for buddy data */
ret = ext4_mb_init_backend(sb);
if (ret != 0) {
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return ret;
+ goto out;
}
spin_lock_init(&sbi->s_md_lock);
sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
if (sbi->s_locality_groups == NULL) {
- kfree(sbi->s_mb_offsets);
- kfree(sbi->s_mb_maxs);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto out;
}
for_each_possible_cpu(i) {
struct ext4_locality_group *lg;
if (sbi->s_journal)
sbi->s_journal->j_commit_callback = release_blocks_on_commit;
- return 0;
+ out:
+ if (ret) {
+ kfree(sbi->s_mb_offsets);
+ kfree(sbi->s_mb_maxs);
+ kfree(namep);
+ }
+ return ret;
}
/* need to called with the ext4 group lock held */
int num_meta_group_infos;
struct ext4_group_info *grinfo;
struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct kmem_cache *cachep = get_groupinfo_cache(sb->s_blocksize_bits);
if (sbi->s_group_info) {
for (i = 0; i < ngroups; i++) {
ext4_lock_group(sb, i);
ext4_mb_cleanup_pa(grinfo);
ext4_unlock_group(sb, i);
- kfree(grinfo);
+ kmem_cache_free(cachep, grinfo);
}
num_meta_group_infos = (ngroups +
EXT4_DESC_PER_BLOCK(sb) - 1) >>
return 0;
}
- static inline void ext4_issue_discard(struct super_block *sb,
+ static inline int ext4_issue_discard(struct super_block *sb,
ext4_group_t block_group, ext4_grpblk_t block, int count)
{
int ret;
discard_block = block + ext4_group_first_block_no(sb, block_group);
trace_ext4_discard_blocks(sb,
(unsigned long long) discard_block, count);
- ret = sb_issue_discard(sb, discard_block, count);
+ ret = sb_issue_discard(sb, discard_block, count, GFP_NOFS, 0);
- if (ret == EOPNOTSUPP) {
+ if (ret == -EOPNOTSUPP) {
ext4_warning(sb, "discard not supported, disabling");
clear_opt(EXT4_SB(sb)->s_mount_opt, DISCARD);
}
+ return ret;
}
/*
#endif
- int __init init_ext4_mballoc(void)
+ int __init ext4_init_mballoc(void)
{
- ext4_pspace_cachep =
- kmem_cache_create("ext4_prealloc_space",
- sizeof(struct ext4_prealloc_space),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_pspace_cachep = KMEM_CACHE(ext4_prealloc_space,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_pspace_cachep == NULL)
return -ENOMEM;
- ext4_ac_cachep =
- kmem_cache_create("ext4_alloc_context",
- sizeof(struct ext4_allocation_context),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_ac_cachep = KMEM_CACHE(ext4_allocation_context,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_ac_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
return -ENOMEM;
}
- ext4_free_ext_cachep =
- kmem_cache_create("ext4_free_block_extents",
- sizeof(struct ext4_free_data),
- 0, SLAB_RECLAIM_ACCOUNT, NULL);
+ ext4_free_ext_cachep = KMEM_CACHE(ext4_free_data,
+ SLAB_RECLAIM_ACCOUNT);
if (ext4_free_ext_cachep == NULL) {
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
return 0;
}
- void exit_ext4_mballoc(void)
+ void ext4_exit_mballoc(void)
{
+ int i;
/*
* Wait for completion of call_rcu()'s on ext4_pspace_cachep
* before destroying the slab cache.
kmem_cache_destroy(ext4_pspace_cachep);
kmem_cache_destroy(ext4_ac_cachep);
kmem_cache_destroy(ext4_free_ext_cachep);
+
+ for (i = 0; i < NR_GRPINFO_CACHES; i++) {
+ struct kmem_cache *cachep = ext4_groupinfo_caches[i];
+ if (cachep) {
+ char *name = (char *)kmem_cache_name(cachep);
+ kmem_cache_destroy(cachep);
+ kfree(name);
+ }
+ }
ext4_remove_debugfs_entry();
}
*/
static noinline_for_stack int
ext4_mb_release_inode_pa(struct ext4_buddy *e4b, struct buffer_head *bitmap_bh,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
struct ext4_sb_info *sbi = EXT4_SB(sb);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
end = bit + pa->pa_len;
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = pa->pa_inode;
- }
-
while (bit < end) {
bit = mb_find_next_zero_bit(bitmap_bh->b_data, end, bit);
if (bit >= end)
(unsigned) next - bit, (unsigned) group);
free += next - bit;
- if (ac) {
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = next - bit;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
-
- trace_ext4_mb_release_inode_pa(sb, ac, pa, grp_blk_start + bit,
- next - bit);
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, next - bit);
+ trace_ext4_mb_release_inode_pa(sb, pa->pa_inode, pa,
+ grp_blk_start + bit, next - bit);
mb_free_blocks(pa->pa_inode, e4b, bit, next - bit);
bit = next + 1;
}
static noinline_for_stack int
ext4_mb_release_group_pa(struct ext4_buddy *e4b,
- struct ext4_prealloc_space *pa,
- struct ext4_allocation_context *ac)
+ struct ext4_prealloc_space *pa)
{
struct super_block *sb = e4b->bd_sb;
ext4_group_t group;
ext4_grpblk_t bit;
- trace_ext4_mb_release_group_pa(sb, ac, pa);
+ trace_ext4_mb_release_group_pa(sb, pa);
BUG_ON(pa->pa_deleted == 0);
ext4_get_group_no_and_offset(sb, pa->pa_pstart, &group, &bit);
BUG_ON(group != e4b->bd_group && pa->pa_len != 0);
mb_free_blocks(pa->pa_inode, e4b, bit, pa->pa_len);
atomic_add(pa->pa_len, &EXT4_SB(sb)->s_mb_discarded);
-
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = NULL;
- ac->ac_b_ex.fe_group = group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = pa->pa_len;
- ac->ac_b_ex.fe_logical = 0;
- trace_ext4_mballoc_discard(ac);
- }
+ trace_ext4_mballoc_discard(sb, NULL, group, bit, pa->pa_len);
return 0;
}
struct ext4_group_info *grp = ext4_get_group_info(sb, group);
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
struct list_head list;
struct ext4_buddy e4b;
int err;
needed = EXT4_BLOCKS_PER_GROUP(sb) + 1;
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
repeat:
ext4_lock_group(sb, group);
list_for_each_entry_safe(pa, tmp,
spin_unlock(pa->pa_obj_lock);
if (pa->pa_type == MB_GROUP_PA)
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
else
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
out:
ext4_unlock_group(sb, group);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
ext4_mb_unload_buddy(&e4b);
put_bh(bitmap_bh);
return free;
struct super_block *sb = inode->i_sb;
struct buffer_head *bitmap_bh = NULL;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
ext4_group_t group = 0;
struct list_head list;
struct ext4_buddy e4b;
INIT_LIST_HEAD(&list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_sb = sb;
- ac->ac_inode = inode;
- }
repeat:
/* first, collect all pa's in the inode */
spin_lock(&ei->i_prealloc_lock);
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa, ac);
+ ext4_mb_release_inode_pa(&e4b, bitmap_bh, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
struct ext4_buddy e4b;
struct list_head discard_list;
struct ext4_prealloc_space *pa, *tmp;
- struct ext4_allocation_context *ac;
mb_debug(1, "discard locality group preallocation\n");
INIT_LIST_HEAD(&discard_list);
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac)
- ac->ac_sb = sb;
spin_lock(&lg->lg_prealloc_lock);
list_for_each_entry_rcu(pa, &lg->lg_prealloc_list[order],
}
ext4_lock_group(sb, group);
list_del(&pa->pa_group_list);
- ext4_mb_release_group_pa(&e4b, pa, ac);
+ ext4_mb_release_group_pa(&e4b, pa);
ext4_unlock_group(sb, group);
ext4_mb_unload_buddy(&e4b);
list_del(&pa->u.pa_tmp_list);
call_rcu(&(pa)->u.pa_rcu, ext4_mb_pa_callback);
}
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
}
/*
{
struct buffer_head *bitmap_bh = NULL;
struct super_block *sb = inode->i_sb;
- struct ext4_allocation_context *ac = NULL;
struct ext4_group_desc *gdp;
unsigned long freed = 0;
unsigned int overflow;
if (!bh)
tbh = sb_find_get_block(inode->i_sb,
block + i);
+ if (unlikely(!tbh))
+ continue;
ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
inode, tbh, block + i);
}
if (!ext4_should_writeback_data(inode))
flags |= EXT4_FREE_BLOCKS_METADATA;
- ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
- if (ac) {
- ac->ac_inode = inode;
- ac->ac_sb = sb;
- }
-
do_more:
overflow = 0;
ext4_get_group_no_and_offset(sb, block, &block_group, &bit);
BUG_ON(!mb_test_bit(bit + i, bitmap_bh->b_data));
}
#endif
- if (ac) {
- ac->ac_b_ex.fe_group = block_group;
- ac->ac_b_ex.fe_start = bit;
- ac->ac_b_ex.fe_len = count;
- trace_ext4_mballoc_free(ac);
- }
+ trace_ext4_mballoc_free(sb, inode, block_group, bit, count);
err = ext4_mb_load_buddy(sb, block_group, &e4b);
if (err)
* with group lock held. generate_buddy look at
* them with group lock_held
*/
+ if (test_opt(sb, DISCARD))
+ ext4_issue_discard(sb, block_group, bit, count);
ext4_lock_group(sb, block_group);
mb_clear_bits(bitmap_bh->b_data, bit, count);
mb_free_blocks(inode, &e4b, bit, count);
ext4_mb_return_to_preallocation(inode, &e4b, block, count);
- if (test_opt(sb, DISCARD))
- ext4_issue_discard(sb, block_group, bit, count);
}
ret = ext4_free_blks_count(sb, gdp) + count;
dquot_free_block(inode, freed);
brelse(bitmap_bh);
ext4_std_error(sb, err);
- if (ac)
- kmem_cache_free(ext4_ac_cachep, ac);
return;
}
+
+ /**
+ * ext4_trim_extent -- function to TRIM one single free extent in the group
+ * @sb: super block for the file system
+ * @start: starting block of the free extent in the alloc. group
+ * @count: number of blocks to TRIM
+ * @group: alloc. group we are working with
+ * @e4b: ext4 buddy for the group
+ *
+ * Trim "count" blocks starting at "start" in the "group". To assure that no
+ * one will allocate those blocks, mark it as used in buddy bitmap. This must
+ * be called with under the group lock.
+ */
+ static int ext4_trim_extent(struct super_block *sb, int start, int count,
+ ext4_group_t group, struct ext4_buddy *e4b)
+ {
+ struct ext4_free_extent ex;
+ int ret = 0;
+
+ assert_spin_locked(ext4_group_lock_ptr(sb, group));
+
+ ex.fe_start = start;
+ ex.fe_group = group;
+ ex.fe_len = count;
+
+ /*
+ * Mark blocks used, so no one can reuse them while
+ * being trimmed.
+ */
+ mb_mark_used(e4b, &ex);
+ ext4_unlock_group(sb, group);
+
+ ret = ext4_issue_discard(sb, group, start, count);
+ if (ret)
+ ext4_std_error(sb, ret);
+
+ ext4_lock_group(sb, group);
+ mb_free_blocks(NULL, e4b, start, ex.fe_len);
+ return ret;
+ }
+
+ /**
+ * ext4_trim_all_free -- function to trim all free space in alloc. group
+ * @sb: super block for file system
+ * @e4b: ext4 buddy
+ * @start: first group block to examine
+ * @max: last group block to examine
+ * @minblocks: minimum extent block count
+ *
+ * ext4_trim_all_free walks through group's buddy bitmap searching for free
+ * extents. When the free block is found, ext4_trim_extent is called to TRIM
+ * the extent.
+ *
+ *
+ * ext4_trim_all_free walks through group's block bitmap searching for free
+ * extents. When the free extent is found, mark it as used in group buddy
+ * bitmap. Then issue a TRIM command on this extent and free the extent in
+ * the group buddy bitmap. This is done until whole group is scanned.
+ */
+ ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b,
+ ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks)
+ {
+ void *bitmap;
+ ext4_grpblk_t next, count = 0;
+ ext4_group_t group;
+ int ret = 0;
+
+ BUG_ON(e4b == NULL);
+
+ bitmap = e4b->bd_bitmap;
+ group = e4b->bd_group;
+ start = (e4b->bd_info->bb_first_free > start) ?
+ e4b->bd_info->bb_first_free : start;
+ ext4_lock_group(sb, group);
+
+ while (start < max) {
+ start = mb_find_next_zero_bit(bitmap, max, start);
+ if (start >= max)
+ break;
+ next = mb_find_next_bit(bitmap, max, start);
+
+ if ((next - start) >= minblocks) {
+ ret = ext4_trim_extent(sb, start,
+ next - start, group, e4b);
+ if (ret < 0)
+ break;
+ count += next - start;
+ }
+ start = next + 1;
+
+ if (fatal_signal_pending(current)) {
+ count = -ERESTARTSYS;
+ break;
+ }
+
+ if (need_resched()) {
+ ext4_unlock_group(sb, group);
+ cond_resched();
+ ext4_lock_group(sb, group);
+ }
+
+ if ((e4b->bd_info->bb_free - count) < minblocks)
+ break;
+ }
+ ext4_unlock_group(sb, group);
+
+ ext4_debug("trimmed %d blocks in the group %d\n",
+ count, group);
+
+ if (ret < 0)
+ count = ret;
+
+ return count;
+ }
+
+ /**
+ * ext4_trim_fs() -- trim ioctl handle function
+ * @sb: superblock for filesystem
+ * @range: fstrim_range structure
+ *
+ * start: First Byte to trim
+ * len: number of Bytes to trim from start
+ * minlen: minimum extent length in Bytes
+ * ext4_trim_fs goes through all allocation groups containing Bytes from
+ * start to start+len. For each such a group ext4_trim_all_free function
+ * is invoked to trim all free space.
+ */
+ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
+ {
+ struct ext4_buddy e4b;
+ ext4_group_t first_group, last_group;
+ ext4_group_t group, ngroups = ext4_get_groups_count(sb);
+ ext4_grpblk_t cnt = 0, first_block, last_block;
+ uint64_t start, len, minlen, trimmed;
+ int ret = 0;
+
+ start = range->start >> sb->s_blocksize_bits;
+ len = range->len >> sb->s_blocksize_bits;
+ minlen = range->minlen >> sb->s_blocksize_bits;
+ trimmed = 0;
+
+ if (unlikely(minlen > EXT4_BLOCKS_PER_GROUP(sb)))
+ return -EINVAL;
+
+ /* Determine first and last group to examine based on start and len */
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) start,
+ &first_group, &first_block);
+ ext4_get_group_no_and_offset(sb, (ext4_fsblk_t) (start + len),
+ &last_group, &last_block);
+ last_group = (last_group > ngroups - 1) ? ngroups - 1 : last_group;
+ last_block = EXT4_BLOCKS_PER_GROUP(sb);
+
+ if (first_group > last_group)
+ return -EINVAL;
+
+ for (group = first_group; group <= last_group; group++) {
+ ret = ext4_mb_load_buddy(sb, group, &e4b);
+ if (ret) {
+ ext4_error(sb, "Error in loading buddy "
+ "information for %u", group);
+ break;
+ }
+
+ if (len >= EXT4_BLOCKS_PER_GROUP(sb))
+ len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block);
+ else
+ last_block = len;
+
+ if (e4b.bd_info->bb_free >= minlen) {
+ cnt = ext4_trim_all_free(sb, &e4b, first_block,
+ last_block, minlen);
+ if (cnt < 0) {
+ ret = cnt;
+ ext4_mb_unload_buddy(&e4b);
+ break;
+ }
+ }
+ ext4_mb_unload_buddy(&e4b);
+ trimmed += cnt;
+ first_block = 0;
+ }
+ range->len = trimmed * sb->s_blocksize;
+
+ return ret;
+ }
struct buffer_head *bh_use[NAMEI_RA_SIZE];
struct buffer_head *bh, *ret = NULL;
ext4_lblk_t start, block, b;
+ const u8 *name = d_name->name;
int ra_max = 0; /* Number of bh's in the readahead
buffer, bh_use[] */
int ra_ptr = 0; /* Current index into readahead
namelen = d_name->len;
if (namelen > EXT4_NAME_LEN)
return NULL;
+ if ((namelen <= 2) && (name[0] == '.') &&
+ (name[1] == '.' || name[1] == '0')) {
+ /*
+ * "." or ".." will only be in the first block
+ * NFS may look up ".."; "." should be handled by the VFS
+ */
+ block = start = 0;
+ nblocks = 1;
+ goto restart;
+ }
if (is_dx(dir)) {
bh = ext4_dx_find_entry(dir, d_name, res_dir, &err);
/*
static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
struct ext4_dir_entry_2 **res_dir, int *err)
{
- struct super_block * sb;
+ struct super_block * sb = dir->i_sb;
struct dx_hash_info hinfo;
- u32 hash;
struct dx_frame frames[2], *frame;
- struct ext4_dir_entry_2 *de, *top;
struct buffer_head *bh;
ext4_lblk_t block;
int retval;
- int namelen = d_name->len;
- const u8 *name = d_name->name;
- sb = dir->i_sb;
- /* NFS may look up ".." - look at dx_root directory block */
- if (namelen > 2 || name[0] != '.'||(name[1] != '.' && name[1] != '\0')){
- if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
- return NULL;
- } else {
- frame = frames;
- frame->bh = NULL; /* for dx_release() */
- frame->at = (struct dx_entry *)frames; /* hack for zero entry*/
- dx_set_block(frame->at, 0); /* dx_root block is 0 */
- }
- hash = hinfo.hash;
+ if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err)))
+ return NULL;
do {
block = dx_get_block(frame->at);
- if (!(bh = ext4_bread (NULL,dir, block, 0, err)))
+ if (!(bh = ext4_bread(NULL, dir, block, 0, err)))
goto errout;
- de = (struct ext4_dir_entry_2 *) bh->b_data;
- top = (struct ext4_dir_entry_2 *) ((char *) de + sb->s_blocksize -
- EXT4_DIR_REC_LEN(0));
- for (; de < top; de = ext4_next_entry(de, sb->s_blocksize)) {
- int off = (block << EXT4_BLOCK_SIZE_BITS(sb))
- + ((char *) de - bh->b_data);
-
- if (!ext4_check_dir_entry(dir, de, bh, off)) {
- brelse(bh);
- *err = ERR_BAD_DX_DIR;
- goto errout;
- }
- if (ext4_match(namelen, name, de)) {
- *res_dir = de;
- dx_release(frames);
- return bh;
- }
+ retval = search_dirblock(bh, dir, d_name,
+ block << EXT4_BLOCK_SIZE_BITS(sb),
+ res_dir);
+ if (retval == 1) { /* Success! */
+ dx_release(frames);
+ return bh;
}
brelse(bh);
+ if (retval == -1) {
+ *err = ERR_BAD_DX_DIR;
+ goto errout;
+ }
+
/* Check to see if we should continue to search */
- retval = ext4_htree_next_block(dir, hash, frame,
+ retval = ext4_htree_next_block(dir, hinfo.hash, frame,
frames, NULL);
if (retval < 0) {
ext4_warning(sb,
inode->i_ctime = ext4_current_time(inode);
ext4_inc_count(handle, inode);
- atomic_inc(&inode->i_count);
+ ihold(inode);
err = ext4_add_entry(handle, dentry, inode);
if (!err) {
}
/* Zero out all of the reserved backup group descriptor table blocks */
- for (i = 0, bit = gdblocks + 1, block = start + bit;
- i < reserved_gdb; i++, block++, bit++) {
- struct buffer_head *gdb;
-
- ext4_debug("clear reserved block %#04llx (+%d)\n", block, bit);
-
- if ((err = extend_or_restart_transaction(handle, 1, bh)))
- goto exit_bh;
+ ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ block, sbi->s_itb_per_group);
+ err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb,
- GFP_NOFS, BLKDEV_IFL_WAIT);
++ GFP_NOFS);
+ if (err)
+ goto exit_bh;
- if (IS_ERR(gdb = bclean(handle, sb, block))) {
- err = PTR_ERR(gdb);
- goto exit_bh;
- }
- ext4_handle_dirty_metadata(handle, NULL, gdb);
- ext4_set_bit(bit, bh->b_data);
- brelse(gdb);
- }
ext4_debug("mark block bitmap %#04llx (+%llu)\n", input->block_bitmap,
input->block_bitmap - start);
ext4_set_bit(input->block_bitmap - start, bh->b_data);
ext4_set_bit(input->inode_bitmap - start, bh->b_data);
/* Zero out all of the inode table blocks */
- for (i = 0, block = input->inode_table, bit = block - start;
- i < sbi->s_itb_per_group; i++, bit++, block++) {
- struct buffer_head *it;
-
- ext4_debug("clear inode block %#04llx (+%d)\n", block, bit);
-
- if ((err = extend_or_restart_transaction(handle, 1, bh)))
- goto exit_bh;
-
- if (IS_ERR(it = bclean(handle, sb, block))) {
- err = PTR_ERR(it);
- goto exit_bh;
- }
- ext4_handle_dirty_metadata(handle, NULL, it);
- brelse(it);
- ext4_set_bit(bit, bh->b_data);
- }
+ block = input->inode_table;
+ ext4_debug("clear inode table blocks %#04llx -> %#04llx\n",
+ block, sbi->s_itb_per_group);
- err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group,
- GFP_NOFS, BLKDEV_IFL_WAIT);
++ err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS);
+ if (err)
+ goto exit_bh;
if ((err = extend_or_restart_transaction(handle, 2, bh)))
goto exit_bh;
- mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8, bh->b_data);
+ ext4_mark_bitmap_end(input->blocks_count, sb->s_blocksize * 8,
+ bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
brelse(bh);
/* Mark unused entries in inode bitmap used */
goto exit_journal;
}
- mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
- bh->b_data);
+ ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
+ bh->b_data);
ext4_handle_dirty_metadata(handle, NULL, bh);
exit_bh:
brelse(bh);
#include <linux/init.h>
#include <linux/blkdev.h>
#include <linux/parser.h>
-#include <linux/smp_lock.h>
#include <linux/buffer_head.h>
#include <linux/exportfs.h>
#include <linux/vfs.h>
#include <linux/crc16.h>
#include <asm/uaccess.h>
+ #include <linux/kthread.h>
+ #include <linux/freezer.h>
+
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#define CREATE_TRACE_POINTS
#include <trace/events/ext4.h>
- struct proc_dir_entry *ext4_proc_root;
+ static struct proc_dir_entry *ext4_proc_root;
static struct kset *ext4_kset;
+ struct ext4_lazy_init *ext4_li_info;
+ struct mutex ext4_li_mtx;
+ struct ext4_features *ext4_feat;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
unsigned long journal_devnum);
static int ext4_freeze(struct super_block *sb);
static int ext4_get_sb(struct file_system_type *fs_type, int flags,
const char *dev_name, void *data, struct vfsmount *mnt);
+ static void ext4_destroy_lazyinit_thread(void);
+ static void ext4_unregister_li_request(struct super_block *sb);
#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
static struct file_system_type ext3_fs_type = {
struct ext4_super_block *es = sbi->s_es;
int i, err;
+ ext4_unregister_li_request(sb);
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
flush_workqueue(sbi->dio_unwritten_wq);
destroy_workqueue(sbi->dio_unwritten_wq);
lock_super(sb);
- lock_kernel();
if (sb->s_dirt)
ext4_commit_super(sb, 1);
ext4_abort(sb, "Couldn't clean up the journal");
}
+ del_timer(&sbi->s_err_report);
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
* Now that we are completely done shutting down the
* superblock, we need to actually destroy the kobject.
*/
- unlock_kernel();
unlock_super(sb);
kobject_put(&sbi->s_kobj);
wait_for_completion(&sbi->s_kobj_unregister);
!(def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY))
seq_puts(seq, ",block_validity");
+ if (!test_opt(sb, INIT_INODE_TABLE))
+ seq_puts(seq, ",noinit_inode_table");
+ else if (sbi->s_li_wait_mult)
+ seq_printf(seq, ",init_inode_table=%u",
+ (unsigned) sbi->s_li_wait_mult);
+
ext4_show_quota_options(seq, sb);
return 0;
.quota_write = ext4_quota_write,
#endif
.bdev_try_to_free_page = bdev_try_to_free_page,
+ .trim_fs = ext4_trim_fs
};
static const struct super_operations ext4_nojournal_sops = {
Opt_inode_readahead_blks, Opt_journal_ioprio,
Opt_dioread_nolock, Opt_dioread_lock,
Opt_discard, Opt_nodiscard,
+ Opt_init_inode_table, Opt_noinit_inode_table,
};
static const match_table_t tokens = {
{Opt_dioread_lock, "dioread_lock"},
{Opt_discard, "discard"},
{Opt_nodiscard, "nodiscard"},
+ {Opt_init_inode_table, "init_itable=%u"},
+ {Opt_init_inode_table, "init_itable"},
+ {Opt_noinit_inode_table, "noinit_itable"},
{Opt_err, NULL},
};
case Opt_dioread_lock:
clear_opt(sbi->s_mount_opt, DIOREAD_NOLOCK);
break;
+ case Opt_init_inode_table:
+ set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ if (args[0].from) {
+ if (match_int(&args[0], &option))
+ return 0;
+ } else
+ option = EXT4_DEF_LI_WAIT_MULT;
+ if (option < 0)
+ return 0;
+ sbi->s_li_wait_mult = option;
+ break;
+ case Opt_noinit_inode_table:
+ clear_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
+ break;
default:
ext4_msg(sb, KERN_ERR,
"Unrecognized mount option \"%s\" "
}
/* Called at mount-time, super-block is locked */
- static int ext4_check_descriptors(struct super_block *sb)
+ static int ext4_check_descriptors(struct super_block *sb,
+ ext4_group_t *first_not_zeroed)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
ext4_fsblk_t first_block = le32_to_cpu(sbi->s_es->s_first_data_block);
ext4_fsblk_t inode_bitmap;
ext4_fsblk_t inode_table;
int flexbg_flag = 0;
- ext4_group_t i;
+ ext4_group_t i, grp = sbi->s_groups_count;
if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
flexbg_flag = 1;
last_block = first_block +
(EXT4_BLOCKS_PER_GROUP(sb) - 1);
+ if ((grp == sbi->s_groups_count) &&
+ !(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ grp = i;
+
block_bitmap = ext4_block_bitmap(sb, gdp);
if (block_bitmap < first_block || block_bitmap > last_block) {
ext4_msg(sb, KERN_ERR, "ext4_check_descriptors: "
if (!flexbg_flag)
first_block += EXT4_BLOCKS_PER_GROUP(sb);
}
+ if (NULL != first_not_zeroed)
+ *first_not_zeroed = grp;
ext4_free_blocks_count_set(sbi->s_es, ext4_count_free_blocks(sb));
sbi->s_es->s_free_inodes_count =cpu_to_le32(ext4_count_free_inodes(sb));
#define EXT4_ATTR(name, mode, show, store) \
static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
+ #define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
#define EXT4_RW_ATTR_SBI_UI(name, elname) \
NULL,
};
+ /* Features this copy of ext4 supports */
+ EXT4_INFO_ATTR(lazy_itable_init);
+ EXT4_INFO_ATTR(batched_discard);
+
+ static struct attribute *ext4_feat_attrs[] = {
+ ATTR_LIST(lazy_itable_init),
+ ATTR_LIST(batched_discard),
+ NULL,
+ };
+
static ssize_t ext4_attr_show(struct kobject *kobj,
struct attribute *attr, char *buf)
{
complete(&sbi->s_kobj_unregister);
}
-
static const struct sysfs_ops ext4_attr_ops = {
.show = ext4_attr_show,
.store = ext4_attr_store,
.release = ext4_sb_release,
};
+ static void ext4_feat_release(struct kobject *kobj)
+ {
+ complete(&ext4_feat->f_kobj_unregister);
+ }
+
+ static struct kobj_type ext4_feat_ktype = {
+ .default_attrs = ext4_feat_attrs,
+ .sysfs_ops = &ext4_attr_ops,
+ .release = ext4_feat_release,
+ };
+
/*
* Check whether this filesystem can be mounted based on
* the features present and the RDONLY/RDWR mount requested.
mod_timer(&sbi->s_err_report, jiffies + 24*60*60*HZ); /* Once a day */
}
+ static void ext4_lazyinode_timeout(unsigned long data)
+ {
+ struct task_struct *p = (struct task_struct *)data;
+ wake_up_process(p);
+ }
+
+ /* Find next suitable group and run ext4_init_inode_table */
+ static int ext4_run_li_request(struct ext4_li_request *elr)
+ {
+ struct ext4_group_desc *gdp = NULL;
+ ext4_group_t group, ngroups;
+ struct super_block *sb;
+ unsigned long timeout = 0;
+ int ret = 0;
+
+ sb = elr->lr_super;
+ ngroups = EXT4_SB(sb)->s_groups_count;
+
+ for (group = elr->lr_next_group; group < ngroups; group++) {
+ gdp = ext4_get_group_desc(sb, group, NULL);
+ if (!gdp) {
+ ret = 1;
+ break;
+ }
+
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ break;
+ }
+
+ if (group == ngroups)
+ ret = 1;
+
+ if (!ret) {
+ timeout = jiffies;
+ ret = ext4_init_inode_table(sb, group,
+ elr->lr_timeout ? 0 : 1);
+ if (elr->lr_timeout == 0) {
+ timeout = jiffies - timeout;
+ if (elr->lr_sbi->s_li_wait_mult)
+ timeout *= elr->lr_sbi->s_li_wait_mult;
+ else
+ timeout *= 20;
+ elr->lr_timeout = timeout;
+ }
+ elr->lr_next_sched = jiffies + elr->lr_timeout;
+ elr->lr_next_group = group + 1;
+ }
+
+ return ret;
+ }
+
+ /*
+ * Remove lr_request from the list_request and free the
+ * request tructure. Should be called with li_list_mtx held
+ */
+ static void ext4_remove_li_request(struct ext4_li_request *elr)
+ {
+ struct ext4_sb_info *sbi;
+
+ if (!elr)
+ return;
+
+ sbi = elr->lr_sbi;
+
+ list_del(&elr->lr_request);
+ sbi->s_li_request = NULL;
+ kfree(elr);
+ }
+
+ static void ext4_unregister_li_request(struct super_block *sb)
+ {
+ struct ext4_li_request *elr = EXT4_SB(sb)->s_li_request;
+
+ if (!ext4_li_info)
+ return;
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ ext4_remove_li_request(elr);
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+ }
+
+ /*
+ * This is the function where ext4lazyinit thread lives. It walks
+ * through the request list searching for next scheduled filesystem.
+ * When such a fs is found, run the lazy initialization request
+ * (ext4_rn_li_request) and keep track of the time spend in this
+ * function. Based on that time we compute next schedule time of
+ * the request. When walking through the list is complete, compute
+ * next waking time and put itself into sleep.
+ */
+ static int ext4_lazyinit_thread(void *arg)
+ {
+ struct ext4_lazy_init *eli = (struct ext4_lazy_init *)arg;
+ struct list_head *pos, *n;
+ struct ext4_li_request *elr;
+ unsigned long next_wakeup;
+ DEFINE_WAIT(wait);
+ int ret;
+
+ BUG_ON(NULL == eli);
+
+ eli->li_timer.data = (unsigned long)current;
+ eli->li_timer.function = ext4_lazyinode_timeout;
+
+ eli->li_task = current;
+ wake_up(&eli->li_wait_task);
+
+ cont_thread:
+ while (true) {
+ next_wakeup = MAX_JIFFY_OFFSET;
+
+ mutex_lock(&eli->li_list_mtx);
+ if (list_empty(&eli->li_request_list)) {
+ mutex_unlock(&eli->li_list_mtx);
+ goto exit_thread;
+ }
+
+ list_for_each_safe(pos, n, &eli->li_request_list) {
+ elr = list_entry(pos, struct ext4_li_request,
+ lr_request);
+
+ if (time_after_eq(jiffies, elr->lr_next_sched))
+ ret = ext4_run_li_request(elr);
+
+ if (ret) {
+ ret = 0;
+ ext4_remove_li_request(elr);
+ continue;
+ }
+
+ if (time_before(elr->lr_next_sched, next_wakeup))
+ next_wakeup = elr->lr_next_sched;
+ }
+ mutex_unlock(&eli->li_list_mtx);
+
+ if (freezing(current))
+ refrigerator();
+
+ if (time_after_eq(jiffies, next_wakeup)) {
+ cond_resched();
+ continue;
+ }
+
+ eli->li_timer.expires = next_wakeup;
+ add_timer(&eli->li_timer);
+ prepare_to_wait(&eli->li_wait_daemon, &wait,
+ TASK_INTERRUPTIBLE);
+ if (time_before(jiffies, next_wakeup))
+ schedule();
+ finish_wait(&eli->li_wait_daemon, &wait);
+ }
+
+ exit_thread:
+ /*
+ * It looks like the request list is empty, but we need
+ * to check it under the li_list_mtx lock, to prevent any
+ * additions into it, and of course we should lock ext4_li_mtx
+ * to atomically free the list and ext4_li_info, because at
+ * this point another ext4 filesystem could be registering
+ * new one.
+ */
+ mutex_lock(&ext4_li_mtx);
+ mutex_lock(&eli->li_list_mtx);
+ if (!list_empty(&eli->li_request_list)) {
+ mutex_unlock(&eli->li_list_mtx);
+ mutex_unlock(&ext4_li_mtx);
+ goto cont_thread;
+ }
+ mutex_unlock(&eli->li_list_mtx);
+ del_timer_sync(&ext4_li_info->li_timer);
+ eli->li_task = NULL;
+ wake_up(&eli->li_wait_task);
+
+ kfree(ext4_li_info);
+ ext4_li_info = NULL;
+ mutex_unlock(&ext4_li_mtx);
+
+ return 0;
+ }
+
+ static void ext4_clear_request_list(void)
+ {
+ struct list_head *pos, *n;
+ struct ext4_li_request *elr;
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ if (list_empty(&ext4_li_info->li_request_list))
+ return;
+
+ list_for_each_safe(pos, n, &ext4_li_info->li_request_list) {
+ elr = list_entry(pos, struct ext4_li_request,
+ lr_request);
+ ext4_remove_li_request(elr);
+ }
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+ }
+
+ static int ext4_run_lazyinit_thread(void)
+ {
+ struct task_struct *t;
+
+ t = kthread_run(ext4_lazyinit_thread, ext4_li_info, "ext4lazyinit");
+ if (IS_ERR(t)) {
+ int err = PTR_ERR(t);
+ ext4_clear_request_list();
+ del_timer_sync(&ext4_li_info->li_timer);
+ kfree(ext4_li_info);
+ ext4_li_info = NULL;
+ printk(KERN_CRIT "EXT4: error %d creating inode table "
+ "initialization thread\n",
+ err);
+ return err;
+ }
+ ext4_li_info->li_state |= EXT4_LAZYINIT_RUNNING;
+
+ wait_event(ext4_li_info->li_wait_task, ext4_li_info->li_task != NULL);
+ return 0;
+ }
+
+ /*
+ * Check whether it make sense to run itable init. thread or not.
+ * If there is at least one uninitialized inode table, return
+ * corresponding group number, else the loop goes through all
+ * groups and return total number of groups.
+ */
+ static ext4_group_t ext4_has_uninit_itable(struct super_block *sb)
+ {
+ ext4_group_t group, ngroups = EXT4_SB(sb)->s_groups_count;
+ struct ext4_group_desc *gdp = NULL;
+
+ for (group = 0; group < ngroups; group++) {
+ gdp = ext4_get_group_desc(sb, group, NULL);
+ if (!gdp)
+ continue;
+
+ if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED)))
+ break;
+ }
+
+ return group;
+ }
+
+ static int ext4_li_info_new(void)
+ {
+ struct ext4_lazy_init *eli = NULL;
+
+ eli = kzalloc(sizeof(*eli), GFP_KERNEL);
+ if (!eli)
+ return -ENOMEM;
+
+ eli->li_task = NULL;
+ INIT_LIST_HEAD(&eli->li_request_list);
+ mutex_init(&eli->li_list_mtx);
+
+ init_waitqueue_head(&eli->li_wait_daemon);
+ init_waitqueue_head(&eli->li_wait_task);
+ init_timer(&eli->li_timer);
+ eli->li_state |= EXT4_LAZYINIT_QUIT;
+
+ ext4_li_info = eli;
+
+ return 0;
+ }
+
+ static struct ext4_li_request *ext4_li_request_new(struct super_block *sb,
+ ext4_group_t start)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_li_request *elr;
+ unsigned long rnd;
+
+ elr = kzalloc(sizeof(*elr), GFP_KERNEL);
+ if (!elr)
+ return NULL;
+
+ elr->lr_super = sb;
+ elr->lr_sbi = sbi;
+ elr->lr_next_group = start;
+
+ /*
+ * Randomize first schedule time of the request to
+ * spread the inode table initialization requests
+ * better.
+ */
+ get_random_bytes(&rnd, sizeof(rnd));
+ elr->lr_next_sched = jiffies + (unsigned long)rnd %
+ (EXT4_DEF_LI_MAX_START_DELAY * HZ);
+
+ return elr;
+ }
+
+ static int ext4_register_li_request(struct super_block *sb,
+ ext4_group_t first_not_zeroed)
+ {
+ struct ext4_sb_info *sbi = EXT4_SB(sb);
+ struct ext4_li_request *elr;
+ ext4_group_t ngroups = EXT4_SB(sb)->s_groups_count;
+ int ret;
+
+ if (sbi->s_li_request != NULL)
+ return 0;
+
+ if (first_not_zeroed == ngroups ||
+ (sb->s_flags & MS_RDONLY) ||
+ !test_opt(sb, INIT_INODE_TABLE)) {
+ sbi->s_li_request = NULL;
+ return 0;
+ }
+
+ if (first_not_zeroed == ngroups) {
+ sbi->s_li_request = NULL;
+ return 0;
+ }
+
+ elr = ext4_li_request_new(sb, first_not_zeroed);
+ if (!elr)
+ return -ENOMEM;
+
+ mutex_lock(&ext4_li_mtx);
+
+ if (NULL == ext4_li_info) {
+ ret = ext4_li_info_new();
+ if (ret)
+ goto out;
+ }
+
+ mutex_lock(&ext4_li_info->li_list_mtx);
+ list_add(&elr->lr_request, &ext4_li_info->li_request_list);
+ mutex_unlock(&ext4_li_info->li_list_mtx);
+
+ sbi->s_li_request = elr;
+
+ if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) {
+ ret = ext4_run_lazyinit_thread();
+ if (ret)
+ goto out;
+ }
+ out:
+ mutex_unlock(&ext4_li_mtx);
+ if (ret)
+ kfree(elr);
+ return ret;
+ }
+
+ /*
+ * We do not need to lock anything since this is called on
+ * module unload.
+ */
+ static void ext4_destroy_lazyinit_thread(void)
+ {
+ /*
+ * If thread exited earlier
+ * there's nothing to be done.
+ */
+ if (!ext4_li_info)
+ return;
+
+ ext4_clear_request_list();
+
+ while (ext4_li_info->li_task) {
+ wake_up(&ext4_li_info->li_wait_daemon);
+ wait_event(ext4_li_info->li_wait_task,
+ ext4_li_info->li_task == NULL);
+ }
+ }
+
static int ext4_fill_super(struct super_block *sb, void *data, int silent)
__releases(kernel_lock)
__acquires(kernel_lock)
__u64 blocks_count;
int err;
unsigned int journal_ioprio = DEFAULT_JOURNAL_IOPRIO;
+ ext4_group_t first_not_zeroed;
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
sbi->s_sectors_written_start =
part_stat_read(sb->s_bdev->bd_part, sectors[1]);
- unlock_kernel();
-
/* Cleanup superblock name */
for (cp = sb->s_id; (cp = strchr(cp, '/'));)
*cp = '!';
/* Set defaults before we parse the mount options */
def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
+ set_opt(sbi->s_mount_opt, INIT_INODE_TABLE);
if (def_mount_opts & EXT4_DEFM_DEBUG)
set_opt(sbi->s_mount_opt, DEBUG);
if (def_mount_opts & EXT4_DEFM_BSDGROUPS) {
* Test whether we have more sectors than will fit in sector_t,
* and whether the max offset is addressable by the page cache.
*/
- if ((ext4_blocks_count(es) >
- (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) ||
- (ext4_blocks_count(es) >
- (pgoff_t)(~0ULL) >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits))) {
+ ret = generic_check_addressable(sb->s_blocksize_bits,
+ ext4_blocks_count(es));
+ if (ret) {
ext4_msg(sb, KERN_ERR, "filesystem"
" too large to mount safely on this system");
if (sizeof(sector_t) < 8)
ext4_msg(sb, KERN_WARNING, "CONFIG_LBDAF not enabled");
- ret = -EFBIG;
goto failed_mount;
}
goto failed_mount2;
}
}
- if (!ext4_check_descriptors(sb)) {
+ if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
goto failed_mount2;
}
goto failed_mount4;
}
+ err = ext4_register_li_request(sb, first_not_zeroed);
+ if (err)
+ goto failed_mount4;
+
sbi->s_kobj.kset = ext4_kset;
init_completion(&sbi->s_kobj_unregister);
err = kobject_init_and_add(&sbi->s_kobj, &ext4_ktype, NULL,
if (es->s_error_count)
mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */
- lock_kernel();
kfree(orig_data);
return 0;
sb->s_fs_info = NULL;
kfree(sbi->s_blockgroup_lock);
kfree(sbi);
- lock_kernel();
out_free_orig:
kfree(orig_data);
return ret;
EXT4_SB(sb)->s_journal = journal;
ext4_clear_journal_err(sb, es);
- if (journal_devnum &&
+ if (!really_read_only && journal_devnum &&
journal_devnum != le32_to_cpu(es->s_journal_dev)) {
es->s_journal_dev = cpu_to_le32(journal_devnum);
else
es->s_kbytes_written =
cpu_to_le64(EXT4_SB(sb)->s_kbytes_written);
- ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeblocks_counter))
+ ext4_free_blocks_count_set(es, percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeblocks_counter));
- es->s_free_inodes_count = cpu_to_le32(percpu_counter_sum_positive(
+ if (percpu_counter_initialized(&EXT4_SB(sb)->s_freeinodes_counter))
+ es->s_free_inodes_count =
+ cpu_to_le32(percpu_counter_sum_positive(
&EXT4_SB(sb)->s_freeinodes_counter));
sb->s_dirt = 0;
BUFFER_TRACE(sbh, "marking dirty");
#endif
char *orig_data = kstrdup(data, GFP_KERNEL);
- lock_kernel();
-
/* Store the original options */
lock_super(sb);
old_sb_flags = sb->s_flags;
enable_quota = 1;
}
}
+
+ /*
+ * Reinitialize lazy itable initialization thread based on
+ * current settings
+ */
+ if ((sb->s_flags & MS_RDONLY) || !test_opt(sb, INIT_INODE_TABLE))
+ ext4_unregister_li_request(sb);
+ else {
+ ext4_group_t first_not_zeroed;
+ first_not_zeroed = ext4_has_uninit_itable(sb);
+ ext4_register_li_request(sb, first_not_zeroed);
+ }
+
ext4_setup_system_zone(sb);
if (sbi->s_journal == NULL)
ext4_commit_super(sb, 1);
kfree(old_opts.s_qf_names[i]);
#endif
unlock_super(sb);
- unlock_kernel();
if (enable_quota)
dquot_resume(sb, -1);
}
#endif
unlock_super(sb);
- unlock_kernel();
kfree(orig_data);
return err;
}
.fs_flags = FS_REQUIRES_DEV,
};
- static int __init init_ext4_fs(void)
+ int __init ext4_init_feat_adverts(void)
+ {
+ struct ext4_features *ef;
+ int ret = -ENOMEM;
+
+ ef = kzalloc(sizeof(struct ext4_features), GFP_KERNEL);
+ if (!ef)
+ goto out;
+
+ ef->f_kobj.kset = ext4_kset;
+ init_completion(&ef->f_kobj_unregister);
+ ret = kobject_init_and_add(&ef->f_kobj, &ext4_feat_ktype, NULL,
+ "features");
+ if (ret) {
+ kfree(ef);
+ goto out;
+ }
+
+ ext4_feat = ef;
+ ret = 0;
+ out:
+ return ret;
+ }
+
+ static int __init ext4_init_fs(void)
{
int err;
ext4_check_flag_values();
- err = init_ext4_system_zone();
+ err = ext4_init_pageio();
if (err)
return err;
+ err = ext4_init_system_zone();
+ if (err)
+ goto out5;
ext4_kset = kset_create_and_add("ext4", NULL, fs_kobj);
if (!ext4_kset)
goto out4;
ext4_proc_root = proc_mkdir("fs/ext4", NULL);
- err = init_ext4_mballoc();
+
+ err = ext4_init_feat_adverts();
+
+ err = ext4_init_mballoc();
if (err)
goto out3;
- err = init_ext4_xattr();
+ err = ext4_init_xattr();
if (err)
goto out2;
err = init_inodecache();
err = register_filesystem(&ext4_fs_type);
if (err)
goto out;
+
+ ext4_li_info = NULL;
+ mutex_init(&ext4_li_mtx);
return 0;
out:
unregister_as_ext2();
unregister_as_ext3();
destroy_inodecache();
out1:
- exit_ext4_xattr();
+ ext4_exit_xattr();
out2:
- exit_ext4_mballoc();
+ ext4_exit_mballoc();
out3:
+ kfree(ext4_feat);
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
out4:
- exit_ext4_system_zone();
+ ext4_exit_system_zone();
+ out5:
+ ext4_exit_pageio();
return err;
}
- static void __exit exit_ext4_fs(void)
+ static void __exit ext4_exit_fs(void)
{
+ ext4_destroy_lazyinit_thread();
unregister_as_ext2();
unregister_as_ext3();
unregister_filesystem(&ext4_fs_type);
destroy_inodecache();
- exit_ext4_xattr();
- exit_ext4_mballoc();
+ ext4_exit_xattr();
+ ext4_exit_mballoc();
remove_proc_entry("fs/ext4", NULL);
kset_unregister(ext4_kset);
- exit_ext4_system_zone();
+ ext4_exit_system_zone();
+ ext4_exit_pageio();
}
MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
MODULE_DESCRIPTION("Fourth Extended Filesystem");
MODULE_LICENSE("GPL");
- module_init(init_ext4_fs)
- module_exit(exit_ext4_fs)
+ module_init(ext4_init_fs)
+ module_exit(ext4_exit_fs)
transaction->t_chp_stats.cs_forced_to_close++;
spin_unlock(&journal->j_list_lock);
jbd_unlock_bh_state(bh);
+ if (unlikely(journal->j_flags & JBD2_UNMOUNT))
+ /*
+ * The journal thread is dead; so starting and
+ * waiting for a commit to finish will cause
+ * us to wait for a _very_ long time.
+ */
+ printk(KERN_ERR "JBD2: %s: "
+ "Waiting for Godot: block %llu\n",
+ journal->j_devname,
+ (unsigned long long) bh->b_blocknr);
jbd2_log_start_commit(journal, tid);
jbd2_log_wait_commit(journal, tid);
ret = 1;
*/
if ((journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
if (!(journal->j_flags & JBD2_ABORT))
jbd2_journal_update_superblock(journal, 1);
return 0;
#include <linux/backing-dev.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+ #include <linux/bitops.h>
#include <trace/events/jbd2.h>
+ #include <asm/system.h>
/*
* Default IO end handler for temporary BJ_IO buffer_heads.
if (journal->j_flags & JBD2_BARRIER &&
!JBD2_HAS_INCOMPAT_FEATURE(journal,
- JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) {
- ret = submit_bh(WRITE_SYNC_PLUG | WRITE_BARRIER, bh);
- if (ret == -EOPNOTSUPP) {
- printk(KERN_WARNING
- "JBD2: Disabling barriers on %s, "
- "not supported by device\n", journal->j_devname);
- write_lock(&journal->j_state_lock);
- journal->j_flags &= ~JBD2_BARRIER;
- write_unlock(&journal->j_state_lock);
-
- /* And try again, without the barrier */
- lock_buffer(bh);
- set_buffer_uptodate(bh);
- clear_buffer_dirty(bh);
- ret = submit_bh(WRITE_SYNC_PLUG, bh);
- }
- } else {
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT))
+ ret = submit_bh(WRITE_SYNC_PLUG | WRITE_FLUSH_FUA, bh);
+ else
ret = submit_bh(WRITE_SYNC_PLUG, bh);
- }
+
*cbh = bh;
return ret;
}
{
int ret = 0;
-retry:
clear_buffer_dirty(bh);
wait_on_buffer(bh);
- if (buffer_eopnotsupp(bh) && (journal->j_flags & JBD2_BARRIER)) {
- printk(KERN_WARNING
- "JBD2: %s: disabling barries on %s - not supported "
- "by device\n", __func__, journal->j_devname);
- write_lock(&journal->j_state_lock);
- journal->j_flags &= ~JBD2_BARRIER;
- write_unlock(&journal->j_state_lock);
-
- lock_buffer(bh);
- clear_buffer_dirty(bh);
- set_buffer_uptodate(bh);
- bh->b_end_io = journal_end_buffer_io_sync;
-
- ret = submit_bh(WRITE_SYNC_PLUG, bh);
- if (ret) {
- unlock_buffer(bh);
- return ret;
- }
- goto retry;
- }
if (unlikely(!buffer_uptodate(bh)))
ret = -EIO;
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping;
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
/*
* submit the inode data buffers. We use writepage
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
commit_transaction->t_flushed_data_blocks = 1;
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
spin_unlock(&journal->j_list_lock);
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
- jinode->i_flags |= JI_COMMIT_RUNNING;
+ set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
ret = err;
}
spin_lock(&journal->j_list_lock);
- jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ smp_mb__after_clear_bit();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
int tag_bytes = journal_tag_bytes(journal);
struct buffer_head *cbh = NULL; /* For transactional checksums */
__u32 crc32_sum = ~0;
- int write_op = WRITE;
+ int write_op = WRITE_SYNC;
/*
* First job: lock down the current transaction and wait for
}
}
+ err = journal_finish_inode_data_buffers(journal, commit_transaction);
+ if (err) {
+ printk(KERN_WARNING
+ "JBD2: Detected IO errors while flushing file data "
+ "on %s\n", journal->j_devname);
+ if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
+ jbd2_journal_abort(journal, err);
+ err = 0;
+ }
+
/*
* If the journal is not located on the file system device,
* then we must flush the file system device before we issue
if (commit_transaction->t_flushed_data_blocks &&
(journal->j_fs_dev != journal->j_dev) &&
(journal->j_flags & JBD2_BARRIER))
- blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
+ blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL);
/* Done it all: now write the commit record asynchronously. */
if (JBD2_HAS_INCOMPAT_FEATURE(journal,
&cbh, crc32_sum);
if (err)
__jbd2_journal_abort_hard(journal);
- if (journal->j_flags & JBD2_BARRIER)
- blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL,
- BLKDEV_IFL_WAIT);
- }
-
- err = journal_finish_inode_data_buffers(journal, commit_transaction);
- if (err) {
- printk(KERN_WARNING
- "JBD2: Detected IO errors while flushing file data "
- "on %s\n", journal->j_devname);
- if (journal->j_flags & JBD2_ABORT_ON_SYNCDATA_ERR)
- jbd2_journal_abort(journal, err);
- err = 0;
}
/* Lo and behold: we have just managed to send a transaction to
}
if (!err && !is_journal_aborted(journal))
err = journal_wait_on_commit_record(journal, cbh);
+ if (JBD2_HAS_INCOMPAT_FEATURE(journal,
+ JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) &&
+ journal->j_flags & JBD2_BARRIER) {
+ blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL);
+ }
if (err)
jbd2_journal_abort(journal, err);
#include <linux/log2.h>
#include <linux/vmalloc.h>
#include <linux/backing-dev.h>
+ #include <linux/bitops.h>
#define CREATE_TRACE_POINTS
#include <trace/events/jbd2.h>
#include <asm/uaccess.h>
#include <asm/page.h>
+ #include <asm/system.h>
EXPORT_SYMBOL(jbd2_journal_extend);
EXPORT_SYMBOL(jbd2_journal_stop);
if (!compat && !ro && !incompat)
return 1;
+ /* Load journal superblock if it is not loaded yet. */
+ if (journal->j_format_version == 0 &&
+ journal_get_superblock(journal) != 0)
+ return 0;
if (journal->j_format_version == 1)
return 0;
restart:
spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */
- if (jinode->i_flags & JI_COMMIT_RUNNING) {
+ if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
* physical address coalescing is performed.
*/
unsigned short nr_phys_segments;
+#if defined(CONFIG_BLK_DEV_INTEGRITY)
+ unsigned short nr_integrity_segments;
+#endif
unsigned short ioprio;
unsigned short logical_block_size;
unsigned short max_segments;
+ unsigned short max_integrity_segments;
unsigned char misaligned;
unsigned char discard_misaligned;
struct blk_trace *blk_trace;
#endif
/*
- * reserved for flush operations
+ * for flush operations
*/
- unsigned int ordered, next_ordered, ordseq;
- int orderr, ordcolor;
- struct request pre_flush_rq, bar_rq, post_flush_rq;
- struct request *orig_bar_rq;
+ unsigned int flush_flags;
+ unsigned int flush_seq;
+ int flush_err;
+ struct request flush_rq;
+ struct request *orig_flush_rq;
+ struct list_head pending_flushes;
struct mutex sysfs_lock;
#if defined(CONFIG_BLK_DEV_BSG)
struct bsg_class_device bsg_dev;
#endif
+
+#ifdef CONFIG_BLK_DEV_THROTTLING
+ /* Throttle data */
+ struct throtl_data *td;
+#endif
};
#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */
__clear_bit(flag, &q->queue_flags);
}
-enum {
- /*
- * Hardbarrier is supported with one of the following methods.
- *
- * NONE : hardbarrier unsupported
- * DRAIN : ordering by draining is enough
- * DRAIN_FLUSH : ordering by draining w/ pre and post flushes
- * DRAIN_FUA : ordering by draining w/ pre flush and FUA write
- * TAG : ordering by tag is enough
- * TAG_FLUSH : ordering by tag w/ pre and post flushes
- * TAG_FUA : ordering by tag w/ pre flush and FUA write
- */
- QUEUE_ORDERED_BY_DRAIN = 0x01,
- QUEUE_ORDERED_BY_TAG = 0x02,
- QUEUE_ORDERED_DO_PREFLUSH = 0x10,
- QUEUE_ORDERED_DO_BAR = 0x20,
- QUEUE_ORDERED_DO_POSTFLUSH = 0x40,
- QUEUE_ORDERED_DO_FUA = 0x80,
-
- QUEUE_ORDERED_NONE = 0x00,
-
- QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN |
- QUEUE_ORDERED_DO_BAR,
- QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN |
- QUEUE_ORDERED_DO_PREFLUSH |
- QUEUE_ORDERED_DO_POSTFLUSH,
- QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN |
- QUEUE_ORDERED_DO_PREFLUSH |
- QUEUE_ORDERED_DO_FUA,
-
- QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG |
- QUEUE_ORDERED_DO_BAR,
- QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG |
- QUEUE_ORDERED_DO_PREFLUSH |
- QUEUE_ORDERED_DO_POSTFLUSH,
- QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG |
- QUEUE_ORDERED_DO_PREFLUSH |
- QUEUE_ORDERED_DO_FUA,
-
- /*
- * Ordered operation sequence
- */
- QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */
- QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */
- QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */
- QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */
- QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */
- QUEUE_ORDSEQ_DONE = 0x20,
-};
-
#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)
#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags)
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags)
#define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags)
#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags)
-#define blk_queue_flushing(q) ((q)->ordseq)
#define blk_queue_stackable(q) \
test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags)
#define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags)
* it already be started by driver.
*/
#define RQ_NOMERGE_FLAGS \
- (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER)
+ (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \
+ REQ_FLUSH | REQ_FUA)
#define rq_mergeable(rq) \
(!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \
(((rq)->cmd_flags & REQ_DISCARD) || \
extern void blk_queue_max_discard_sectors(struct request_queue *q,
unsigned int max_discard_sectors);
extern void blk_queue_logical_block_size(struct request_queue *, unsigned short);
-extern void blk_queue_physical_block_size(struct request_queue *, unsigned short);
+extern void blk_queue_physical_block_size(struct request_queue *, unsigned int);
extern void blk_queue_alignment_offset(struct request_queue *q,
unsigned int alignment);
extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min);
extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *);
extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *);
extern void blk_queue_rq_timeout(struct request_queue *, unsigned int);
+extern void blk_queue_flush(struct request_queue *q, unsigned int flush);
extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev);
-extern int blk_queue_ordered(struct request_queue *, unsigned);
-extern bool blk_do_ordered(struct request_queue *, struct request **);
-extern unsigned blk_ordered_cur_seq(struct request_queue *);
-extern unsigned blk_ordered_req_seq(struct request *);
-extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int);
extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
return NULL;
return bqt->tag_index[tag];
}
-enum{
- BLKDEV_WAIT, /* wait for completion */
- BLKDEV_BARRIER, /* issue request with barrier */
- BLKDEV_SECURE, /* secure discard */
-};
-#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT)
-#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER)
-#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE)
-extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *,
- unsigned long);
+
+#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */
+
+extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *);
extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector,
- sector_t nr_sects, gfp_t gfp_mask, unsigned long flags);
-static inline int sb_issue_discard(struct super_block *sb,
- sector_t block, sector_t nr_blocks)
+ sector_t nr_sects, gfp_t gfp_mask);
+static inline int sb_issue_discard(struct super_block *sb, sector_t block,
+ sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
{
- block <<= (sb->s_blocksize_bits - 9);
- nr_blocks <<= (sb->s_blocksize_bits - 9);
- return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS,
- BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER);
+ return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9),
+ nr_blocks << (sb->s_blocksize_bits - 9),
+ gfp_mask, flags);
}
- sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags)
+ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block,
- gfp_mask, flags);
++ sector_t nr_blocks, gfp_t gfp_mask)
+ {
+ return blkdev_issue_zeroout(sb->s_bdev,
+ block << (sb->s_blocksize_bits - 9),
+ nr_blocks << (sb->s_blocksize_bits - 9),
++ gfp_mask);
+ }
extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm);
return q->limits.physical_block_size;
}
-static inline int bdev_physical_block_size(struct block_device *bdev)
+static inline unsigned int bdev_physical_block_size(struct block_device *bdev)
{
return queue_physical_block_size(bdev_get_queue(bdev));
}
return q ? q->dma_alignment : 511;
}
-static inline int blk_rq_aligned(struct request_queue *q, void *addr,
+static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr,
unsigned int len)
{
unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask;
- return !((unsigned long)addr & alignment) && !(len & alignment);
+ return !(addr & alignment) && !(len & alignment);
}
/* assumes size > 256 */
struct work_struct;
int kblockd_schedule_work(struct request_queue *q, struct work_struct *work);
+int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay);
#ifdef CONFIG_BLK_CGROUP
/*
}
#endif
+#ifdef CONFIG_BLK_DEV_THROTTLING
+extern int blk_throtl_init(struct request_queue *q);
+extern void blk_throtl_exit(struct request_queue *q);
+extern int blk_throtl_bio(struct request_queue *q, struct bio **bio);
+extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay);
+extern void throtl_shutdown_timer_wq(struct request_queue *q);
+#else /* CONFIG_BLK_DEV_THROTTLING */
+static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio)
+{
+ return 0;
+}
+
+static inline int blk_throtl_init(struct request_queue *q) { return 0; }
+static inline int blk_throtl_exit(struct request_queue *q) { return 0; }
+static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {}
+static inline void throtl_shutdown_timer_wq(struct request_queue *q) {}
+#endif /* CONFIG_BLK_DEV_THROTTLING */
+
#define MODULE_ALIAS_BLOCKDEV(major,minor) \
MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor))
#define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \
extern int blk_integrity_register(struct gendisk *, struct blk_integrity *);
extern void blk_integrity_unregister(struct gendisk *);
extern int blk_integrity_compare(struct gendisk *, struct gendisk *);
-extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
-extern int blk_rq_count_integrity_sg(struct request *);
+extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *,
+ struct scatterlist *);
+extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
+extern int blk_integrity_merge_rq(struct request_queue *, struct request *,
+ struct request *);
+extern int blk_integrity_merge_bio(struct request_queue *, struct request *,
+ struct bio *);
static inline
struct blk_integrity *bdev_get_integrity(struct block_device *bdev)
return bio_integrity(rq->bio);
}
+static inline void blk_queue_max_integrity_segments(struct request_queue *q,
+ unsigned int segs)
+{
+ q->limits.max_integrity_segments = segs;
+}
+
+static inline unsigned short
+queue_max_integrity_segments(struct request_queue *q)
+{
+ return q->limits.max_integrity_segments;
+}
+
#else /* CONFIG_BLK_DEV_INTEGRITY */
#define blk_integrity_rq(rq) (0)
-#define blk_rq_count_integrity_sg(a) (0)
-#define blk_rq_map_integrity_sg(a, b) (0)
+#define blk_rq_count_integrity_sg(a, b) (0)
+#define blk_rq_map_integrity_sg(a, b, c) (0)
#define bdev_get_integrity(a) (0)
#define blk_get_integrity(a) (0)
#define blk_integrity_compare(a, b) (0)
#define blk_integrity_register(a, b) (0)
#define blk_integrity_unregister(a) do { } while (0);
+#define blk_queue_max_integrity_segments(a, b) do { } while (0);
+#define queue_max_integrity_segments(a) (0)
+#define blk_integrity_merge_rq(a, b, c) (0)
+#define blk_integrity_merge_bio(a, b, c) (0)
#endif /* CONFIG_BLK_DEV_INTEGRITY */
#define SEEK_END 2 /* seek relative to end of file */
#define SEEK_MAX SEEK_END
+ struct fstrim_range {
+ uint64_t start;
+ uint64_t len;
+ uint64_t minlen;
+ };
+
/* And dynamically-tunable limits and defaults: */
struct files_stat_struct {
- int nr_files; /* read only */
- int nr_free_files; /* read only */
- int max_files; /* tunable */
+ unsigned long nr_files; /* read only */
+ unsigned long nr_free_files; /* read only */
+ unsigned long max_files; /* tunable */
};
struct inodes_stat_t {
/* Expect random access pattern */
#define FMODE_RANDOM ((__force fmode_t)0x1000)
+/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
+#define FMODE_UNSIGNED_OFFSET ((__force fmode_t)0x2000)
+
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
* immediately after submission. The write equivalent
* of READ_SYNC.
* WRITE_ODIRECT_PLUG Special case write for O_DIRECT only.
- * WRITE_BARRIER Like WRITE_SYNC, but tells the block layer that all
- * previously submitted writes must be safely on storage
- * before this one is started. Also guarantees that when
- * this write is complete, it itself is also safely on
- * storage. Prevents reordering of writes on both sides
- * of this IO.
+ * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush.
+ * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on
+ * non-volatile media on completion.
+ * WRITE_FLUSH_FUA Combination of WRITE_FLUSH and FUA. The IO is preceded
+ * by a cache flush and data is guaranteed to be on
+ * non-volatile media on completion.
*
*/
#define RW_MASK REQ_WRITE
#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG)
#define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC)
#define WRITE_META (WRITE | REQ_META)
-#define WRITE_BARRIER (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
- REQ_HARDBARRIER)
-
-/*
- * These aren't really reads or writes, they pass down information about
- * parts of device that are now unused by the file system.
- */
-#define DISCARD_NOBARRIER (WRITE | REQ_DISCARD)
-#define DISCARD_BARRIER (WRITE | REQ_DISCARD | REQ_HARDBARRIER)
-#define DISCARD_SECURE (DISCARD_NOBARRIER | REQ_SECURE)
+#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+ REQ_FLUSH)
+#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+ REQ_FUA)
+#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \
+ REQ_FLUSH | REQ_FUA)
#define SEL_IN 1
#define SEL_OUT 2
#define S_NOCMTIME 128 /* Do not update file c/mtime */
#define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
#define S_PRIVATE 512 /* Inode is fs-internal */
+#define S_IMA 1024 /* Inode has an associated IMA struct */
/*
* Note that nosuid etc flags are inode-specific: setting some file-system
#define IS_NOCMTIME(inode) ((inode)->i_flags & S_NOCMTIME)
#define IS_SWAPFILE(inode) ((inode)->i_flags & S_SWAPFILE)
#define IS_PRIVATE(inode) ((inode)->i_flags & S_PRIVATE)
+#define IS_IMA(inode) ((inode)->i_flags & S_IMA)
/* the read-only stuff doesn't really belong here, but any other place is
probably as bad and I don't want to create yet another include file. */
#define FIGETBSZ _IO(0x00,2) /* get the block size used for bmap */
#define FIFREEZE _IOWR('X', 119, int) /* Freeze */
#define FITHAW _IOWR('X', 120, int) /* Thaw */
+ #define FITRIM _IOWR('X', 121, struct fstrim_range) /* Trim */
#define FS_IOC_GETFLAGS _IOR('f', 1, long)
#define FS_IOC_SETFLAGS _IOW('f', 2, long)
extern void __init files_init(unsigned long);
extern struct files_stat_struct files_stat;
-extern int get_max_files(void);
+extern unsigned long get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
struct inode {
struct hlist_node i_hash;
- struct list_head i_list; /* backing dev IO list */
+ struct list_head i_wb_list; /* backing dev IO list */
+ struct list_head i_lru; /* inode LRU list */
struct list_head i_sb_list;
struct list_head i_dentry;
unsigned long i_ino;
unsigned int i_flags;
+#ifdef CONFIG_IMA
+ /* protected by i_lock */
+ unsigned int i_readcount; /* struct files open RO */
+#endif
atomic_t i_writecount;
#ifdef CONFIG_SECURITY
void *i_security;
void *i_private; /* fs or device private pointer */
};
+static inline int inode_unhashed(struct inode *inode)
+{
+ return hlist_unhashed(&inode->i_hash);
+}
+
/*
* inode->i_mutex nesting subclasses for the lock validator:
*
#include <linux/fcntl.h>
-/* temporary stubs for BKL removal */
-#define lock_flocks() lock_kernel()
-#define unlock_flocks() unlock_kernel()
-
extern void send_sigio(struct fown_struct *fown, int fd, int band);
#ifdef CONFIG_FILE_LOCKING
/* fs/locks.c */
extern void locks_init_lock(struct file_lock *);
+extern struct file_lock * locks_alloc_lock(void);
extern void locks_copy_lock(struct file_lock *, struct file_lock *);
extern void __locks_copy_lock(struct file_lock *, const struct file_lock *);
extern void locks_remove_posix(struct file *, fl_owner_t);
extern int lease_modify(struct file_lock **, int);
extern int lock_may_read(struct inode *, loff_t start, unsigned long count);
extern int lock_may_write(struct inode *, loff_t start, unsigned long count);
+extern void lock_flocks(void);
+extern void unlock_flocks(void);
#else /* !CONFIG_FILE_LOCKING */
static inline int fcntl_getlk(struct file *file, struct flock __user *user)
{
return 1;
}
+static inline void lock_flocks(void)
+{
+}
+
+static inline void unlock_flocks(void)
+{
+}
+
#endif /* !CONFIG_FILE_LOCKING */
/* SMP safe fasync helpers: */
extern int fasync_helper(int, struct file *, int, struct fasync_struct **);
+extern struct fasync_struct *fasync_insert_entry(int, struct file *, struct fasync_struct **, struct fasync_struct *);
+extern int fasync_remove_entry(struct file *, struct fasync_struct **);
+extern struct fasync_struct *fasync_alloc(void);
+extern void fasync_free(struct fasync_struct *);
+
/* can be called from interrupts */
extern void kill_fasync(struct fasync_struct **, int, int);
* Saved mount options for lazy filesystems using
* generic_show_options()
*/
- char *s_options;
+ char __rcu *s_options;
};
extern struct timespec current_fs_time(struct super_block *sb);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
+ int (*trim_fs) (struct super_block *, struct fstrim_range *);
};
/*
*
* Q: What is the difference between I_WILL_FREE and I_FREEING?
*/
-#define I_DIRTY_SYNC 1
-#define I_DIRTY_DATASYNC 2
-#define I_DIRTY_PAGES 4
+#define I_DIRTY_SYNC (1 << 0)
+#define I_DIRTY_DATASYNC (1 << 1)
+#define I_DIRTY_PAGES (1 << 2)
#define __I_NEW 3
#define I_NEW (1 << __I_NEW)
-#define I_WILL_FREE 16
-#define I_FREEING 32
-#define I_CLEAR 64
+#define I_WILL_FREE (1 << 4)
+#define I_FREEING (1 << 5)
+#define I_CLEAR (1 << 6)
#define __I_SYNC 7
#define I_SYNC (1 << __I_SYNC)
+#define I_REFERENCED (1 << 8)
#define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
}
int sync_inode(struct inode *inode, struct writeback_control *wbc);
+int sync_inode_metadata(struct inode *inode, int wait);
struct file_system_type {
const char *name;
extern int __invalidate_device(struct block_device *);
extern int invalidate_partition(struct gendisk *, int);
#endif
-extern int invalidate_inodes(struct super_block *);
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
extern int inode_init_always(struct super_block *, struct inode *);
extern void inode_init_once(struct inode *);
-extern void inode_add_to_lists(struct super_block *, struct inode *);
+extern void ihold(struct inode * inode);
extern void iput(struct inode *);
extern struct inode * igrab(struct inode *);
extern ino_t iunique(struct super_block *, ino_t);
extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
extern int insert_inode_locked(struct inode *);
extern void unlock_new_inode(struct inode *);
+extern unsigned int get_next_ino(void);
extern void __iget(struct inode * inode);
extern void iget_failed(struct inode *);
extern void end_writeback(struct inode *);
-extern void destroy_inode(struct inode *);
extern void __destroy_inode(struct inode *);
extern struct inode *new_inode(struct super_block *);
extern int should_remove_suid(struct dentry *);
extern void __insert_inode_hash(struct inode *, unsigned long hashval);
extern void remove_inode_hash(struct inode *);
-static inline void insert_inode_hash(struct inode *inode) {
+static inline void insert_inode_hash(struct inode *inode)
+{
__insert_inode_hash(inode, inode->i_ino);
}
+extern void inode_sb_list_add(struct inode *inode);
#ifdef CONFIG_BLOCK
extern void submit_bio(int, struct bio *);
extern int generic_file_fsync(struct file *, int);
+extern int generic_check_addressable(unsigned, u64);
+
#ifdef CONFIG_MIGRATION
extern int buffer_migrate_page(struct address_space *,
struct page *, struct page *);
.release = simple_attr_release, \
.read = simple_attr_read, \
.write = simple_attr_write, \
+ .llseek = generic_file_llseek, \
};
static inline void __attribute__((format(printf, 1, 2)))
struct ctl_table;
int proc_nr_files(struct ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos);
-
+int proc_nr_dentry(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+int proc_nr_inodes(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
int __init get_filesystem_list(char *buf);
#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
struct backing_dev_info;
extern spinlock_t inode_lock;
-extern struct list_head inode_in_use;
-extern struct list_head inode_unused;
/*
* fs/fs-writeback.c
int generic_writepages(struct address_space *mapping,
struct writeback_control *wbc);
+ void tag_pages_for_writeback(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
int write_cache_pages(struct address_space *mapping,
struct writeback_control *wbc, writepage_t writepage,
void *data);
int do_writepages(struct address_space *mapping, struct writeback_control *wbc);
void set_page_dirty_balance(struct page *page, int page_mkwrite);
void writeback_set_ratelimit(void);
+void tag_pages_for_writeback(struct address_space *mapping,
+ pgoff_t start, pgoff_t end);
/* pdflush.c */
extern int nr_pdflush_threads; /* Global so it can be exported to sysctl
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( uid_t, uid )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->uid = inode->i_uid;
__entry->blocks = inode->i_blocks;
),
- TP_printk("dev %s ino %lu mode 0%o uid %u gid %u blocks %llu",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->mode, __entry->uid, __entry->gid,
+ TP_printk("dev %d,%d ino %lu mode 0%o uid %u gid %u blocks %llu",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->mode,
+ __entry->uid, __entry->gid,
(unsigned long long) __entry->blocks)
);
TP_ARGS(dir, mode),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
- __entry->dev = dir->i_sb->s_dev;
+ __entry->dev_major = MAJOR(dir->i_sb->s_dev);
+ __entry->dev_minor = MINOR(dir->i_sb->s_dev);
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
- TP_printk("dev %s dir %lu mode 0%o",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->dir,
- __entry->mode)
+ TP_printk("dev %d,%d dir %lu mode 0%o",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->dir, __entry->mode)
);
TRACE_EVENT(ext4_allocate_inode,
TP_ARGS(inode, dir, mode),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( ino_t, dir )
__field( umode_t, mode )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->dir = dir->i_ino;
__entry->mode = mode;
),
- TP_printk("dev %s ino %lu dir %lu mode 0%o",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu dir %lu mode 0%o",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
(unsigned long) __entry->dir, __entry->mode)
);
TP_ARGS(inode, pos, len, flags),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->flags = flags;
),
- TP_printk("dev %s ino %lu pos %llu len %u flags %u",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu pos %llu len %u flags %u",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->pos, __entry->len, __entry->flags)
);
TP_ARGS(inode, pos, len, copied),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( loff_t, pos )
__field( unsigned int, len )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->pos = pos;
__entry->len = len;
__entry->copied = copied;
),
- TP_printk("dev %s ino %lu pos %llu len %u copied %u",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->pos, __entry->len, __entry->copied)
+ TP_printk("dev %d,%d ino %lu pos %llu len %u copied %u",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->pos,
+ __entry->len, __entry->copied)
);
DEFINE_EVENT(ext4__write_end, ext4_ordered_write_end,
TP_ARGS(inode, page),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( pgoff_t, index )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->index = page->index;
),
- TP_printk("dev %s ino %lu page_index %lu",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->index)
+ TP_printk("dev %d,%d ino %lu page_index %lu",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->index)
);
TRACE_EVENT(ext4_da_writepages,
TP_ARGS(inode, wbc),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( long, nr_to_write )
__field( long, pages_skipped )
__field( loff_t, range_start )
__field( loff_t, range_end )
-- __field( char, nonblocking )
__field( char, for_kupdate )
__field( char, for_reclaim )
__field( char, range_cyclic )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->nr_to_write = wbc->nr_to_write;
__entry->pages_skipped = wbc->pages_skipped;
__entry->range_start = wbc->range_start;
__entry->range_end = wbc->range_end;
- __entry->nonblocking = wbc->nonblocking;
__entry->for_kupdate = wbc->for_kupdate;
__entry->for_reclaim = wbc->for_reclaim;
__entry->range_cyclic = wbc->range_cyclic;
__entry->writeback_index = inode->i_mapping->writeback_index;
),
- TP_printk("dev %s ino %lu nr_to_write %ld pages_skipped %ld "
- TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld range_start %llu range_end %llu nonblocking %d for_kupdate %d for_reclaim %d range_cyclic %d writeback_index %lu",
++ TP_printk("dev %d,%d ino %lu nr_to_write %ld pages_skipped %ld "
+ "range_start %llu range_end %llu "
+ "for_kupdate %d for_reclaim %d "
+ "range_cyclic %d writeback_index %lu",
- jbd2_dev_to_name(__entry->dev),
+ __entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->nr_to_write,
__entry->pages_skipped, __entry->range_start,
- __entry->range_end, __entry->nonblocking,
+ __entry->range_end,
__entry->for_kupdate, __entry->for_reclaim,
__entry->range_cyclic,
(unsigned long) __entry->writeback_index)
TP_ARGS(inode, mpd),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, b_blocknr )
__field( __u32, b_size )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->b_blocknr = mpd->b_blocknr;
__entry->b_size = mpd->b_size;
__entry->pages_written = mpd->pages_written;
),
- TP_printk("dev %s ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu b_blocknr %llu b_size %u b_state 0x%04x first_page %lu io_done %d pages_written %d",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->b_blocknr, __entry->b_size,
__entry->b_state, __entry->first_page,
__entry->io_done, __entry->pages_written)
TP_ARGS(inode, wbc, ret, pages_written),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( int, ret )
__field( int, pages_written )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->ret = ret;
__entry->pages_written = pages_written;
__entry->writeback_index = inode->i_mapping->writeback_index;
),
- TP_printk("dev %s ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
- jbd2_dev_to_name(__entry->dev),
+ TP_printk("dev %d,%d ino %lu ret %d pages_written %d pages_skipped %ld more_io %d writeback_index %lu",
+ __entry->dev_major, __entry->dev_minor,
(unsigned long) __entry->ino, __entry->ret,
__entry->pages_written, __entry->pages_skipped,
__entry->more_io,
TP_ARGS(sb, blk, count),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( __u64, blk )
__field( __u64, count )
),
TP_fast_assign(
- __entry->dev = sb->s_dev;
+ __entry->dev_major = MAJOR(sb->s_dev);
+ __entry->dev_minor = MINOR(sb->s_dev);
__entry->blk = blk;
__entry->count = count;
),
- TP_printk("dev %s blk %llu count %llu",
- jbd2_dev_to_name(__entry->dev), __entry->blk, __entry->count)
+ TP_printk("dev %d,%d blk %llu count %llu",
+ __entry->dev_major, __entry->dev_minor,
+ __entry->blk, __entry->count)
);
DECLARE_EVENT_CLASS(ext4__mb_new_pa,
TP_ARGS(ac, pa),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
),
TP_fast_assign(
- __entry->dev = ac->ac_sb->s_dev;
+ __entry->dev_major = MAJOR(ac->ac_sb->s_dev);
+ __entry->dev_minor = MINOR(ac->ac_sb->s_dev);
__entry->ino = ac->ac_inode->i_ino;
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
__entry->pa_lstart = pa->pa_lstart;
),
- TP_printk("dev %s ino %lu pstart %llu len %u lstart %llu",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->pa_pstart, __entry->pa_len, __entry->pa_lstart)
+ TP_printk("dev %d,%d ino %lu pstart %llu len %u lstart %llu",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->pa_pstart,
+ __entry->pa_len, __entry->pa_lstart)
);
DEFINE_EVENT(ext4__mb_new_pa, ext4_mb_new_inode_pa,
TRACE_EVENT(ext4_mb_release_inode_pa,
TP_PROTO(struct super_block *sb,
- struct ext4_allocation_context *ac,
+ struct inode *inode,
struct ext4_prealloc_space *pa,
unsigned long long block, unsigned int count),
- TP_ARGS(sb, ac, pa, block, count),
+ TP_ARGS(sb, inode, pa, block, count),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, block )
__field( __u32, count )
),
TP_fast_assign(
- __entry->dev = sb->s_dev;
- __entry->ino = (ac && ac->ac_inode) ?
- ac->ac_inode->i_ino : 0;
+ __entry->dev_major = MAJOR(sb->s_dev);
+ __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->ino = inode->i_ino;
__entry->block = block;
__entry->count = count;
),
- TP_printk("dev %s ino %lu block %llu count %u",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->block, __entry->count)
+ TP_printk("dev %d,%d ino %lu block %llu count %u",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->block, __entry->count)
);
TRACE_EVENT(ext4_mb_release_group_pa,
TP_PROTO(struct super_block *sb,
- struct ext4_allocation_context *ac,
struct ext4_prealloc_space *pa),
- TP_ARGS(sb, ac, pa),
+ TP_ARGS(sb, pa),
TP_STRUCT__entry(
- __field( dev_t, dev )
- __field( ino_t, ino )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( __u64, pa_pstart )
__field( __u32, pa_len )
),
TP_fast_assign(
- __entry->dev = sb->s_dev;
- __entry->ino = (ac && ac->ac_inode) ?
- ac->ac_inode->i_ino : 0;
+ __entry->dev_major = MAJOR(sb->s_dev);
+ __entry->dev_minor = MINOR(sb->s_dev);
__entry->pa_pstart = pa->pa_pstart;
__entry->pa_len = pa->pa_len;
),
- TP_printk("dev %s pstart %llu len %u",
- jbd2_dev_to_name(__entry->dev), __entry->pa_pstart, __entry->pa_len)
+ TP_printk("dev %d,%d pstart %llu len %u",
+ __entry->dev_major, __entry->dev_minor,
+ __entry->pa_pstart, __entry->pa_len)
);
TRACE_EVENT(ext4_discard_preallocations,
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
),
- TP_printk("dev %s ino %lu",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino)
+ TP_printk("dev %d,%d ino %lu",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino)
);
TRACE_EVENT(ext4_mb_discard_preallocations,
TP_ARGS(sb, needed),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( int, needed )
),
TP_fast_assign(
- __entry->dev = sb->s_dev;
+ __entry->dev_major = MAJOR(sb->s_dev);
+ __entry->dev_minor = MINOR(sb->s_dev);
__entry->needed = needed;
),
- TP_printk("dev %s needed %d",
- jbd2_dev_to_name(__entry->dev), __entry->needed)
+ TP_printk("dev %d,%d needed %d",
+ __entry->dev_major, __entry->dev_minor, __entry->needed)
);
TRACE_EVENT(ext4_request_blocks,
TP_ARGS(ar),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( unsigned int, flags )
__field( unsigned int, len )
),
TP_fast_assign(
- __entry->dev = ar->inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
__entry->ino = ar->inode->i_ino;
__entry->flags = ar->flags;
__entry->len = ar->len;
__entry->pright = ar->pright;
),
- TP_printk("dev %s ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu flags %u len %u lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->flags, __entry->len,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
TP_ARGS(ar, block),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( __u64, block )
__field( unsigned int, flags )
),
TP_fast_assign(
- __entry->dev = ar->inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(ar->inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(ar->inode->i_sb->s_dev);
__entry->ino = ar->inode->i_ino;
__entry->block = block;
__entry->flags = ar->flags;
__entry->pright = ar->pright;
),
- TP_printk("dev %s ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->flags, __entry->len, __entry->block,
+ TP_printk("dev %d,%d ino %lu flags %u len %u block %llu lblk %llu goal %llu lleft %llu lright %llu pleft %llu pright %llu ",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->flags,
+ __entry->len, __entry->block,
(unsigned long long) __entry->logical,
(unsigned long long) __entry->goal,
(unsigned long long) __entry->lleft,
TP_ARGS(inode, block, count, flags),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, block )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->block = block;
__entry->flags = flags;
),
- TP_printk("dev %s ino %lu mode 0%o block %llu count %lu flags %d",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu mode 0%o block %llu count %lu flags %d",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->mode, __entry->block, __entry->count,
__entry->flags)
);
TP_ARGS(file, datasync),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( ino_t, parent )
__field( int, datasync )
TP_fast_assign(
struct dentry *dentry = file->f_path.dentry;
- __entry->dev = dentry->d_inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(dentry->d_inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(dentry->d_inode->i_sb->s_dev);
__entry->ino = dentry->d_inode->i_ino;
__entry->datasync = datasync;
__entry->parent = dentry->d_parent->d_inode->i_ino;
),
- TP_printk("dev %s ino %ld parent %ld datasync %d ",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %ld parent %ld datasync %d ",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
(unsigned long) __entry->parent, __entry->datasync)
);
TP_ARGS(sb, wait),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( int, wait )
),
TP_fast_assign(
- __entry->dev = sb->s_dev;
+ __entry->dev_major = MAJOR(sb->s_dev);
+ __entry->dev_minor = MINOR(sb->s_dev);
__entry->wait = wait;
),
- TP_printk("dev %s wait %d", jbd2_dev_to_name(__entry->dev),
- __entry->wait)
+ TP_printk("dev %d,%d wait %d", __entry->dev_major,
+ __entry->dev_minor, __entry->wait)
);
TRACE_EVENT(ext4_alloc_da_blocks,
TP_ARGS(inode),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( unsigned int, data_blocks )
__field( unsigned int, meta_blocks )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->data_blocks = EXT4_I(inode)->i_reserved_data_blocks;
__entry->meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
- TP_printk("dev %s ino %lu data_blocks %u meta_blocks %u",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu data_blocks %u meta_blocks %u",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->data_blocks, __entry->meta_blocks)
);
TP_ARGS(ac),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( __u16, found )
__field( __u16, groups )
),
TP_fast_assign(
- __entry->dev = ac->ac_inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
__entry->ino = ac->ac_inode->i_ino;
__entry->found = ac->ac_found;
__entry->flags = ac->ac_flags;
__entry->result_len = ac->ac_f_ex.fe_len;
),
- TP_printk("dev %s inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
+ TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u goal %u/%d/%u@%u "
"result %u/%d/%u@%u blks %u grps %u cr %u flags 0x%04x "
"tail %u broken %u",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->orig_group, __entry->orig_start,
__entry->orig_len, __entry->orig_logical,
__entry->goal_group, __entry->goal_start,
TP_ARGS(ac),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( __u32, orig_logical )
__field( int, orig_start )
),
TP_fast_assign(
- __entry->dev = ac->ac_inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(ac->ac_inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(ac->ac_inode->i_sb->s_dev);
__entry->ino = ac->ac_inode->i_ino;
__entry->orig_logical = ac->ac_o_ex.fe_logical;
__entry->orig_start = ac->ac_o_ex.fe_start;
__entry->result_len = ac->ac_b_ex.fe_len;
),
- TP_printk("dev %s inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d inode %lu orig %u/%d/%u@%u result %u/%d/%u@%u",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->orig_group, __entry->orig_start,
__entry->orig_len, __entry->orig_logical,
__entry->result_group, __entry->result_start,
);
DECLARE_EVENT_CLASS(ext4__mballoc,
- TP_PROTO(struct ext4_allocation_context *ac),
+ TP_PROTO(struct super_block *sb,
+ struct inode *inode,
+ ext4_group_t group,
+ ext4_grpblk_t start,
+ ext4_grpblk_t len),
- TP_ARGS(ac),
+ TP_ARGS(sb, inode, group, start, len),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
- __field( __u32, result_logical )
__field( int, result_start )
__field( __u32, result_group )
__field( int, result_len )
),
TP_fast_assign(
- __entry->dev = ac->ac_inode->i_sb->s_dev;
- __entry->ino = ac->ac_inode->i_ino;
- __entry->result_logical = ac->ac_b_ex.fe_logical;
- __entry->result_start = ac->ac_b_ex.fe_start;
- __entry->result_group = ac->ac_b_ex.fe_group;
- __entry->result_len = ac->ac_b_ex.fe_len;
+ __entry->dev_major = MAJOR(sb->s_dev);
+ __entry->dev_minor = MINOR(sb->s_dev);
+ __entry->ino = inode ? inode->i_ino : 0;
+ __entry->result_start = start;
+ __entry->result_group = group;
+ __entry->result_len = len;
),
- TP_printk("dev %s inode %lu extent %u/%d/%u@%u ",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d inode %lu extent %u/%d/%u ",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->result_group, __entry->result_start,
- __entry->result_len, __entry->result_logical)
+ __entry->result_len)
);
DEFINE_EVENT(ext4__mballoc, ext4_mballoc_discard,
- TP_PROTO(struct ext4_allocation_context *ac),
+ TP_PROTO(struct super_block *sb,
+ struct inode *inode,
+ ext4_group_t group,
+ ext4_grpblk_t start,
+ ext4_grpblk_t len),
- TP_ARGS(ac)
+ TP_ARGS(sb, inode, group, start, len)
);
DEFINE_EVENT(ext4__mballoc, ext4_mballoc_free,
- TP_PROTO(struct ext4_allocation_context *ac),
+ TP_PROTO(struct super_block *sb,
+ struct inode *inode,
+ ext4_group_t group,
+ ext4_grpblk_t start,
+ ext4_grpblk_t len),
- TP_ARGS(ac)
+ TP_ARGS(sb, inode, group, start, len)
);
TRACE_EVENT(ext4_forget,
TP_ARGS(inode, is_metadata, block),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( int, is_metadata )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->is_metadata = is_metadata;
__entry->block = block;
),
- TP_printk("dev %s ino %lu mode 0%o is_metadata %d block %llu",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->mode, __entry->is_metadata, __entry->block)
+ TP_printk("dev %d,%d ino %lu mode 0%o is_metadata %d block %llu",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->mode,
+ __entry->is_metadata, __entry->block)
);
TRACE_EVENT(ext4_da_update_reserve_space,
TP_ARGS(inode, used_blocks),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
- TP_printk("dev %s ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
- __entry->mode, (unsigned long long) __entry->i_blocks,
+ TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu used_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino, __entry->mode,
+ (unsigned long long) __entry->i_blocks,
__entry->used_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
);
TP_ARGS(inode, md_needed),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->reserved_meta_blocks = EXT4_I(inode)->i_reserved_meta_blocks;
),
- TP_printk("dev %s ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu md_needed %d reserved_data_blocks %d reserved_meta_blocks %d",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->md_needed, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks)
TP_ARGS(inode, freed_blocks),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( ino_t, ino )
__field( umode_t, mode )
__field( __u64, i_blocks )
),
TP_fast_assign(
- __entry->dev = inode->i_sb->s_dev;
+ __entry->dev_major = MAJOR(inode->i_sb->s_dev);
+ __entry->dev_minor = MINOR(inode->i_sb->s_dev);
__entry->ino = inode->i_ino;
__entry->mode = inode->i_mode;
__entry->i_blocks = inode->i_blocks;
__entry->allocated_meta_blocks = EXT4_I(inode)->i_allocated_meta_blocks;
),
- TP_printk("dev %s ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
- jbd2_dev_to_name(__entry->dev), (unsigned long) __entry->ino,
+ TP_printk("dev %d,%d ino %lu mode 0%o i_blocks %llu freed_blocks %d reserved_data_blocks %d reserved_meta_blocks %d allocated_meta_blocks %d",
+ __entry->dev_major, __entry->dev_minor,
+ (unsigned long) __entry->ino,
__entry->mode, (unsigned long long) __entry->i_blocks,
__entry->freed_blocks, __entry->reserved_data_blocks,
__entry->reserved_meta_blocks, __entry->allocated_meta_blocks)
TP_ARGS(sb, group),
TP_STRUCT__entry(
- __field( dev_t, dev )
+ __field( int, dev_major )
+ __field( int, dev_minor )
__field( __u32, group )
),
TP_fast_assign(
- __entry->dev = sb->s_dev;
+ __entry->dev_major = MAJOR(sb->s_dev);
+ __entry->dev_minor = MINOR(sb->s_dev);
__entry->group = group;
),
- TP_printk("dev %s group %u",
- jbd2_dev_to_name(__entry->dev), __entry->group)
+ TP_printk("dev %d,%d group %u",
+ __entry->dev_major, __entry->dev_minor, __entry->group)
);
DEFINE_EVENT(ext4__bitmap_load, ext4_mb_bitmap_load,