[net-next-2.6.git] / fs / jfs / jfs_metapage.c

/*
 *   Copyright (C) International Business Machines Corp., 2000-2005
 *   Portions Copyright (C) Christoph Hellwig, 2001-2002
 *
 *   This program is free software;  you can redistribute it and/or modify
 *   it under the terms of the GNU General Public License as published by
 *   the Free Software Foundation; either version 2 of the License, or
 *   (at your option) any later version.
 *
 *   This program is distributed in the hope that it will be useful,
 *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
 *   the GNU General Public License for more details.
 *
 *   You should have received a copy of the GNU General Public License
 *   along with this program;  if not, write to the Free Software
 *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <linux/fs.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/init.h>
#include <linux/buffer_head.h>
#include <linux/mempool.h>
#include <linux/seq_file.h>
#include "jfs_incore.h"
#include "jfs_superblock.h"
#include "jfs_filsys.h"
#include "jfs_metapage.h"
#include "jfs_txnmgr.h"
#include "jfs_debug.h"

#ifdef CONFIG_JFS_STATISTICS
static struct {
	uint	pagealloc;	/* # of page allocations */
	uint	pagefree;	/* # of page frees */
	uint	lockwait;	/* # of sleeping lock_metapage() calls */
} mpStat;
#endif

#define metapage_locked(mp) test_bit(META_locked, &(mp)->flag)
#define trylock_metapage(mp) test_and_set_bit_lock(META_locked, &(mp)->flag)

static inline void unlock_metapage(struct metapage *mp)
{
	clear_bit_unlock(META_locked, &mp->flag);
	wake_up(&mp->wait);
}

static inline void __lock_metapage(struct metapage *mp)
{
	DECLARE_WAITQUEUE(wait, current);
	INCREMENT(mpStat.lockwait);
	add_wait_queue_exclusive(&mp->wait, &wait);
	do {
		set_current_state(TASK_UNINTERRUPTIBLE);
		if (metapage_locked(mp)) {
			unlock_page(mp->page);
			io_schedule();
			lock_page(mp->page);
		}
	} while (trylock_metapage(mp));
	__set_current_state(TASK_RUNNING);
	remove_wait_queue(&mp->wait, &wait);
}

/*
 * Must have mp->page locked
 */
static inline void lock_metapage(struct metapage *mp)
{
	if (trylock_metapage(mp))
		__lock_metapage(mp);
}

#define METAPOOL_MIN_PAGES 32
static struct kmem_cache *metapage_cache;
static mempool_t *metapage_mempool;

#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)

#if MPS_PER_PAGE > 1

struct meta_anchor {
	int mp_count;
	atomic_t io_count;
	struct metapage *mp[MPS_PER_PAGE];
};
#define mp_anchor(page) ((struct meta_anchor *)page_private(page))

static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	if (!PagePrivate(page))
		return NULL;
	return mp_anchor(page)->mp[offset >> L2PSIZE];
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a;
	int index;
	int l2mp_blocks;	/* log2 blocks per metapage */

	if (PagePrivate(page))
		a = mp_anchor(page);
	else {
		a = kzalloc(sizeof(struct meta_anchor), GFP_NOFS);
		if (!a)
			return -ENOMEM;
		set_page_private(page, (unsigned long)a);
		SetPagePrivate(page);
		kmap(page);
	}

	if (mp) {
		l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
		index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);
		a->mp_count++;
		a->mp[index] = mp;
	}

	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	struct meta_anchor *a = mp_anchor(page);
	int l2mp_blocks = L2PSIZE - page->mapping->host->i_blkbits;
	int index;

	index = (mp->index >> l2mp_blocks) & (MPS_PER_PAGE - 1);

	BUG_ON(a->mp[index] != mp);

	a->mp[index] = NULL;
	if (--a->mp_count == 0) {
		kfree(a);
		set_page_private(page, 0);
		ClearPagePrivate(page);
		kunmap(page);
	}
}

static inline void inc_io(struct page *page)
{
	atomic_inc(&mp_anchor(page)->io_count);
}

static inline void dec_io(struct page *page, void (*handler) (struct page *))
{
	if (atomic_dec_and_test(&mp_anchor(page)->io_count))
		handler(page);
}

#else
static inline struct metapage *page_to_mp(struct page *page, int offset)
{
	return PagePrivate(page) ? (struct metapage *)page_private(page) : NULL;
}

static inline int insert_metapage(struct page *page, struct metapage *mp)
{
	if (mp) {
		set_page_private(page, (unsigned long)mp);
		SetPagePrivate(page);
		kmap(page);
	}
	return 0;
}

static inline void remove_metapage(struct page *page, struct metapage *mp)
{
	set_page_private(page, 0);
	ClearPagePrivate(page);
	kunmap(page);
}

#define inc_io(page) do {} while(0)
#define dec_io(page, handler) handler(page)

#endif

static void init_once(void *foo)
{
	struct metapage *mp = (struct metapage *)foo;

	mp->lid = 0;
	mp->lsn = 0;
	mp->flag = 0;
	mp->data = NULL;
	mp->clsn = 0;
	mp->log = NULL;
	set_bit(META_free, &mp->flag);
	init_waitqueue_head(&mp->wait);
}

static inline struct metapage *alloc_metapage(gfp_t gfp_mask)
{
	return mempool_alloc(metapage_mempool, gfp_mask);
}

static inline void free_metapage(struct metapage *mp)
{
	mp->flag = 0;
	set_bit(META_free, &mp->flag);

	mempool_free(mp, metapage_mempool);
}

int __init metapage_init(void)
{
	/*
	 * Allocate the metapage structures
	 */
	metapage_cache = kmem_cache_create("jfs_mp", sizeof(struct metapage),
					   0, 0, init_once);
	if (metapage_cache == NULL)
		return -ENOMEM;

	metapage_mempool = mempool_create_slab_pool(METAPOOL_MIN_PAGES,
						    metapage_cache);

	if (metapage_mempool == NULL) {
		kmem_cache_destroy(metapage_cache);
		return -ENOMEM;
	}

	return 0;
}

void metapage_exit(void)
{
	mempool_destroy(metapage_mempool);
	kmem_cache_destroy(metapage_cache);
}

static inline void drop_metapage(struct page *page, struct metapage *mp)
{
	if (mp->count || mp->nohomeok || test_bit(META_dirty, &mp->flag) ||
	    test_bit(META_io, &mp->flag))
		return;
	remove_metapage(page, mp);
	INCREMENT(mpStat.pagefree);
	free_metapage(mp);
}

/*
 * Metapage address space operations
 */

static sector_t metapage_get_blocks(struct inode *inode, sector_t lblock,
				    int *len)
{
	int rc = 0;
	int xflag;
	s64 xaddr;
	sector_t file_blocks = (inode->i_size + inode->i_sb->s_blocksize - 1) >>
			       inode->i_blkbits;

	if (lblock >= file_blocks)
		return 0;
	if (lblock + *len > file_blocks)
		*len = file_blocks - lblock;

	if (inode->i_ino) {
		rc = xtLookup(inode, (s64)lblock, *len, &xflag, &xaddr, len, 0);
		if ((rc == 0) && *len)
			lblock = (sector_t)xaddr;
		else
			lblock = 0;
	} /* else no mapping */

	return lblock;
}

static void last_read_complete(struct page *page)
{
	if (!PageError(page))
		SetPageUptodate(page);
	unlock_page(page);
}

static void metapage_read_end_io(struct bio *bio, int err)
{
	struct page *page = bio->bi_private;

	if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		printk(KERN_ERR "metapage_read_end_io: I/O error\n");
		SetPageError(page);
	}

	dec_io(page, last_read_complete);
	bio_put(bio);
}

static void remove_from_logsync(struct metapage *mp)
{
	struct jfs_log *log = mp->log;
	unsigned long flags;
/*
 * This can race.  Recheck that log hasn't been set to null, and after
 * acquiring logsync lock, recheck lsn
 */
	if (!log)
		return;

	LOGSYNC_LOCK(log, flags);
	if (mp->lsn) {
		mp->log = NULL;
		mp->lsn = 0;
		mp->clsn = 0;
		log->count--;
		list_del(&mp->synclist);
	}
	LOGSYNC_UNLOCK(log, flags);
}

static void last_write_complete(struct page *page)
{
	struct metapage *mp;
	unsigned int offset;

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);
		if (mp && test_bit(META_io, &mp->flag)) {
			if (mp->lsn)
				remove_from_logsync(mp);
			clear_bit(META_io, &mp->flag);
		}
		/*
		 * I'd like to call drop_metapage here, but I don't think it's
		 * safe unless I have the page locked
		 */
	}
	end_page_writeback(page);
}

static void metapage_write_end_io(struct bio *bio, int err)
{
	struct page *page = bio->bi_private;

	BUG_ON(!PagePrivate(page));

	if (! test_bit(BIO_UPTODATE, &bio->bi_flags)) {
		printk(KERN_ERR "metapage_write_end_io: I/O error\n");
		SetPageError(page);
	}
	dec_io(page, last_write_complete);
	bio_put(bio);
}

static int metapage_writepage(struct page *page, struct writeback_control *wbc)
{
	struct bio *bio = NULL;
	int block_offset;	/* block offset of mp within page */
	struct inode *inode = page->mapping->host;
	int blocks_per_mp = JFS_SBI(inode->i_sb)->nbperpage;
	int len;
	int xlen;
	struct metapage *mp;
	int redirty = 0;
	sector_t lblock;
	int nr_underway = 0;
	sector_t pblock;
	sector_t next_block = 0;
	sector_t page_start;
	unsigned long bio_bytes = 0;
	unsigned long bio_offset = 0;
	int offset;
	int bad_blocks = 0;

	page_start = (sector_t)page->index <<
		     (PAGE_CACHE_SHIFT - inode->i_blkbits);
	BUG_ON(!PageLocked(page));
	BUG_ON(PageWriteback(page));
	set_page_writeback(page);

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp || !test_bit(META_dirty, &mp->flag))
			continue;

		if (mp->nohomeok && !test_bit(META_forcewrite, &mp->flag)) {
			redirty = 1;
			/*
			 * Make sure this page isn't blocked indefinitely.
			 * If the journal isn't undergoing I/O, push it
			 */
			if (mp->log && !(mp->log->cflag & logGC_PAGEOUT))
				jfs_flush_journal(mp->log, 0);
			continue;
		}

		clear_bit(META_dirty, &mp->flag);
		set_bit(META_io, &mp->flag);
		block_offset = offset >> inode->i_blkbits;
		lblock = page_start + block_offset;
		if (bio) {
			if (xlen && lblock == next_block) {
				/* Contiguous, in memory & on disk */
				len = min(xlen, blocks_per_mp);
				xlen -= len;
				bio_bytes += len << inode->i_blkbits;
				continue;
			}
			/* Not contiguous */
			if (bio_add_page(bio, page, bio_bytes, bio_offset) <
			    bio_bytes)
				goto add_failed;
			/*
			 * Increment counter before submitting i/o to keep
			 * count from hitting zero before we're through
			 */
			inc_io(page);
			if (!bio->bi_size)
				goto dump_bio;
			submit_bio(WRITE, bio);
			nr_underway++;
			bio = NULL;
		} else
			inc_io(page);
		xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
		pblock = metapage_get_blocks(inode, lblock, &xlen);
		if (!pblock) {
			printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
			/*
			 * We already called inc_io(), but can't cancel it
			 * with dec_io() until we're done with the page
			 */
			bad_blocks++;
			continue;
		}
		len = min(xlen, (int)JFS_SBI(inode->i_sb)->nbperpage);

		bio = bio_alloc(GFP_NOFS, 1);
		bio->bi_bdev = inode->i_sb->s_bdev;
		bio->bi_sector = pblock << (inode->i_blkbits - 9);
		bio->bi_end_io = metapage_write_end_io;
		bio->bi_private = page;

		/* Don't call bio_add_page yet, we may add to this vec */
		bio_offset = offset;
		bio_bytes = len << inode->i_blkbits;

		xlen -= len;
		next_block = lblock + len;
	}
	if (bio) {
		if (bio_add_page(bio, page, bio_bytes, bio_offset) < bio_bytes)
				goto add_failed;
		if (!bio->bi_size)
			goto dump_bio;

		submit_bio(WRITE, bio);
		nr_underway++;
	}
	if (redirty)
		redirty_page_for_writepage(wbc, page);

	unlock_page(page);

	if (bad_blocks)
		goto err_out;

	if (nr_underway == 0)
		end_page_writeback(page);

	return 0;
add_failed:
	/* We should never reach here, since we're only adding one vec */
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	goto skip;
dump_bio:
	print_hex_dump(KERN_ERR, "JFS: dump of bio: ", DUMP_PREFIX_ADDRESS, 16,
		       4, bio, sizeof(*bio), 0);
skip:
	bio_put(bio);
	unlock_page(page);
	dec_io(page, last_write_complete);
err_out:
	while (bad_blocks--)
		dec_io(page, last_write_complete);
	return -EIO;
}

static int metapage_readpage(struct file *fp, struct page *page)
{
	struct inode *inode = page->mapping->host;
	struct bio *bio = NULL;
	int block_offset;
	int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
	sector_t page_start;	/* address of page in fs blocks */
	sector_t pblock;
	int xlen;
	unsigned int len;
	int offset;

	BUG_ON(!PageLocked(page));
	page_start = (sector_t)page->index <<
		     (PAGE_CACHE_SHIFT - inode->i_blkbits);

	block_offset = 0;
	while (block_offset < blocks_per_page) {
		xlen = blocks_per_page - block_offset;
		pblock = metapage_get_blocks(inode, page_start + block_offset,
					     &xlen);
		if (pblock) {
			if (!PagePrivate(page))
				insert_metapage(page, NULL);
			inc_io(page);
			if (bio)
				submit_bio(READ, bio);

			bio = bio_alloc(GFP_NOFS, 1);
			bio->bi_bdev = inode->i_sb->s_bdev;
			bio->bi_sector = pblock << (inode->i_blkbits - 9);
			bio->bi_end_io = metapage_read_end_io;
			bio->bi_private = page;
			len = xlen << inode->i_blkbits;
			offset = block_offset << inode->i_blkbits;
			if (bio_add_page(bio, page, len, offset) < len)
				goto add_failed;
			block_offset += xlen;
		} else
			block_offset++;
	}
	if (bio)
		submit_bio(READ, bio);
	else
		unlock_page(page);

	return 0;

add_failed:
	printk(KERN_ERR "JFS: bio_add_page failed unexpectedly\n");
	bio_put(bio);
	dec_io(page, last_read_complete);
	return -EIO;
}

static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
{
	struct metapage *mp;
	int ret = 1;
	int offset;

	for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
		mp = page_to_mp(page, offset);

		if (!mp)
			continue;

		jfs_info("metapage_releasepage: mp = 0x%p", mp);
		if (mp->count || mp->nohomeok ||
		    test_bit(META_dirty, &mp->flag)) {
			jfs_info("count = %ld, nohomeok = %d", mp->count,
				 mp->nohomeok);
			ret = 0;
			continue;
		}
		if (mp->lsn)
			remove_from_logsync(mp);
		remove_metapage(page, mp);
		INCREMENT(mpStat.pagefree);
		free_metapage(mp);
	}
	return ret;
}

static void metapage_invalidatepage(struct page *page, unsigned long offset)
{
	BUG_ON(offset);

	BUG_ON(PageWriteback(page));

	metapage_releasepage(page, 0);
}

const struct address_space_operations jfs_metapage_aops = {
	.readpage	= metapage_readpage,
	.writepage	= metapage_writepage,
	.sync_page	= block_sync_page,
	.releasepage	= metapage_releasepage,
	.invalidatepage	= metapage_invalidatepage,
	.set_page_dirty	= __set_page_dirty_nobuffers,
};

struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
				unsigned int size, int absolute,
				unsigned long new)
{
	int l2BlocksPerPage;
	int l2bsize;
	struct address_space *mapping;
	struct metapage *mp = NULL;
	struct page *page;
	unsigned long page_index;
	unsigned long page_offset;

	jfs_info("__get_metapage: ino = %ld, lblock = 0x%lx, abs=%d",
		 inode->i_ino, lblock, absolute);

	l2bsize = inode->i_blkbits;
	l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
	page_index = lblock >> l2BlocksPerPage;
	page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
	if ((page_offset + size) > PAGE_CACHE_SIZE) {
		jfs_err("MetaData crosses page boundary!!");
		jfs_err("lblock = %lx, size  = %d", lblock, size);
		dump_stack();
		return NULL;
	}
	if (absolute)
		mapping = JFS_SBI(inode->i_sb)->direct_inode->i_mapping;
	else {
		/*
		 * If an nfs client tries to read an inode that is larger
		 * than any existing inodes, we may try to read past the
		 * end of the inode map
		 */
		if ((lblock << inode->i_blkbits) >= inode->i_size)
			return NULL;
		mapping = inode->i_mapping;
	}

	if (new && (PSIZE == PAGE_CACHE_SIZE)) {
		page = grab_cache_page(mapping, page_index);
		if (!page) {
			jfs_err("grab_cache_page failed!");
			return NULL;
		}
		SetPageUptodate(page);
	} else {
		page = read_mapping_page(mapping, page_index, NULL);
		if (IS_ERR(page) || !PageUptodate(page)) {
			jfs_err("read_mapping_page failed!");
			return NULL;
		}
		lock_page(page);
	}

	mp = page_to_mp(page, page_offset);
	if (mp) {
		if (mp->logical_size != size) {
			jfs_error(inode->i_sb,
				  "__get_metapage: mp->logical_size != size");
			jfs_err("logical_size = %d, size = %d",
				mp->logical_size, size);
			dump_stack();
			goto unlock;
		}
		mp->count++;
		lock_metapage(mp);
		if (test_bit(META_discard, &mp->flag)) {
			if (!new) {
				jfs_error(inode->i_sb,
					  "__get_metapage: using a "
					  "discarded metapage");
				discard_metapage(mp);
				goto unlock;
			}
			clear_bit(META_discard, &mp->flag);
		}
	} else {
		INCREMENT(mpStat.pagealloc);
		mp = alloc_metapage(GFP_NOFS);
		mp->page = page;
		mp->flag = 0;
		mp->xflag = COMMIT_PAGE;
		mp->count = 1;
		mp->nohomeok = 0;
		mp->logical_size = size;
		mp->data = page_address(page) + page_offset;
		mp->index = lblock;
		if (unlikely(insert_metapage(page, mp))) {
			free_metapage(mp);
			goto unlock;
		}
		lock_metapage(mp);
	}

	if (new) {
		jfs_info("zeroing mp = 0x%p", mp);
		memset(mp->data, 0, PSIZE);
	}

	unlock_page(page);
	jfs_info("__get_metapage: returning = 0x%p data = 0x%p", mp, mp->data);
	return mp;

unlock:
	unlock_page(page);
	return NULL;
}

void grab_metapage(struct metapage * mp)
{
	jfs_info("grab_metapage: mp = 0x%p", mp);
	page_cache_get(mp->page);
	lock_page(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
}

void force_metapage(struct metapage *mp)
{
	struct page *page = mp->page;
	jfs_info("force_metapage: mp = 0x%p", mp);
	set_bit(META_forcewrite, &mp->flag);
	clear_bit(META_sync, &mp->flag);
	page_cache_get(page);
	lock_page(page);
	set_page_dirty(page);
	write_one_page(page, 1);
	clear_bit(META_forcewrite, &mp->flag);
	page_cache_release(page);
}

void hold_metapage(struct metapage *mp)
{
	lock_page(mp->page);
}

void put_metapage(struct metapage *mp)
{
	if (mp->count || mp->nohomeok) {
		/* Someone else will release this */
		unlock_page(mp->page);
		return;
	}
	page_cache_get(mp->page);
	mp->count++;
	lock_metapage(mp);
	unlock_page(mp->page);
	release_metapage(mp);
}

void release_metapage(struct metapage * mp)
{
	struct page *page = mp->page;
	jfs_info("release_metapage: mp = 0x%p, flag = 0x%lx", mp, mp->flag);

	BUG_ON(!page);

	lock_page(page);
	unlock_metapage(mp);

	assert(mp->count);
	if (--mp->count || mp->nohomeok) {
		unlock_page(page);
		page_cache_release(page);
		return;
	}

	if (test_bit(META_dirty, &mp->flag)) {
		set_page_dirty(page);
		if (test_bit(META_sync, &mp->flag)) {
			clear_bit(META_sync, &mp->flag);
			write_one_page(page, 1);
			lock_page(page); /* write_one_page unlocks the page */
		}
	} else if (mp->lsn)	/* discard_metapage doesn't remove it */
		remove_from_logsync(mp);

	/* Try to keep metapages from using up too much memory */
	drop_metapage(page, mp);

	unlock_page(page);
	page_cache_release(page);
}

void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
	sector_t lblock;
	int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
	int BlocksPerPage = 1 << l2BlocksPerPage;
	/* All callers are interested in block device's mapping */
	struct address_space *mapping =
		JFS_SBI(ip->i_sb)->direct_inode->i_mapping;
	struct metapage *mp;
	struct page *page;
	unsigned int offset;

	/*
	 * Mark metapages to discard.  They will eventually be
	 * released, but should not be written.
	 */
	for (lblock = addr & ~(BlocksPerPage - 1); lblock < addr + len;
	     lblock += BlocksPerPage) {
		page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
		if (!page)
			continue;
		for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
			mp = page_to_mp(page, offset);
			if (!mp)
				continue;
			if (mp->index < addr)
				continue;
			if (mp->index >= addr + len)
				break;

			clear_bit(META_dirty, &mp->flag);
			set_bit(META_discard, &mp->flag);
			if (mp->lsn)
				remove_from_logsync(mp);
		}
		unlock_page(page);
		page_cache_release(page);
	}
}

#ifdef CONFIG_JFS_STATISTICS
static int jfs_mpstat_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m,
		       "JFS Metapage statistics\n"
		       "=======================\n"
		       "page allocations = %d\n"
		       "page frees = %d\n"
		       "lock waits = %d\n",
		       mpStat.pagealloc,
		       mpStat.pagefree,
		       mpStat.lockwait);
	return 0;
}

static int jfs_mpstat_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, jfs_mpstat_proc_show, NULL);
}

const struct file_operations jfs_mpstat_proc_fops = {
	.owner		= THIS_MODULE,
	.open		= jfs_mpstat_proc_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};
#endif