[net-next-2.6.git] / fs / ocfs2 / buffer_head_io.c

/* -*- mode: c; c-basic-offset: 8; -*-
 * vim: noexpandtab sw=8 ts=8 sts=0:
 *
 * io.c
 *
 * Buffer cache handling
 *
 * Copyright (C) 2002, 2004 Oracle.  All rights reserved.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public
 * License along with this program; if not, write to the
 * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 021110-1307, USA.
 */

#include <linux/fs.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/highmem.h>

#include <cluster/masklog.h>

#include "ocfs2.h"

#include "alloc.h"
#include "inode.h"
#include "journal.h"
#include "uptodate.h"

#include "buffer_head_io.h"

/*
 * ocfs2_write_block() - synchronously write a single non-journalled block.
 *
 * @osb:   super block info, consulted only for the hard read-only check.
 * @bh:    buffer to write.  It must not be owned by the journal and must
 *         not lie below OCFS2_SUPER_BLOCK_BLKNO (both are BUG_ON()ed).
 * @inode: inode whose ip_io_mutex serialises the write and whose uptodate
 *         cache records the buffer on success.
 *
 * Returns 0 on success, -EROFS if the file system is hard read-only (no
 * I/O is attempted in that case), or -EIO if the buffer is not uptodate
 * once the write has completed.
 *
 * The caller's reference on @bh is never consumed here, neither on
 * success nor on failure; the caller releases it as usual.
 */
int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh,
		      struct inode *inode)
{
	int ret = 0;

	mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n",
		   (unsigned long long)bh->b_blocknr, inode);

	BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
	BUG_ON(buffer_jbd(bh));

	/* No need to check for a soft readonly file system here. non
	 * journalled writes are only ever done on system files which
	 * can get modified during recovery even if read-only. */
	if (ocfs2_is_hard_readonly(osb)) {
		ret = -EROFS;
		goto out;
	}

	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);

	lock_buffer(bh);
	set_buffer_uptodate(bh);

	/* remove from dirty list before I/O. */
	clear_buffer_dirty(bh);

	get_bh(bh); /* for end_buffer_write_sync() */
	bh->b_end_io = end_buffer_write_sync;
	submit_bh(WRITE, bh);

	wait_on_buffer(bh);

	if (buffer_uptodate(bh)) {
		ocfs2_set_buffer_uptodate(inode, bh);
	} else {
		/* We don't need to remove the clustered uptodate
		 * information for this bh as it's not marked locally
		 * uptodate.
		 *
		 * The reference taken with get_bh() above belongs to
		 * end_buffer_write_sync(), so there is nothing left for
		 * us to drop.  Putting the bh here would release the
		 * caller's reference behind its back and unbalance the
		 * refcount when the caller releases it again. */
		ret = -EIO;
	}

	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
out:
	mlog_exit(ret);
	return ret;
}

/*
 * ocfs2_read_blocks_sync() - read blocks straight from disk and wait.
 *
 * @osb:   super block info; osb->sb is used to look up buffers.
 * @block: block number corresponding to bhs[0].
 * @nr:    number of entries in @bhs.  Zero is a no-op returning 0.
 * @bhs:   array of @nr buffer heads.  A NULL slot is filled in with
 *         sb_getblk() for block (@block + index); a non-NULL slot is
 *         used as passed in.
 *
 * Every buffer that is neither journal-managed nor dirty is locked,
 * marked not uptodate and submitted for READ.  Once all reads have been
 * submitted the buffers are waited on in reverse order.
 *
 * Returns 0 on success or -EIO.  -EIO is returned either when
 * sb_getblk() fails or when a buffer is not uptodate after the wait; in
 * the latter case the buffer is put and its slot in @bhs reset to NULL.
 *
 * NOTE(review): when sb_getblk() fails we return immediately, leaving the
 * buffers submitted so far in @bhs without waiting for them - confirm
 * that callers release @bhs on error.
 */
int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
			   unsigned int nr, struct buffer_head *bhs[])
{
	int status = 0;
	unsigned int i;
	struct buffer_head *bh;

	if (!nr) {
		mlog(ML_BH_IO, "No buffers will be read!\n");
		goto bail;
	}

	for (i = 0 ; i < nr ; i++) {
		if (bhs[i] == NULL) {
			/* Index off the starting block so that slot i
			 * always maps to block + i, even if some earlier
			 * slots were supplied by the caller. */
			bhs[i] = sb_getblk(osb->sb, block + i);
			if (bhs[i] == NULL) {
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
		}
		bh = bhs[i];

		/* The journal owns this buffer: leave it alone.  This is
		 * logged at ML_BH_IO, as ocfs2_read_blocks() does for the
		 * same condition, rather than as an error. */
		if (buffer_jbd(bh)) {
			mlog(ML_BH_IO,
			     "trying to sync read a jbd "
			     "managed bh (blocknr = %llu), skipping\n",
			     (unsigned long long)bh->b_blocknr);
			continue;
		}

		if (buffer_dirty(bh)) {
			/* This should probably be a BUG, or
			 * at least return an error. */
			mlog(ML_ERROR,
			     "trying to sync read a dirty "
			     "buffer! (blocknr = %llu), skipping\n",
			     (unsigned long long)bh->b_blocknr);
			continue;
		}

		lock_buffer(bh);
		if (buffer_jbd(bh)) {
			/* The journal picked the buffer up while we were
			 * waiting for the lock.  Handle it the same way
			 * ocfs2_read_blocks() does: only treat it as fatal
			 * when race catching is compiled in, otherwise
			 * back off and skip the buffer. */
#ifdef CATCH_BH_JBD_RACES
			mlog(ML_ERROR,
			     "block %llu had the JBD bit set "
			     "while I was in lock_buffer!",
			     (unsigned long long)bh->b_blocknr);
			BUG();
#else
			unlock_buffer(bh);
			continue;
#endif
		}

		clear_buffer_uptodate(bh);
		get_bh(bh); /* for end_buffer_read_sync() */
		bh->b_end_io = end_buffer_read_sync;
		submit_bh(READ, bh);
	}

	for (i = nr; i > 0; i--) {
		bh = bhs[i - 1];

		/* Journal-managed buffers were skipped above and were
		 * never submitted, so there is nothing to wait for.
		 * Crashing here would turn the deliberate "skipping"
		 * above into a BUG(). */
		if (!buffer_jbd(bh))
			wait_on_buffer(bh);

		if (!buffer_uptodate(bh)) {
			/* Status won't be cleared from here on out,
			 * so we can safely record this and loop back
			 * to cleanup the other buffers. */
			status = -EIO;
			put_bh(bh);
			bhs[i - 1] = NULL;
		}
	}

bail:
	return status;
}

/*
 * ocfs2_read_blocks() - read blocks on behalf of an inode, optionally
 * satisfying the request from the uptodate cache.
 *
 * @inode: inode the blocks belong to (must not be NULL).  Its
 *         ip_io_mutex is held across submission and completion, and its
 *         uptodate cache is consulted and updated.
 * @block: block number corresponding to bhs[0].
 * @nr:    number of entries in @bhs.  Zero is a no-op returning 0;
 *         a negative value is rejected with -EINVAL.
 * @bhs:   array of @nr buffer heads (must not be NULL).  A NULL slot is
 *         filled in with sb_getblk() for block (@block + index); a
 *         non-NULL slot is used as passed in.
 * @flags: OCFS2_BH_CACHED allows buffers that ocfs2_buffer_uptodate()
 *         reports as current to be returned without I/O.
 *         OCFS2_BH_READAHEAD submits the reads without waiting for them
 *         and is only valid together with OCFS2_BH_CACHED (BUG_ON()ed).
 *
 * Returns 0 on success, -EINVAL for a NULL @bhs or negative @nr, or
 * -EIO.  -EIO is returned either when sb_getblk() fails or, for
 * non-readahead requests, when a buffer is not uptodate after the wait;
 * in the latter case the buffer is put and its slot in @bhs reset to
 * NULL.
 *
 * NOTE(review): when sb_getblk() fails we drop the mutex and return
 * immediately, leaving the buffers submitted so far in @bhs without
 * waiting for them - confirm that callers release @bhs on error.
 */
int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
		      struct buffer_head *bhs[], int flags)
{
	int status = 0;
	int i, ignore_cache = 0;
	struct buffer_head *bh;

	mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
		   inode, (unsigned long long)block, nr, flags);

	BUG_ON(!inode);
	BUG_ON((flags & OCFS2_BH_READAHEAD) && !(flags & OCFS2_BH_CACHED));

	if (bhs == NULL) {
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	if (nr < 0) {
		mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
		status = -EINVAL;
		mlog_errno(status);
		goto bail;
	}

	if (nr == 0) {
		mlog(ML_BH_IO, "No buffers will be read!\n");
		status = 0;
		goto bail;
	}

	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
	for (i = 0 ; i < nr ; i++) {
		if (bhs[i] == NULL) {
			/* Index off the starting block rather than
			 * bumping 'block': slot i always maps to
			 * block + i, even if some earlier slots were
			 * supplied by the caller, and 'block' keeps its
			 * original value for the trace message below. */
			bhs[i] = sb_getblk(inode->i_sb, block + i);
			if (bhs[i] == NULL) {
				mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
				status = -EIO;
				mlog_errno(status);
				goto bail;
			}
		}
		bh = bhs[i];
		ignore_cache = 0;

		/* There are three read-ahead cases here which we need to
		 * be concerned with. All three assume a buffer has
		 * previously been submitted with OCFS2_BH_READAHEAD
		 * and it hasn't yet completed I/O.
		 *
		 * 1) The current request is sync to disk. This rarely
		 *    happens these days, and never when performance
		 *    matters - the code can just wait on the buffer
		 *    lock and re-submit.
		 *
		 * 2) The current request is cached, but not
		 *    readahead. ocfs2_buffer_uptodate() will return
		 *    false anyway, so we'll wind up waiting on the
		 *    buffer lock to do I/O. We re-check the request
		 *    after getting the lock to avoid a re-submit.
		 *
		 * 3) The current request is readahead (and so must
		 *    also be a caching one). We short circuit if the
		 *    buffer is locked (under I/O) and if it's in the
		 *    uptodate cache. The re-check from #2 catches the
		 *    case that the previous read-ahead completes just
		 *    before our is-it-in-flight check.
		 */

		if (flags & OCFS2_BH_CACHED &&
		    !ocfs2_buffer_uptodate(inode, bh)) {
			mlog(ML_UPTODATE,
			     "bh (%llu), inode %llu not uptodate\n",
			     (unsigned long long)bh->b_blocknr,
			     (unsigned long long)OCFS2_I(inode)->ip_blkno);
			ignore_cache = 1;
		}

		/* XXX: Can we ever get this and *not* have the cached
		 * flag set? */
		if (buffer_jbd(bh)) {
			if (!(flags & OCFS2_BH_CACHED) || ignore_cache)
				mlog(ML_BH_IO, "trying to sync read a jbd "
					       "managed bh (blocknr = %llu)\n",
				     (unsigned long long)bh->b_blocknr);
			continue;
		}

		if (!(flags & OCFS2_BH_CACHED) || ignore_cache) {
			if (buffer_dirty(bh)) {
				/* This should probably be a BUG, or
				 * at least return an error. */
				mlog(ML_BH_IO, "asking me to sync read a dirty "
					       "buffer! (blocknr = %llu)\n",
				     (unsigned long long)bh->b_blocknr);
				continue;
			}

			/* A read-ahead request was made - if the
			 * buffer is already under read-ahead from a
			 * previously submitted request then we are
			 * done here. */
			if ((flags & OCFS2_BH_READAHEAD)
			    && ocfs2_buffer_read_ahead(inode, bh))
				continue;

			lock_buffer(bh);
			if (buffer_jbd(bh)) {
				/* The journal picked the buffer up while
				 * we slept on the lock; only fatal when
				 * race catching is compiled in. */
#ifdef CATCH_BH_JBD_RACES
				mlog(ML_ERROR, "block %llu had the JBD bit set "
					       "while I was in lock_buffer!",
				     (unsigned long long)bh->b_blocknr);
				BUG();
#else
				unlock_buffer(bh);
				continue;
#endif
			}

			/* Re-check ocfs2_buffer_uptodate() as a
			 * previously read-ahead buffer may have
			 * completed I/O while we were waiting for the
			 * buffer lock. */
			if ((flags & OCFS2_BH_CACHED)
			    && !(flags & OCFS2_BH_READAHEAD)
			    && ocfs2_buffer_uptodate(inode, bh)) {
				unlock_buffer(bh);
				continue;
			}

			clear_buffer_uptodate(bh);
			get_bh(bh); /* for end_buffer_read_sync() */
			bh->b_end_io = end_buffer_read_sync;
			submit_bh(READ, bh);
			continue;
		}
	}

	status = 0;

	/* Completion pass, walking the array backwards.  Read-ahead
	 * requests do not wait; everything else waits for the I/O and
	 * checks the result. */
	for (i = (nr - 1); i >= 0; i--) {
		bh = bhs[i];

		if (!(flags & OCFS2_BH_READAHEAD)) {
			/* We know this can't have changed as we hold
			 * ip_io_mutex. Avoid doing any work on the bh if
			 * the journal has it. */
			if (!buffer_jbd(bh))
				wait_on_buffer(bh);

			if (!buffer_uptodate(bh)) {
				/* Status won't be cleared from here on out,
				 * so we can safely record this and loop back
				 * to cleanup the other buffers. Don't need to
				 * remove the clustered uptodate information
				 * for this bh as it's not marked locally
				 * uptodate. */
				status = -EIO;
				put_bh(bh);
				bhs[i] = NULL;
				continue;
			}
		}

		/* Always set the buffer in the cache, even if it was
		 * a forced read, or read-ahead which hasn't yet
		 * completed. */
		ocfs2_set_buffer_uptodate(inode, bh);
	}
	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);

	/* 'ignore_cache' here reflects only the last buffer examined by
	 * the submission loop. */
	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
	     (unsigned long long)block, nr,
	     (!(flags & OCFS2_BH_CACHED) || ignore_cache) ? "no" : "yes", flags);

bail:

	mlog_exit(status);
	return status;
}

/*
 * Sanity check for super block writes: @blkno has to be either the
 * primary super block (OCFS2_SUPER_BLOCK_BLKNO) or one of the backup
 * super block locations reported by ocfs2_backup_super_blkno() for @sb.
 * Any other block number is a programming error and trips BUG().
 */
static void ocfs2_check_super_or_backup(struct super_block *sb,
					sector_t blkno)
{
	int slot = 0;

	/* The primary super block is always acceptable. */
	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
		return;

	/* Otherwise it must match one of the backup slots. */
	while (slot < OCFS2_MAX_BACKUP_SUPERBLOCKS) {
		u64 candidate = ocfs2_backup_super_blkno(sb, slot);

		if (candidate == blkno)
			return;
		slot++;
	}

	BUG();
}

/*
 * ocfs2_write_super_or_backup() - synchronously write the super block or
 * one of its backups.
 *
 * Writing the super block and its backups doesn't need to collaborate
 * with the journal, so we don't need to lock ip_io_mutex and no inode
 * needs to be passed into this function.
 *
 * @osb: super block info, used for the read-only checks and to validate
 *       the target block number.
 * @bh:  buffer to write.  It must not be owned by the journal, and its
 *       block number must be the super block or a backup location (both
 *       conditions BUG() otherwise).
 *
 * Returns 0 on success, -EROFS if the file system is hard or soft
 * read-only (no I/O is attempted in that case), or -EIO if the buffer is
 * not uptodate once the write has completed.
 *
 * The caller's reference on @bh is never consumed here, neither on
 * success nor on failure; the caller releases it as usual.
 */
int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
				struct buffer_head *bh)
{
	int ret = 0;

	mlog_entry_void();

	BUG_ON(buffer_jbd(bh));
	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);

	if (ocfs2_is_hard_readonly(osb) || ocfs2_is_soft_readonly(osb)) {
		ret = -EROFS;
		goto out;
	}

	lock_buffer(bh);
	set_buffer_uptodate(bh);

	/* remove from dirty list before I/O. */
	clear_buffer_dirty(bh);

	get_bh(bh); /* for end_buffer_write_sync() */
	bh->b_end_io = end_buffer_write_sync;
	submit_bh(WRITE, bh);

	wait_on_buffer(bh);

	/* The reference taken with get_bh() above belongs to
	 * end_buffer_write_sync(), so on failure there is nothing left
	 * for us to drop.  Putting the bh here would release the caller's
	 * reference behind its back and unbalance the refcount when the
	 * caller releases it again. */
	if (!buffer_uptodate(bh))
		ret = -EIO;

out:
	mlog_exit(ret);
	return ret;
}
Commit	Line	Data
ccd979bd MF	1	/* -- mode: c; c-basic-offset: 8; --
	2	* vim: noexpandtab sw=8 ts=8 sts=0:
	3	*
	4	* io.c
	5	*
	6	* Buffer cache handling
	7	*
	8	* Copyright (C) 2002, 2004 Oracle. All rights reserved.
	9	*
	10	* This program is free software; you can redistribute it and/or
	11	* modify it under the terms of the GNU General Public
	12	* License as published by the Free Software Foundation; either
	13	* version 2 of the License, or (at your option) any later version.
	14	*
	15	* This program is distributed in the hope that it will be useful,
	16	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	17	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
	18	* General Public License for more details.
	19	*
	20	* You should have received a copy of the GNU General Public
	21	* License along with this program; if not, write to the
	22	* Free Software Foundation, Inc., 59 Temple Place - Suite 330,
	23	* Boston, MA 021110-1307, USA.
	24	*/
	25
	26	#include <linux/fs.h>
	27	#include <linux/types.h>
	28	#include <linux/slab.h>
	29	#include <linux/highmem.h>
	30
	31	#include <cluster/masklog.h>
	32
	33	#include "ocfs2.h"
	34
	35	#include "alloc.h"
	36	#include "inode.h"
	37	#include "journal.h"
	38	#include "uptodate.h"
	39
	40	#include "buffer_head_io.h"
	41
	42	int ocfs2_write_block(struct ocfs2_super osb, struct buffer_head bh,
	43	struct inode *inode)
	44	{
	45	int ret = 0;
	46
	47	mlog_entry("(bh->b_blocknr = %llu, inode=%p)\n",
	48	(unsigned long long)bh->b_blocknr, inode);
	49
	50	BUG_ON(bh->b_blocknr < OCFS2_SUPER_BLOCK_BLKNO);
	51	BUG_ON(buffer_jbd(bh));
	52
	53	/* No need to check for a soft readonly file system here. non
	54	* journalled writes are only ever done on system files which
	55	* can get modified during recovery even if read-only. */
	56	if (ocfs2_is_hard_readonly(osb)) {
	57	ret = -EROFS;
	58	goto out;
	59	}
	60
251b6ecc	61	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
ccd979bd MF	62
	63	lock_buffer(bh);
	64	set_buffer_uptodate(bh);
	65
	66	/* remove from dirty list before I/O. */
	67	clear_buffer_dirty(bh);
	68
da1e9098	69	get_bh(bh); /* for end_buffer_write_sync() */
ccd979bd MF	70	bh->b_end_io = end_buffer_write_sync;
	71	submit_bh(WRITE, bh);
	72
	73	wait_on_buffer(bh);
	74
	75	if (buffer_uptodate(bh)) {
	76	ocfs2_set_buffer_uptodate(inode, bh);
	77	} else {
	78	/* We don't need to remove the clustered uptodate
	79	* information for this bh as it's not marked locally
	80	* uptodate. */
	81	ret = -EIO;
2fe5c1d7	82	put_bh(bh);
ccd979bd MF	83	}
ccd979bd MF	84
251b6ecc	85	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
ccd979bd MF	86	out:
	87	mlog_exit(ret);
	88	return ret;
	89	}
	90
da1e9098 JB	91	int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block,
	92	unsigned int nr, struct buffer_head *bhs[])
	93	{
	94	int status = 0;
	95	unsigned int i;
	96	struct buffer_head *bh;
	97
	98	if (!nr) {
	99	mlog(ML_BH_IO, "No buffers will be read!\n");
	100	goto bail;
	101	}
	102
	103	for (i = 0 ; i < nr ; i++) {
	104	if (bhs[i] == NULL) {
	105	bhs[i] = sb_getblk(osb->sb, block++);
	106	if (bhs[i] == NULL) {
	107	status = -EIO;
	108	mlog_errno(status);
	109	goto bail;
	110	}
	111	}
	112	bh = bhs[i];
	113
	114	if (buffer_jbd(bh)) {
	115	mlog(ML_ERROR,
	116	"trying to sync read a jbd "
	117	"managed bh (blocknr = %llu), skipping\n",
	118	(unsigned long long)bh->b_blocknr);
	119	continue;
	120	}
	121
	122	if (buffer_dirty(bh)) {
	123	/* This should probably be a BUG, or
	124	* at least return an error. */
	125	mlog(ML_ERROR,
	126	"trying to sync read a dirty "
	127	"buffer! (blocknr = %llu), skipping\n",
	128	(unsigned long long)bh->b_blocknr);
	129	continue;
	130	}
	131
	132	lock_buffer(bh);
	133	if (buffer_jbd(bh)) {
	134	mlog(ML_ERROR,
	135	"block %llu had the JBD bit set "
	136	"while I was in lock_buffer!",
	137	(unsigned long long)bh->b_blocknr);
	138	BUG();
	139	}
	140
	141	clear_buffer_uptodate(bh);
	142	get_bh(bh); /* for end_buffer_read_sync() */
	143	bh->b_end_io = end_buffer_read_sync;
	144	submit_bh(READ, bh);
	145	}
	146
	147	for (i = nr; i > 0; i--) {
	148	bh = bhs[i - 1];
	149
	150	if (buffer_jbd(bh)) {
	151	mlog(ML_ERROR,
	152	"the journal got the buffer while it was "
	153	"locked for io! (blocknr = %llu)\n",
	154	(unsigned long long)bh->b_blocknr);
155	BUG();
156	}
157
158	wait_on_buffer(bh);
159	if (!buffer_uptodate(bh)) {
160	/* Status won't be cleared from here on out,
161	* so we can safely record this and loop back
162	* to cleanup the other buffers. */
163	status = -EIO;
164	put_bh(bh);
165	bhs[i - 1] = NULL;
166	}
167	}
168
169	bail:
170	return status;
171	}
172
31d33073 JB	173	int ocfs2_read_blocks(struct inode *inode, u64 block, int nr,
31d33073 JB	174	struct buffer_head *bhs[], int flags)
ccd979bd MF	175	{
ccd979bd MF	176	int status = 0;
ccd979bd MF	177	int i, ignore_cache = 0;
	178	struct buffer_head *bh;
	179
31d33073 JB	180	mlog_entry("(inode=%p, block=(%llu), nr=(%d), flags=%d)\n",
31d33073 JB	181	inode, (unsigned long long)block, nr, flags);
ccd979bd	182
31d33073 JB	183	BUG_ON(!inode);
31d33073 JB	184	BUG_ON((flags & OCFS2_BH_READAHEAD) && !(flags & OCFS2_BH_CACHED));
aa958874	185
31d33073	186	if (bhs == NULL) {
ccd979bd MF	187	status = -EINVAL;
	188	mlog_errno(status);
	189	goto bail;
	190	}
	191
	192	if (nr < 0) {
	193	mlog(ML_ERROR, "asked to read %d blocks!\n", nr);
	194	status = -EINVAL;
	195	mlog_errno(status);
	196	goto bail;
	197	}
	198
	199	if (nr == 0) {
	200	mlog(ML_BH_IO, "No buffers will be read!\n");
	201	status = 0;
	202	goto bail;
	203	}
	204
31d33073	205	mutex_lock(&OCFS2_I(inode)->ip_io_mutex);
ccd979bd MF	206	for (i = 0 ; i < nr ; i++) {
ccd979bd MF	207	if (bhs[i] == NULL) {
31d33073	208	bhs[i] = sb_getblk(inode->i_sb, block++);
ccd979bd	209	if (bhs[i] == NULL) {
31d33073	210	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
ccd979bd MF	211	status = -EIO;
	212	mlog_errno(status);
	213	goto bail;
	214	}
	215	}
	216	bh = bhs[i];
	217	ignore_cache = 0;
	218
aa958874 MF	219	/* There are three read-ahead cases here which we need to
	220	* be concerned with. All three assume a buffer has
	221	* previously been submitted with OCFS2_BH_READAHEAD
	222	* and it hasn't yet completed I/O.
	223	*
	224	* 1) The current request is sync to disk. This rarely
	225	* happens these days, and never when performance
	226	* matters - the code can just wait on the buffer
	227	* lock and re-submit.
	228	*
	229	* 2) The current request is cached, but not
	230	* readahead. ocfs2_buffer_uptodate() will return
	231	* false anyway, so we'll wind up waiting on the
	232	* buffer lock to do I/O. We re-check the request
	233	* with after getting the lock to avoid a re-submit.
	234	*
	235	* 3) The current request is readahead (and so must
	236	* also be a caching one). We short circuit if the
	237	* buffer is locked (under I/O) and if it's in the
	238	* uptodate cache. The re-check from #2 catches the
	239	* case that the previous read-ahead completes just
	240	* before our is-it-in-flight check.
	241	*/
	242
ccd979bd MF	243	if (flags & OCFS2_BH_CACHED &&
	244	!ocfs2_buffer_uptodate(inode, bh)) {
	245	mlog(ML_UPTODATE,
b0697053	246	"bh (%llu), inode %llu not uptodate\n",
ccd979bd	247	(unsigned long long)bh->b_blocknr,
b0697053	248	(unsigned long long)OCFS2_I(inode)->ip_blkno);
ccd979bd MF	249	ignore_cache = 1;
	250	}
	251
	252	/* XXX: Can we ever get this and not have the cached
	253	* flag set? */
	254	if (buffer_jbd(bh)) {
	255	if (!(flags & OCFS2_BH_CACHED) \|\| ignore_cache)
	256	mlog(ML_BH_IO, "trying to sync read a jbd "
	257	"managed bh (blocknr = %llu)\n",
	258	(unsigned long long)bh->b_blocknr);
	259	continue;
	260	}
	261
	262	if (!(flags & OCFS2_BH_CACHED) \|\| ignore_cache) {
	263	if (buffer_dirty(bh)) {
	264	/* This should probably be a BUG, or
	265	* at least return an error. */
	266	mlog(ML_BH_IO, "asking me to sync read a dirty "
	267	"buffer! (blocknr = %llu)\n",
	268	(unsigned long long)bh->b_blocknr);
	269	continue;
	270	}
	271
aa958874 MF	272	/* A read-ahead request was made - if the
	273	* buffer is already under read-ahead from a
	274	* previously submitted request than we are
	275	* done here. */
	276	if ((flags & OCFS2_BH_READAHEAD)
	277	&& ocfs2_buffer_read_ahead(inode, bh))
	278	continue;
	279
ccd979bd MF	280	lock_buffer(bh);
	281	if (buffer_jbd(bh)) {
	282	#ifdef CATCH_BH_JBD_RACES
	283	mlog(ML_ERROR, "block %llu had the JBD bit set "
	284	"while I was in lock_buffer!",
	285	(unsigned long long)bh->b_blocknr);
	286	BUG();
	287	#else
	288	unlock_buffer(bh);
	289	continue;
	290	#endif
	291	}
aa958874 MF	292
	293	/* Re-check ocfs2_buffer_uptodate() as a
	294	* previously read-ahead buffer may have
	295	* completed I/O while we were waiting for the
	296	* buffer lock. */
	297	if ((flags & OCFS2_BH_CACHED)
	298	&& !(flags & OCFS2_BH_READAHEAD)
	299	&& ocfs2_buffer_uptodate(inode, bh)) {
	300	unlock_buffer(bh);
	301	continue;
	302	}
	303
ccd979bd MF	304	clear_buffer_uptodate(bh);
	305	get_bh(bh); /* for end_buffer_read_sync() */
	306	bh->b_end_io = end_buffer_read_sync;
aa958874	307	submit_bh(READ, bh);
ccd979bd MF	308	continue;
	309	}
	310	}
	311
	312	status = 0;
	313
	314	for (i = (nr - 1); i >= 0; i--) {
	315	bh = bhs[i];
	316
aa958874 MF	317	if (!(flags & OCFS2_BH_READAHEAD)) {
	318	/* We know this can't have changed as we hold the
	319	* inode sem. Avoid doing any work on the bh if the
	320	* journal has it. */
	321	if (!buffer_jbd(bh))
	322	wait_on_buffer(bh);
	323
	324	if (!buffer_uptodate(bh)) {
	325	/* Status won't be cleared from here on out,
	326	* so we can safely record this and loop back
	327	* to cleanup the other buffers. Don't need to
	328	* remove the clustered uptodate information
	329	* for this bh as it's not marked locally
	330	* uptodate. */
	331	status = -EIO;
2fe5c1d7	332	put_bh(bh);
aa958874 MF	333	bhs[i] = NULL;
	334	continue;
	335	}
ccd979bd MF	336	}
ccd979bd MF	337
aa958874 MF	338	/* Always set the buffer in the cache, even if it was
	339	* a forced read, or read-ahead which hasn't yet
	340	* completed. */
31d33073	341	ocfs2_set_buffer_uptodate(inode, bh);
ccd979bd	342	}
31d33073	343	mutex_unlock(&OCFS2_I(inode)->ip_io_mutex);
ccd979bd	344
aa958874	345	mlog(ML_BH_IO, "block=(%llu), nr=(%d), cached=%s, flags=0x%x\n",
b0697053	346	(unsigned long long)block, nr,
aa958874	347	(!(flags & OCFS2_BH_CACHED) \|\| ignore_cache) ? "no" : "yes", flags);
ccd979bd MF	348
	349	bail:
	350
	351	mlog_exit(status);
	352	return status;
	353	}
d659072f TM	354
	355	/* Check whether the blkno is the super block or one of the backups. */
	356	static void ocfs2_check_super_or_backup(struct super_block *sb,
	357	sector_t blkno)
	358	{
	359	int i;
	360	u64 backup_blkno;
	361
	362	if (blkno == OCFS2_SUPER_BLOCK_BLKNO)
	363	return;
	364
	365	for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
	366	backup_blkno = ocfs2_backup_super_blkno(sb, i);
	367	if (backup_blkno == blkno)
	368	return;
	369	}
	370
	371	BUG();
	372	}
	373
	374	/*
	375	* Write super block and backups doesn't need to collaborate with journal,
	376	* so we don't need to lock ip_io_mutex and inode doesn't need to bea passed
	377	* into this function.
	378	*/
	379	int ocfs2_write_super_or_backup(struct ocfs2_super *osb,
	380	struct buffer_head *bh)
	381	{
	382	int ret = 0;
	383
	384	mlog_entry_void();
	385
	386	BUG_ON(buffer_jbd(bh));
	387	ocfs2_check_super_or_backup(osb->sb, bh->b_blocknr);
	388
	389	if (ocfs2_is_hard_readonly(osb) \|\| ocfs2_is_soft_readonly(osb)) {
	390	ret = -EROFS;
	391	goto out;
	392	}
	393
	394	lock_buffer(bh);
	395	set_buffer_uptodate(bh);
	396
	397	/* remove from dirty list before I/O. */
	398	clear_buffer_dirty(bh);
	399
	400	get_bh(bh); /* for end_buffer_write_sync() */
	401	bh->b_end_io = end_buffer_write_sync;
	402	submit_bh(WRITE, bh);
	403
	404	wait_on_buffer(bh);
	405
	406	if (!buffer_uptodate(bh)) {
	407	ret = -EIO;
2fe5c1d7	408	put_bh(bh);
d659072f TM	409	}
	410
	411	out:
	412	mlog_exit(ret);
	413	return ret;
	414	}