]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/ext3/super.c
ext3: mount flags manipulation cleanup
[net-next-2.6.git] / fs / ext3 / super.c
CommitLineData
1da177e4
LT
1/*
2 * linux/fs/ext3/super.c
3 *
4 * Copyright (C) 1992, 1993, 1994, 1995
5 * Remy Card (card@masi.ibp.fr)
6 * Laboratoire MASI - Institut Blaise Pascal
7 * Universite Pierre et Marie Curie (Paris VI)
8 *
9 * from
10 *
11 * linux/fs/minix/inode.c
12 *
13 * Copyright (C) 1991, 1992 Linus Torvalds
14 *
15 * Big-endian to little-endian byte-swapping/bitmaps by
16 * David S. Miller (davem@caip.rutgers.edu), 1995
17 */
18
1da177e4
LT
19#include <linux/module.h>
20#include <linux/string.h>
21#include <linux/fs.h>
22#include <linux/time.h>
23#include <linux/jbd.h>
24#include <linux/ext3_fs.h>
25#include <linux/ext3_jbd.h>
26#include <linux/slab.h>
27#include <linux/init.h>
28#include <linux/blkdev.h>
29#include <linux/parser.h>
30#include <linux/smp_lock.h>
31#include <linux/buffer_head.h>
a5694255 32#include <linux/exportfs.h>
1da177e4
LT
33#include <linux/vfs.h>
34#include <linux/random.h>
35#include <linux/mount.h>
36#include <linux/namei.h>
37#include <linux/quotaops.h>
8fc2751b 38#include <linux/seq_file.h>
3fc74269 39#include <linux/log2.h>
381be254 40
1da177e4 41#include <asm/uaccess.h>
381be254 42
1da177e4
LT
43#include "xattr.h"
44#include "acl.h"
381be254 45#include "namei.h"
1da177e4 46
bbae8bcc
LT
47#ifdef CONFIG_EXT3_DEFAULTS_TO_ORDERED
48 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_ORDERED_DATA
49#else
50 #define EXT3_MOUNT_DEFAULT_DATA_MODE EXT3_MOUNT_WRITEBACK_DATA
51#endif
52
71b96257
JL
53static int ext3_load_journal(struct super_block *, struct ext3_super_block *,
54 unsigned long journal_devnum);
1da177e4 55static int ext3_create_journal(struct super_block *, struct ext3_super_block *,
eee194e7 56 unsigned int);
c4be0c1d
TS
57static int ext3_commit_super(struct super_block *sb,
58 struct ext3_super_block *es,
1da177e4
LT
59 int sync);
60static void ext3_mark_recovery_complete(struct super_block * sb,
61 struct ext3_super_block * es);
62static void ext3_clear_journal_err(struct super_block * sb,
63 struct ext3_super_block * es);
64static int ext3_sync_fs(struct super_block *sb, int wait);
65static const char *ext3_decode_error(struct super_block * sb, int errno,
66 char nbuf[16]);
67static int ext3_remount (struct super_block * sb, int * flags, char * data);
726c3342 68static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf);
c4be0c1d 69static int ext3_unfreeze(struct super_block *sb);
c4be0c1d 70static int ext3_freeze(struct super_block *sb);
1da177e4 71
ae6ddcc5 72/*
1da177e4
LT
73 * Wrappers for journal_start/end.
74 *
75 * The only special thing we need to do here is to make sure that all
76 * journal_end calls result in the superblock being marked dirty, so
77 * that sync() will call the filesystem's write_super callback if
ae6ddcc5 78 * appropriate.
1da177e4
LT
79 */
80handle_t *ext3_journal_start_sb(struct super_block *sb, int nblocks)
81{
82 journal_t *journal;
83
84 if (sb->s_flags & MS_RDONLY)
85 return ERR_PTR(-EROFS);
86
87 /* Special case here: if the journal has aborted behind our
88 * backs (eg. EIO in the commit thread), then we still need to
89 * take the FS itself readonly cleanly. */
90 journal = EXT3_SB(sb)->s_journal;
91 if (is_journal_aborted(journal)) {
e05b6b52 92 ext3_abort(sb, __func__,
1da177e4
LT
93 "Detected aborted journal");
94 return ERR_PTR(-EROFS);
95 }
96
97 return journal_start(journal, nblocks);
98}
99
ae6ddcc5 100/*
1da177e4
LT
101 * The only special thing we need to do here is to make sure that all
102 * journal_stop calls result in the superblock being marked dirty, so
103 * that sync() will call the filesystem's write_super callback if
ae6ddcc5 104 * appropriate.
1da177e4
LT
105 */
106int __ext3_journal_stop(const char *where, handle_t *handle)
107{
108 struct super_block *sb;
109 int err;
110 int rc;
111
112 sb = handle->h_transaction->t_journal->j_private;
113 err = handle->h_err;
114 rc = journal_stop(handle);
115
116 if (!err)
117 err = rc;
118 if (err)
119 __ext3_std_error(sb, where, err);
120 return err;
121}
122
123void ext3_journal_abort_handle(const char *caller, const char *err_fn,
124 struct buffer_head *bh, handle_t *handle, int err)
125{
126 char nbuf[16];
127 const char *errstr = ext3_decode_error(NULL, err, nbuf);
128
129 if (bh)
130 BUFFER_TRACE(bh, "abort");
131
132 if (!handle->h_err)
133 handle->h_err = err;
134
135 if (is_handle_aborted(handle))
136 return;
137
4cf46b67
AF
138 printk(KERN_ERR "EXT3-fs: %s: aborting transaction: %s in %s\n",
139 caller, errstr, err_fn);
1da177e4
LT
140
141 journal_abort_handle(handle);
142}
143
4cf46b67
AF
144void ext3_msg(struct super_block *sb, const char *prefix,
145 const char *fmt, ...)
146{
147 va_list args;
148
149 va_start(args, fmt);
150 printk("%sEXT3-fs (%s): ", prefix, sb->s_id);
151 vprintk(fmt, args);
152 printk("\n");
153 va_end(args);
154}
155
1da177e4
LT
156/* Deal with the reporting of failure conditions on a filesystem such as
157 * inconsistencies detected or read IO failures.
158 *
159 * On ext2, we can store the error state of the filesystem in the
160 * superblock. That is not possible on ext3, because we may have other
161 * write ordering constraints on the superblock which prevent us from
162 * writing it out straight away; and given that the journal is about to
163 * be aborted, we can't rely on the current, or future, transactions to
164 * write out the superblock safely.
165 *
166 * We'll just use the journal_abort() error code to record an error in
167 * the journal instead. On recovery, the journal will compain about
168 * that error until we've noted it down and cleared it.
169 */
170
171static void ext3_handle_error(struct super_block *sb)
172{
173 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
174
175 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
176 es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
177
178 if (sb->s_flags & MS_RDONLY)
179 return;
180
7543fc7b 181 if (!test_opt (sb, ERRORS_CONT)) {
1da177e4
LT
182 journal_t *journal = EXT3_SB(sb)->s_journal;
183
e3c96435 184 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
1da177e4
LT
185 if (journal)
186 journal_abort(journal, -EIO);
187 }
7543fc7b 188 if (test_opt (sb, ERRORS_RO)) {
4cf46b67
AF
189 ext3_msg(sb, KERN_CRIT,
190 "error: remounting filesystem read-only");
7543fc7b
VA
191 sb->s_flags |= MS_RDONLY;
192 }
193 ext3_commit_super(sb, es, 1);
1da177e4 194 if (test_opt(sb, ERRORS_PANIC))
4cf46b67 195 panic("EXT3-fs (%s): panic forced after error\n",
1da177e4 196 sb->s_id);
1da177e4
LT
197}
198
199void ext3_error (struct super_block * sb, const char * function,
200 const char * fmt, ...)
201{
202 va_list args;
203
204 va_start(args, fmt);
205 printk(KERN_CRIT "EXT3-fs error (device %s): %s: ",sb->s_id, function);
206 vprintk(fmt, args);
207 printk("\n");
208 va_end(args);
209
210 ext3_handle_error(sb);
211}
212
213static const char *ext3_decode_error(struct super_block * sb, int errno,
214 char nbuf[16])
215{
216 char *errstr = NULL;
217
218 switch (errno) {
219 case -EIO:
220 errstr = "IO failure";
221 break;
222 case -ENOMEM:
223 errstr = "Out of memory";
224 break;
225 case -EROFS:
226 if (!sb || EXT3_SB(sb)->s_journal->j_flags & JFS_ABORT)
227 errstr = "Journal has aborted";
228 else
229 errstr = "Readonly filesystem";
230 break;
231 default:
232 /* If the caller passed in an extra buffer for unknown
233 * errors, textualise them now. Else we just return
234 * NULL. */
235 if (nbuf) {
236 /* Check for truncated error codes... */
237 if (snprintf(nbuf, 16, "error %d", -errno) >= 0)
238 errstr = nbuf;
239 }
240 break;
241 }
242
243 return errstr;
244}
245
246/* __ext3_std_error decodes expected errors from journaling functions
247 * automatically and invokes the appropriate error response. */
248
249void __ext3_std_error (struct super_block * sb, const char * function,
250 int errno)
251{
252 char nbuf[16];
30121624 253 const char *errstr;
1da177e4 254
30121624
ST
255 /* Special case: if the error is EROFS, and we're not already
256 * inside a transaction, then there's really no point in logging
257 * an error. */
258 if (errno == -EROFS && journal_current_handle() == NULL &&
259 (sb->s_flags & MS_RDONLY))
260 return;
261
262 errstr = ext3_decode_error(sb, errno, nbuf);
4cf46b67 263 ext3_msg(sb, KERN_CRIT, "error in %s: %s", function, errstr);
1da177e4
LT
264
265 ext3_handle_error(sb);
266}
267
268/*
269 * ext3_abort is a much stronger failure handler than ext3_error. The
270 * abort function may be used to deal with unrecoverable failures such
271 * as journal IO errors or ENOMEM at a critical moment in log management.
272 *
273 * We unconditionally force the filesystem into an ABORT|READONLY state,
274 * unless the error response on the fs has been set to panic in which
275 * case we take the easy way out and panic immediately.
276 */
277
278void ext3_abort (struct super_block * sb, const char * function,
279 const char * fmt, ...)
280{
281 va_list args;
282
1da177e4 283 va_start(args, fmt);
4cf46b67 284 printk(KERN_CRIT "EXT3-fs (%s): error: %s: ", sb->s_id, function);
1da177e4
LT
285 vprintk(fmt, args);
286 printk("\n");
287 va_end(args);
288
289 if (test_opt(sb, ERRORS_PANIC))
4cf46b67 290 panic("EXT3-fs: panic from previous error\n");
1da177e4
LT
291
292 if (sb->s_flags & MS_RDONLY)
293 return;
294
4cf46b67
AF
295 ext3_msg(sb, KERN_CRIT,
296 "error: remounting filesystem read-only");
1da177e4
LT
297 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
298 sb->s_flags |= MS_RDONLY;
e3c96435 299 set_opt(EXT3_SB(sb)->s_mount_opt, ABORT);
44d6f787
HK
300 if (EXT3_SB(sb)->s_journal)
301 journal_abort(EXT3_SB(sb)->s_journal, -EIO);
1da177e4
LT
302}
303
304void ext3_warning (struct super_block * sb, const char * function,
305 const char * fmt, ...)
306{
307 va_list args;
308
309 va_start(args, fmt);
4cf46b67 310 printk(KERN_WARNING "EXT3-fs (%s): warning: %s: ",
1da177e4
LT
311 sb->s_id, function);
312 vprintk(fmt, args);
313 printk("\n");
314 va_end(args);
315}
316
317void ext3_update_dynamic_rev(struct super_block *sb)
318{
319 struct ext3_super_block *es = EXT3_SB(sb)->s_es;
320
321 if (le32_to_cpu(es->s_rev_level) > EXT3_GOOD_OLD_REV)
322 return;
323
4cf46b67
AF
324 ext3_msg(sb, KERN_WARNING,
325 "warning: updating to rev %d because of "
326 "new feature flag, running e2fsck is recommended",
327 EXT3_DYNAMIC_REV);
1da177e4
LT
328
329 es->s_first_ino = cpu_to_le32(EXT3_GOOD_OLD_FIRST_INO);
330 es->s_inode_size = cpu_to_le16(EXT3_GOOD_OLD_INODE_SIZE);
331 es->s_rev_level = cpu_to_le32(EXT3_DYNAMIC_REV);
332 /* leave es->s_feature_*compat flags alone */
333 /* es->s_uuid will be set by e2fsck if empty */
334
335 /*
336 * The rest of the superblock fields should be zero, and if not it
337 * means they are likely already in use, so leave them alone. We
338 * can leave it up to e2fsck to clean up any inconsistencies there.
339 */
340}
341
342/*
343 * Open the external journal device
344 */
4cf46b67 345static struct block_device *ext3_blkdev_get(dev_t dev, struct super_block *sb)
1da177e4
LT
346{
347 struct block_device *bdev;
348 char b[BDEVNAME_SIZE];
349
350 bdev = open_by_devnum(dev, FMODE_READ|FMODE_WRITE);
351 if (IS_ERR(bdev))
352 goto fail;
353 return bdev;
354
355fail:
4cf46b67
AF
356 ext3_msg(sb, "error: failed to open journal device %s: %ld",
357 __bdevname(dev, b), PTR_ERR(bdev));
358
1da177e4
LT
359 return NULL;
360}
361
362/*
363 * Release the journal device
364 */
365static int ext3_blkdev_put(struct block_device *bdev)
366{
367 bd_release(bdev);
9a1c3542 368 return blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1da177e4
LT
369}
370
371static int ext3_blkdev_remove(struct ext3_sb_info *sbi)
372{
373 struct block_device *bdev;
374 int ret = -ENODEV;
375
376 bdev = sbi->journal_bdev;
377 if (bdev) {
378 ret = ext3_blkdev_put(bdev);
379 sbi->journal_bdev = NULL;
380 }
381 return ret;
382}
383
384static inline struct inode *orphan_list_entry(struct list_head *l)
385{
386 return &list_entry(l, struct ext3_inode_info, i_orphan)->vfs_inode;
387}
388
389static void dump_orphan_list(struct super_block *sb, struct ext3_sb_info *sbi)
390{
391 struct list_head *l;
392
4cf46b67 393 ext3_msg(sb, KERN_ERR, "error: sb orphan head is %d",
1da177e4
LT
394 le32_to_cpu(sbi->s_es->s_last_orphan));
395
4cf46b67 396 ext3_msg(sb, KERN_ERR, "sb_info orphan list:");
1da177e4
LT
397 list_for_each(l, &sbi->s_orphan) {
398 struct inode *inode = orphan_list_entry(l);
4cf46b67 399 ext3_msg(sb, KERN_ERR, " "
eee194e7 400 "inode %s:%lu at %p: mode %o, nlink %d, next %d\n",
1da177e4 401 inode->i_sb->s_id, inode->i_ino, inode,
ae6ddcc5 402 inode->i_mode, inode->i_nlink,
1da177e4
LT
403 NEXT_ORPHAN(inode));
404 }
405}
406
407static void ext3_put_super (struct super_block * sb)
408{
409 struct ext3_sb_info *sbi = EXT3_SB(sb);
410 struct ext3_super_block *es = sbi->s_es;
44d6f787 411 int i, err;
1da177e4 412
6cfd0148
CH
413 lock_kernel();
414
1da177e4 415 ext3_xattr_put_super(sb);
44d6f787
HK
416 err = journal_destroy(sbi->s_journal);
417 sbi->s_journal = NULL;
418 if (err < 0)
2d7c820e 419 ext3_abort(sb, __func__, "Couldn't clean up the journal");
44d6f787 420
1da177e4
LT
421 if (!(sb->s_flags & MS_RDONLY)) {
422 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
423 es->s_state = cpu_to_le16(sbi->s_mount_state);
424 BUFFER_TRACE(sbi->s_sbh, "marking dirty");
425 mark_buffer_dirty(sbi->s_sbh);
426 ext3_commit_super(sb, es, 1);
427 }
428
429 for (i = 0; i < sbi->s_gdb_count; i++)
430 brelse(sbi->s_group_desc[i]);
431 kfree(sbi->s_group_desc);
432 percpu_counter_destroy(&sbi->s_freeblocks_counter);
433 percpu_counter_destroy(&sbi->s_freeinodes_counter);
434 percpu_counter_destroy(&sbi->s_dirs_counter);
435 brelse(sbi->s_sbh);
436#ifdef CONFIG_QUOTA
437 for (i = 0; i < MAXQUOTAS; i++)
438 kfree(sbi->s_qf_names[i]);
439#endif
440
441 /* Debugging code just in case the in-memory inode orphan list
442 * isn't empty. The on-disk one can be non-empty if we've
443 * detected an error and taken the fs readonly, but the
444 * in-memory list had better be clean by this point. */
445 if (!list_empty(&sbi->s_orphan))
446 dump_orphan_list(sb, sbi);
447 J_ASSERT(list_empty(&sbi->s_orphan));
448
f98393a6 449 invalidate_bdev(sb->s_bdev);
1da177e4
LT
450 if (sbi->journal_bdev && sbi->journal_bdev != sb->s_bdev) {
451 /*
452 * Invalidate the journal device's buffers. We don't want them
453 * floating about in memory - the physical journal device may
454 * hotswapped, and it breaks the `ro-after' testing code.
455 */
456 sync_blockdev(sbi->journal_bdev);
f98393a6 457 invalidate_bdev(sbi->journal_bdev);
1da177e4
LT
458 ext3_blkdev_remove(sbi);
459 }
460 sb->s_fs_info = NULL;
5df096d6 461 kfree(sbi->s_blockgroup_lock);
1da177e4 462 kfree(sbi);
6cfd0148
CH
463
464 unlock_kernel();
1da177e4
LT
465}
466
e18b890b 467static struct kmem_cache *ext3_inode_cachep;
1da177e4
LT
468
469/*
470 * Called inside transaction, so use GFP_NOFS
471 */
472static struct inode *ext3_alloc_inode(struct super_block *sb)
473{
474 struct ext3_inode_info *ei;
475
e6b4f8da 476 ei = kmem_cache_alloc(ext3_inode_cachep, GFP_NOFS);
1da177e4
LT
477 if (!ei)
478 return NULL;
1da177e4
LT
479 ei->i_block_alloc_info = NULL;
480 ei->vfs_inode.i_version = 1;
fe8bc91c
JK
481 atomic_set(&ei->i_datasync_tid, 0);
482 atomic_set(&ei->i_sync_tid, 0);
1da177e4
LT
483 return &ei->vfs_inode;
484}
485
486static void ext3_destroy_inode(struct inode *inode)
487{
9f7dd93d
VA
488 if (!list_empty(&(EXT3_I(inode)->i_orphan))) {
489 printk("EXT3 Inode %p: orphan list check failed!\n",
490 EXT3_I(inode));
491 print_hex_dump(KERN_INFO, "", DUMP_PREFIX_ADDRESS, 16, 4,
492 EXT3_I(inode), sizeof(struct ext3_inode_info),
493 false);
494 dump_stack();
495 }
1da177e4
LT
496 kmem_cache_free(ext3_inode_cachep, EXT3_I(inode));
497}
498
51cc5068 499static void init_once(void *foo)
1da177e4
LT
500{
501 struct ext3_inode_info *ei = (struct ext3_inode_info *) foo;
502
a35afb83 503 INIT_LIST_HEAD(&ei->i_orphan);
1da177e4 504#ifdef CONFIG_EXT3_FS_XATTR
a35afb83 505 init_rwsem(&ei->xattr_sem);
1da177e4 506#endif
a35afb83
CL
507 mutex_init(&ei->truncate_mutex);
508 inode_init_once(&ei->vfs_inode);
1da177e4 509}
ae6ddcc5 510
1da177e4
LT
511static int init_inodecache(void)
512{
513 ext3_inode_cachep = kmem_cache_create("ext3_inode_cache",
514 sizeof(struct ext3_inode_info),
fffb60f9
PJ
515 0, (SLAB_RECLAIM_ACCOUNT|
516 SLAB_MEM_SPREAD),
20c2df83 517 init_once);
1da177e4
LT
518 if (ext3_inode_cachep == NULL)
519 return -ENOMEM;
520 return 0;
521}
522
523static void destroy_inodecache(void)
524{
1a1d92c1 525 kmem_cache_destroy(ext3_inode_cachep);
1da177e4
LT
526}
527
528static void ext3_clear_inode(struct inode *inode)
529{
530 struct ext3_block_alloc_info *rsv = EXT3_I(inode)->i_block_alloc_info;
1da177e4
LT
531 ext3_discard_reservation(inode);
532 EXT3_I(inode)->i_block_alloc_info = NULL;
e6022603
AM
533 if (unlikely(rsv))
534 kfree(rsv);
1da177e4
LT
535}
536
/*
 * Append the quota-related mount options (journaled quota format,
 * journaled quota file names, usrquota/grpquota) to @seq.
 * Emits nothing when CONFIG_QUOTA is not set.
 */
static inline void ext3_show_quota_options(struct seq_file *seq, struct super_block *sb)
{
#if defined(CONFIG_QUOTA)
	struct ext3_sb_info *sbi = EXT3_SB(sb);

	if (sbi->s_jquota_fmt) {
		const char *name;

		/* An unrecognised format is shown with an empty name. */
		if (sbi->s_jquota_fmt == QFMT_VFS_OLD)
			name = "vfsold";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V0)
			name = "vfsv0";
		else if (sbi->s_jquota_fmt == QFMT_VFS_V1)
			name = "vfsv1";
		else
			name = "";

		seq_printf(seq, ",jqfmt=%s", name);
	}

	if (sbi->s_qf_names[USRQUOTA])
		seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);

	if (sbi->s_qf_names[GRPQUOTA])
		seq_printf(seq, ",grpjquota=%s", sbi->s_qf_names[GRPQUOTA]);

	if (test_opt(sb, USRQUOTA))
		seq_puts(seq, ",usrquota");

	if (test_opt(sb, GRPQUOTA))
		seq_puts(seq, ",grpquota");
#endif
}
572
3c4cec65
JK
573static char *data_mode_string(unsigned long mode)
574{
575 switch (mode) {
576 case EXT3_MOUNT_JOURNAL_DATA:
577 return "journal";
578 case EXT3_MOUNT_ORDERED_DATA:
579 return "ordered";
580 case EXT3_MOUNT_WRITEBACK_DATA:
581 return "writeback";
582 }
583 return "unknown";
584}
585
571beed8
MS
586/*
587 * Show an option if
588 * - it's set to a non-default value OR
589 * - if the per-sb default is different from the global default
590 */
6cd37cda
PO
591static int ext3_show_options(struct seq_file *seq, struct vfsmount *vfs)
592{
593 struct super_block *sb = vfs->mnt_sb;
571beed8
MS
594 struct ext3_sb_info *sbi = EXT3_SB(sb);
595 struct ext3_super_block *es = sbi->s_es;
596 unsigned long def_mount_opts;
597
598 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
599
600 if (sbi->s_sb_block != 1)
601 seq_printf(seq, ",sb=%lu", sbi->s_sb_block);
602 if (test_opt(sb, MINIX_DF))
603 seq_puts(seq, ",minixdf");
604 if (test_opt(sb, GRPID))
605 seq_puts(seq, ",grpid");
606 if (!test_opt(sb, GRPID) && (def_mount_opts & EXT3_DEFM_BSDGROUPS))
607 seq_puts(seq, ",nogrpid");
608 if (sbi->s_resuid != EXT3_DEF_RESUID ||
609 le16_to_cpu(es->s_def_resuid) != EXT3_DEF_RESUID) {
610 seq_printf(seq, ",resuid=%u", sbi->s_resuid);
611 }
612 if (sbi->s_resgid != EXT3_DEF_RESGID ||
613 le16_to_cpu(es->s_def_resgid) != EXT3_DEF_RESGID) {
614 seq_printf(seq, ",resgid=%u", sbi->s_resgid);
615 }
1eca93f9 616 if (test_opt(sb, ERRORS_RO)) {
571beed8
MS
617 int def_errors = le16_to_cpu(es->s_errors);
618
619 if (def_errors == EXT3_ERRORS_PANIC ||
1eca93f9
AK
620 def_errors == EXT3_ERRORS_CONTINUE) {
621 seq_puts(seq, ",errors=remount-ro");
571beed8
MS
622 }
623 }
1eca93f9
AK
624 if (test_opt(sb, ERRORS_CONT))
625 seq_puts(seq, ",errors=continue");
571beed8
MS
626 if (test_opt(sb, ERRORS_PANIC))
627 seq_puts(seq, ",errors=panic");
628 if (test_opt(sb, NO_UID32))
629 seq_puts(seq, ",nouid32");
630 if (test_opt(sb, DEBUG))
631 seq_puts(seq, ",debug");
632 if (test_opt(sb, OLDALLOC))
633 seq_puts(seq, ",oldalloc");
634#ifdef CONFIG_EXT3_FS_XATTR
635 if (test_opt(sb, XATTR_USER))
636 seq_puts(seq, ",user_xattr");
637 if (!test_opt(sb, XATTR_USER) &&
638 (def_mount_opts & EXT3_DEFM_XATTR_USER)) {
639 seq_puts(seq, ",nouser_xattr");
640 }
641#endif
642#ifdef CONFIG_EXT3_FS_POSIX_ACL
643 if (test_opt(sb, POSIX_ACL))
644 seq_puts(seq, ",acl");
645 if (!test_opt(sb, POSIX_ACL) && (def_mount_opts & EXT3_DEFM_ACL))
646 seq_puts(seq, ",noacl");
647#endif
648 if (!test_opt(sb, RESERVATION))
649 seq_puts(seq, ",noreservation");
650 if (sbi->s_commit_interval) {
651 seq_printf(seq, ",commit=%u",
652 (unsigned) (sbi->s_commit_interval / HZ));
653 }
654 if (test_opt(sb, BARRIER))
655 seq_puts(seq, ",barrier=1");
656 if (test_opt(sb, NOBH))
657 seq_puts(seq, ",nobh");
6cd37cda 658
e3c96435 659 seq_printf(seq, ",data=%s", data_mode_string(test_opt(sb, DATA_FLAGS)));
0e4fb5e2
HK
660 if (test_opt(sb, DATA_ERR_ABORT))
661 seq_puts(seq, ",data_err=abort");
662
dee1d3b6
ES
663 if (test_opt(sb, NOLOAD))
664 seq_puts(seq, ",norecovery");
665
6cd37cda 666 ext3_show_quota_options(seq, sb);
8fc2751b
MB
667
668 return 0;
669}
1da177e4 670
fdb36673 671
74af0baa
CH
672static struct inode *ext3_nfs_get_inode(struct super_block *sb,
673 u64 ino, u32 generation)
fdb36673 674{
fdb36673 675 struct inode *inode;
fdb36673
N
676
677 if (ino < EXT3_FIRST_INO(sb) && ino != EXT3_ROOT_INO)
678 return ERR_PTR(-ESTALE);
679 if (ino > le32_to_cpu(EXT3_SB(sb)->s_es->s_inodes_count))
680 return ERR_PTR(-ESTALE);
681
682 /* iget isn't really right if the inode is currently unallocated!!
683 *
684 * ext3_read_inode will return a bad_inode if the inode had been
685 * deleted, so we should be safe.
686 *
687 * Currently we don't know the generation for parent directory, so
688 * a generation of 0 means "accept any"
689 */
473043dc
DH
690 inode = ext3_iget(sb, ino);
691 if (IS_ERR(inode))
692 return ERR_CAST(inode);
693 if (generation && inode->i_generation != generation) {
fdb36673
N
694 iput(inode);
695 return ERR_PTR(-ESTALE);
696 }
74af0baa
CH
697
698 return inode;
699}
700
701static struct dentry *ext3_fh_to_dentry(struct super_block *sb, struct fid *fid,
702 int fh_len, int fh_type)
703{
704 return generic_fh_to_dentry(sb, fid, fh_len, fh_type,
705 ext3_nfs_get_inode);
706}
707
708static struct dentry *ext3_fh_to_parent(struct super_block *sb, struct fid *fid,
709 int fh_len, int fh_type)
710{
711 return generic_fh_to_parent(sb, fid, fh_len, fh_type,
712 ext3_nfs_get_inode);
fdb36673
N
713}
714
6b082b53
TO
715/*
716 * Try to release metadata pages (indirect blocks, directories) which are
717 * mapped via the block device. Since these pages could have journal heads
718 * which would prevent try_to_free_buffers() from freeing them, we must use
719 * jbd layer's try_to_free_buffers() function to release them.
720 */
721static int bdev_try_to_free_page(struct super_block *sb, struct page *page,
722 gfp_t wait)
723{
724 journal_t *journal = EXT3_SB(sb)->s_journal;
725
726 WARN_ON(PageChecked(page));
727 if (!page_has_buffers(page))
728 return 0;
729 if (journal)
730 return journal_try_to_free_buffers(journal, page,
731 wait & ~__GFP_WAIT);
732 return try_to_free_buffers(page);
733}
734
8fc2751b 735#ifdef CONFIG_QUOTA
1da177e4
LT
736#define QTYPE2NAME(t) ((t)==USRQUOTA?"user":"group")
737#define QTYPE2MOPT(on, t) ((t)==USRQUOTA?((on)##USRJQUOTA):((on)##GRPJQUOTA))
738
1da177e4
LT
739static int ext3_write_dquot(struct dquot *dquot);
740static int ext3_acquire_dquot(struct dquot *dquot);
741static int ext3_release_dquot(struct dquot *dquot);
742static int ext3_mark_dquot_dirty(struct dquot *dquot);
743static int ext3_write_info(struct super_block *sb, int type);
2fd83a4f
JK
744static int ext3_quota_on(struct super_block *sb, int type, int format_id,
745 char *path, int remount);
1da177e4
LT
746static int ext3_quota_on_mount(struct super_block *sb, int type);
747static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
748 size_t len, loff_t off);
749static ssize_t ext3_quota_write(struct super_block *sb, int type,
750 const char *data, size_t len, loff_t off);
751
61e225dc 752static const struct dquot_operations ext3_quota_operations = {
a219ce37
JK
753 .initialize = dquot_initialize,
754 .drop = dquot_drop,
1da177e4
LT
755 .alloc_space = dquot_alloc_space,
756 .alloc_inode = dquot_alloc_inode,
757 .free_space = dquot_free_space,
758 .free_inode = dquot_free_inode,
759 .transfer = dquot_transfer,
760 .write_dquot = ext3_write_dquot,
761 .acquire_dquot = ext3_acquire_dquot,
762 .release_dquot = ext3_release_dquot,
763 .mark_dirty = ext3_mark_dquot_dirty,
157091a2
JK
764 .write_info = ext3_write_info,
765 .alloc_dquot = dquot_alloc,
766 .destroy_dquot = dquot_destroy,
1da177e4
LT
767};
768
0d54b217 769static const struct quotactl_ops ext3_qctl_operations = {
1da177e4
LT
770 .quota_on = ext3_quota_on,
771 .quota_off = vfs_quota_off,
772 .quota_sync = vfs_quota_sync,
773 .get_info = vfs_get_dqinfo,
774 .set_info = vfs_set_dqinfo,
775 .get_dqblk = vfs_get_dqblk,
776 .set_dqblk = vfs_set_dqblk
777};
778#endif
779
ee9b6d61 780static const struct super_operations ext3_sops = {
1da177e4
LT
781 .alloc_inode = ext3_alloc_inode,
782 .destroy_inode = ext3_destroy_inode,
1da177e4
LT
783 .write_inode = ext3_write_inode,
784 .dirty_inode = ext3_dirty_inode,
785 .delete_inode = ext3_delete_inode,
786 .put_super = ext3_put_super,
1da177e4 787 .sync_fs = ext3_sync_fs,
c4be0c1d
TS
788 .freeze_fs = ext3_freeze,
789 .unfreeze_fs = ext3_unfreeze,
1da177e4
LT
790 .statfs = ext3_statfs,
791 .remount_fs = ext3_remount,
792 .clear_inode = ext3_clear_inode,
8fc2751b 793 .show_options = ext3_show_options,
1da177e4
LT
794#ifdef CONFIG_QUOTA
795 .quota_read = ext3_quota_read,
796 .quota_write = ext3_quota_write,
797#endif
6b082b53 798 .bdev_try_to_free_page = bdev_try_to_free_page,
1da177e4
LT
799};
800
39655164 801static const struct export_operations ext3_export_ops = {
74af0baa
CH
802 .fh_to_dentry = ext3_fh_to_dentry,
803 .fh_to_parent = ext3_fh_to_parent,
1da177e4
LT
804 .get_parent = ext3_get_parent,
805};
806
/*
 * Token identifiers for the mount options matched by the `tokens' table.
 * The enumerator order is kept as is so the numeric values do not change.
 */
enum {
	Opt_bsd_df, Opt_minix_df,
	Opt_grpid, Opt_nogrpid,
	Opt_resgid, Opt_resuid,
	Opt_sb,
	Opt_err_cont, Opt_err_panic, Opt_err_ro,
	Opt_nouid32, Opt_nocheck, Opt_debug,
	Opt_oldalloc, Opt_orlov,
	Opt_user_xattr, Opt_nouser_xattr,
	Opt_acl, Opt_noacl,
	Opt_reservation, Opt_noreservation,
	Opt_noload,
	Opt_nobh, Opt_bh,
	Opt_commit,
	Opt_journal_update, Opt_journal_inum, Opt_journal_dev,
	Opt_abort,
	Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
	Opt_data_err_abort, Opt_data_err_ignore,
	Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
	Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1,
	Opt_quota, Opt_noquota,
	Opt_ignore, Opt_barrier, Opt_err, Opt_resize,
	Opt_usrquota, Opt_grpquota
};
821
a447c093 822static const match_table_t tokens = {
1da177e4
LT
823 {Opt_bsd_df, "bsddf"},
824 {Opt_minix_df, "minixdf"},
825 {Opt_grpid, "grpid"},
826 {Opt_grpid, "bsdgroups"},
827 {Opt_nogrpid, "nogrpid"},
828 {Opt_nogrpid, "sysvgroups"},
829 {Opt_resgid, "resgid=%u"},
830 {Opt_resuid, "resuid=%u"},
831 {Opt_sb, "sb=%u"},
832 {Opt_err_cont, "errors=continue"},
833 {Opt_err_panic, "errors=panic"},
834 {Opt_err_ro, "errors=remount-ro"},
835 {Opt_nouid32, "nouid32"},
836 {Opt_nocheck, "nocheck"},
837 {Opt_nocheck, "check=none"},
1da177e4
LT
838 {Opt_debug, "debug"},
839 {Opt_oldalloc, "oldalloc"},
840 {Opt_orlov, "orlov"},
841 {Opt_user_xattr, "user_xattr"},
842 {Opt_nouser_xattr, "nouser_xattr"},
843 {Opt_acl, "acl"},
844 {Opt_noacl, "noacl"},
845 {Opt_reservation, "reservation"},
846 {Opt_noreservation, "noreservation"},
847 {Opt_noload, "noload"},
dee1d3b6 848 {Opt_noload, "norecovery"},
1da177e4 849 {Opt_nobh, "nobh"},
ade1a29e 850 {Opt_bh, "bh"},
1da177e4
LT
851 {Opt_commit, "commit=%u"},
852 {Opt_journal_update, "journal=update"},
853 {Opt_journal_inum, "journal=%u"},
71b96257 854 {Opt_journal_dev, "journal_dev=%u"},
1da177e4
LT
855 {Opt_abort, "abort"},
856 {Opt_data_journal, "data=journal"},
857 {Opt_data_ordered, "data=ordered"},
858 {Opt_data_writeback, "data=writeback"},
0e4fb5e2
HK
859 {Opt_data_err_abort, "data_err=abort"},
860 {Opt_data_err_ignore, "data_err=ignore"},
1da177e4
LT
861 {Opt_offusrjquota, "usrjquota="},
862 {Opt_usrjquota, "usrjquota=%s"},
863 {Opt_offgrpjquota, "grpjquota="},
864 {Opt_grpjquota, "grpjquota=%s"},
865 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
866 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1aeec434 867 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
8fc2751b 868 {Opt_grpquota, "grpquota"},
1f54587b
JK
869 {Opt_noquota, "noquota"},
870 {Opt_quota, "quota"},
8fc2751b 871 {Opt_usrquota, "usrquota"},
1da177e4 872 {Opt_barrier, "barrier=%u"},
1da177e4 873 {Opt_resize, "resize"},
92587216 874 {Opt_err, NULL},
1da177e4
LT
875};
876
4cf46b67 877static ext3_fsblk_t get_sb_block(void **data, struct super_block *sb)
1da177e4 878{
e9ad5620
DK
879 ext3_fsblk_t sb_block;
880 char *options = (char *) *data;
1da177e4
LT
881
882 if (!options || strncmp(options, "sb=", 3) != 0)
883 return 1; /* Default location */
884 options += 3;
43d23f90 885 /*todo: use simple_strtoll with >32bit ext3 */
1da177e4
LT
886 sb_block = simple_strtoul(options, &options, 0);
887 if (*options && *options != ',') {
4cf46b67 888 ext3_msg(sb, "error: invalid sb specification: %s",
1da177e4
LT
889 (char *) *data);
890 return 1;
891 }
892 if (*options == ',')
893 options++;
894 *data = (void *) options;
895 return sb_block;
896}
897
71b96257 898static int parse_options (char *options, struct super_block *sb,
eee194e7 899 unsigned int *inum, unsigned long *journal_devnum,
43d23f90 900 ext3_fsblk_t *n_blocks_count, int is_remount)
1da177e4
LT
901{
902 struct ext3_sb_info *sbi = EXT3_SB(sb);
903 char * p;
904 substring_t args[MAX_OPT_ARGS];
905 int data_opt = 0;
906 int option;
907#ifdef CONFIG_QUOTA
d06bf1d2 908 int qtype, qfmt;
1da177e4
LT
909 char *qname;
910#endif
911
912 if (!options)
913 return 1;
914
915 while ((p = strsep (&options, ",")) != NULL) {
916 int token;
917 if (!*p)
918 continue;
919
920 token = match_token(p, tokens, args);
921 switch (token) {
922 case Opt_bsd_df:
923 clear_opt (sbi->s_mount_opt, MINIX_DF);
924 break;
925 case Opt_minix_df:
926 set_opt (sbi->s_mount_opt, MINIX_DF);
927 break;
928 case Opt_grpid:
929 set_opt (sbi->s_mount_opt, GRPID);
930 break;
931 case Opt_nogrpid:
932 clear_opt (sbi->s_mount_opt, GRPID);
933 break;
934 case Opt_resuid:
935 if (match_int(&args[0], &option))
936 return 0;
937 sbi->s_resuid = option;
938 break;
939 case Opt_resgid:
940 if (match_int(&args[0], &option))
941 return 0;
942 sbi->s_resgid = option;
943 break;
944 case Opt_sb:
945 /* handled by get_sb_block() instead of here */
946 /* *sb_block = match_int(&args[0]); */
947 break;
948 case Opt_err_panic:
949 clear_opt (sbi->s_mount_opt, ERRORS_CONT);
950 clear_opt (sbi->s_mount_opt, ERRORS_RO);
951 set_opt (sbi->s_mount_opt, ERRORS_PANIC);
952 break;
953 case Opt_err_ro:
954 clear_opt (sbi->s_mount_opt, ERRORS_CONT);
955 clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
956 set_opt (sbi->s_mount_opt, ERRORS_RO);
957 break;
958 case Opt_err_cont:
959 clear_opt (sbi->s_mount_opt, ERRORS_RO);
960 clear_opt (sbi->s_mount_opt, ERRORS_PANIC);
961 set_opt (sbi->s_mount_opt, ERRORS_CONT);
962 break;
963 case Opt_nouid32:
964 set_opt (sbi->s_mount_opt, NO_UID32);
965 break;
1da177e4
LT
966 case Opt_nocheck:
967 clear_opt (sbi->s_mount_opt, CHECK);
968 break;
969 case Opt_debug:
970 set_opt (sbi->s_mount_opt, DEBUG);
971 break;
972 case Opt_oldalloc:
973 set_opt (sbi->s_mount_opt, OLDALLOC);
974 break;
975 case Opt_orlov:
976 clear_opt (sbi->s_mount_opt, OLDALLOC);
977 break;
978#ifdef CONFIG_EXT3_FS_XATTR
979 case Opt_user_xattr:
980 set_opt (sbi->s_mount_opt, XATTR_USER);
981 break;
982 case Opt_nouser_xattr:
983 clear_opt (sbi->s_mount_opt, XATTR_USER);
984 break;
985#else
986 case Opt_user_xattr:
987 case Opt_nouser_xattr:
4cf46b67
AF
988 ext3_msg(sb, KERN_INFO,
989 "(no)user_xattr options not supported");
1da177e4
LT
990 break;
991#endif
992#ifdef CONFIG_EXT3_FS_POSIX_ACL
993 case Opt_acl:
994 set_opt(sbi->s_mount_opt, POSIX_ACL);
995 break;
996 case Opt_noacl:
997 clear_opt(sbi->s_mount_opt, POSIX_ACL);
998 break;
999#else
1000 case Opt_acl:
1001 case Opt_noacl:
4cf46b67
AF
1002 ext3_msg(sb, KERN_INFO,
1003 "(no)acl options not supported");
1da177e4
LT
1004 break;
1005#endif
1006 case Opt_reservation:
1007 set_opt(sbi->s_mount_opt, RESERVATION);
1008 break;
1009 case Opt_noreservation:
1010 clear_opt(sbi->s_mount_opt, RESERVATION);
1011 break;
1012 case Opt_journal_update:
1013 /* @@@ FIXME */
1014 /* Eventually we will want to be able to create
1015 a journal file here. For now, only allow the
1016 user to specify an existing inode to be the
1017 journal file. */
1018 if (is_remount) {
4cf46b67
AF
1019 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1020 "journal on remount");
1da177e4
LT
1021 return 0;
1022 }
1023 set_opt (sbi->s_mount_opt, UPDATE_JOURNAL);
1024 break;
1025 case Opt_journal_inum:
1026 if (is_remount) {
4cf46b67
AF
1027 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1028 "journal on remount");
1da177e4
LT
1029 return 0;
1030 }
1031 if (match_int(&args[0], &option))
1032 return 0;
1033 *inum = option;
1034 break;
71b96257
JL
1035 case Opt_journal_dev:
1036 if (is_remount) {
4cf46b67
AF
1037 ext3_msg(sb, KERN_ERR, "error: cannot specify "
1038 "journal on remount");
71b96257
JL
1039 return 0;
1040 }
1041 if (match_int(&args[0], &option))
1042 return 0;
1043 *journal_devnum = option;
1044 break;
1da177e4
LT
1045 case Opt_noload:
1046 set_opt (sbi->s_mount_opt, NOLOAD);
1047 break;
1048 case Opt_commit:
1049 if (match_int(&args[0], &option))
1050 return 0;
1051 if (option < 0)
1052 return 0;
1053 if (option == 0)
1054 option = JBD_DEFAULT_MAX_COMMIT_AGE;
1055 sbi->s_commit_interval = HZ * option;
1056 break;
1057 case Opt_data_journal:
1058 data_opt = EXT3_MOUNT_JOURNAL_DATA;
1059 goto datacheck;
1060 case Opt_data_ordered:
1061 data_opt = EXT3_MOUNT_ORDERED_DATA;
1062 goto datacheck;
1063 case Opt_data_writeback:
1064 data_opt = EXT3_MOUNT_WRITEBACK_DATA;
1065 datacheck:
1066 if (is_remount) {
e3c96435 1067 if (test_opt(sb, DATA_FLAGS) == data_opt)
3c4cec65 1068 break;
4cf46b67
AF
1069 ext3_msg(sb, KERN_ERR,
1070 "error: cannot change "
3c4cec65
JK
1071 "data mode on remount. The filesystem "
1072 "is mounted in data=%s mode and you "
4cf46b67 1073 "try to remount it in data=%s mode.",
e3c96435
DM
1074 data_mode_string(test_opt(sb,
1075 DATA_FLAGS)),
3c4cec65
JK
1076 data_mode_string(data_opt));
1077 return 0;
1da177e4 1078 } else {
e3c96435 1079 clear_opt(sbi->s_mount_opt, DATA_FLAGS);
1da177e4
LT
1080 sbi->s_mount_opt |= data_opt;
1081 }
1082 break;
0e4fb5e2
HK
1083 case Opt_data_err_abort:
1084 set_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1085 break;
1086 case Opt_data_err_ignore:
1087 clear_opt(sbi->s_mount_opt, DATA_ERR_ABORT);
1088 break;
1da177e4
LT
1089#ifdef CONFIG_QUOTA
1090 case Opt_usrjquota:
1091 qtype = USRQUOTA;
1092 goto set_qf_name;
1093 case Opt_grpjquota:
1094 qtype = GRPQUOTA;
1095set_qf_name:
ee0d5ffe 1096 if (sb_any_quota_loaded(sb) &&
d06bf1d2 1097 !sbi->s_qf_names[qtype]) {
4cf46b67
AF
1098 ext3_msg(sb, KERN_ERR,
1099 "error: cannot change journaled "
1100 "quota options when quota turned on.");
1da177e4
LT
1101 return 0;
1102 }
1103 qname = match_strdup(&args[0]);
1104 if (!qname) {
4cf46b67
AF
1105 ext3_msg(sb, KERN_ERR,
1106 "error: not enough memory for "
1107 "storing quotafile name.");
1da177e4
LT
1108 return 0;
1109 }
1110 if (sbi->s_qf_names[qtype] &&
1111 strcmp(sbi->s_qf_names[qtype], qname)) {
4cf46b67
AF
1112 ext3_msg(sb, KERN_ERR,
1113 "error: %s quota file already "
1114 "specified.", QTYPE2NAME(qtype));
1da177e4
LT
1115 kfree(qname);
1116 return 0;
1117 }
1118 sbi->s_qf_names[qtype] = qname;
1119 if (strchr(sbi->s_qf_names[qtype], '/')) {
4cf46b67
AF
1120 ext3_msg(sb, KERN_ERR,
1121 "error: quotafile must be on "
1122 "filesystem root.");
1da177e4
LT
1123 kfree(sbi->s_qf_names[qtype]);
1124 sbi->s_qf_names[qtype] = NULL;
1125 return 0;
1126 }
1f54587b 1127 set_opt(sbi->s_mount_opt, QUOTA);
1da177e4
LT
1128 break;
1129 case Opt_offusrjquota:
1130 qtype = USRQUOTA;
1131 goto clear_qf_name;
1132 case Opt_offgrpjquota:
1133 qtype = GRPQUOTA;
1134clear_qf_name:
ee0d5ffe 1135 if (sb_any_quota_loaded(sb) &&
d06bf1d2 1136 sbi->s_qf_names[qtype]) {
4cf46b67 1137 ext3_msg(sb, KERN_ERR, "error: cannot change "
99aeaf63 1138 "journaled quota options when "
4cf46b67 1139 "quota turned on.");
1da177e4
LT
1140 return 0;
1141 }
08c6a96f
JK
1142 /*
1143 * The space will be released later when all options
1144 * are confirmed to be correct
1145 */
1da177e4
LT
1146 sbi->s_qf_names[qtype] = NULL;
1147 break;
1148 case Opt_jqfmt_vfsold:
d06bf1d2
JK
1149 qfmt = QFMT_VFS_OLD;
1150 goto set_qf_format;
1da177e4 1151 case Opt_jqfmt_vfsv0:
d06bf1d2 1152 qfmt = QFMT_VFS_V0;
1aeec434
JK
1153 goto set_qf_format;
1154 case Opt_jqfmt_vfsv1:
1155 qfmt = QFMT_VFS_V1;
d06bf1d2 1156set_qf_format:
ee0d5ffe 1157 if (sb_any_quota_loaded(sb) &&
d06bf1d2 1158 sbi->s_jquota_fmt != qfmt) {
4cf46b67 1159 ext3_msg(sb, KERN_ERR, "error: cannot change "
d06bf1d2 1160 "journaled quota options when "
4cf46b67 1161 "quota turned on.");
d06bf1d2
JK
1162 return 0;
1163 }
1164 sbi->s_jquota_fmt = qfmt;
1da177e4 1165 break;
1f54587b 1166 case Opt_quota:
8fc2751b
MB
1167 case Opt_usrquota:
1168 set_opt(sbi->s_mount_opt, QUOTA);
1169 set_opt(sbi->s_mount_opt, USRQUOTA);
1170 break;
1171 case Opt_grpquota:
1f54587b 1172 set_opt(sbi->s_mount_opt, QUOTA);
8fc2751b 1173 set_opt(sbi->s_mount_opt, GRPQUOTA);
1f54587b
JK
1174 break;
1175 case Opt_noquota:
ee0d5ffe 1176 if (sb_any_quota_loaded(sb)) {
4cf46b67
AF
1177 ext3_msg(sb, KERN_ERR, "error: cannot change "
1178 "quota options when quota turned on.");
1f54587b
JK
1179 return 0;
1180 }
1181 clear_opt(sbi->s_mount_opt, QUOTA);
8fc2751b
MB
1182 clear_opt(sbi->s_mount_opt, USRQUOTA);
1183 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1f54587b 1184 break;
1da177e4 1185#else
8fc2751b
MB
1186 case Opt_quota:
1187 case Opt_usrquota:
1188 case Opt_grpquota:
4cf46b67
AF
1189 ext3_msg(sb, KERN_ERR,
1190 "error: quota options not supported.");
fa1ff1e0 1191 break;
1da177e4
LT
1192 case Opt_usrjquota:
1193 case Opt_grpjquota:
1194 case Opt_offusrjquota:
1195 case Opt_offgrpjquota:
1196 case Opt_jqfmt_vfsold:
1197 case Opt_jqfmt_vfsv0:
1aeec434 1198 case Opt_jqfmt_vfsv1:
4cf46b67
AF
1199 ext3_msg(sb, KERN_ERR,
1200 "error: journaled quota options not "
1201 "supported.");
1da177e4 1202 break;
1f54587b
JK
1203 case Opt_noquota:
1204 break;
1da177e4
LT
1205#endif
1206 case Opt_abort:
1207 set_opt(sbi->s_mount_opt, ABORT);
1208 break;
1209 case Opt_barrier:
1210 if (match_int(&args[0], &option))
1211 return 0;
1212 if (option)
1213 set_opt(sbi->s_mount_opt, BARRIER);
1214 else
1215 clear_opt(sbi->s_mount_opt, BARRIER);
1216 break;
1217 case Opt_ignore:
1218 break;
1219 case Opt_resize:
08c6a96f 1220 if (!is_remount) {
4cf46b67
AF
1221 ext3_msg(sb, KERN_ERR,
1222 "error: resize option only available "
1223 "for remount");
1da177e4
LT
1224 return 0;
1225 }
c7f1721e
KZ
1226 if (match_int(&args[0], &option) != 0)
1227 return 0;
1da177e4
LT
1228 *n_blocks_count = option;
1229 break;
1230 case Opt_nobh:
1231 set_opt(sbi->s_mount_opt, NOBH);
1232 break;
ade1a29e
BP
1233 case Opt_bh:
1234 clear_opt(sbi->s_mount_opt, NOBH);
1235 break;
1da177e4 1236 default:
4cf46b67
AF
1237 ext3_msg(sb, KERN_ERR,
1238 "error: unrecognized mount option \"%s\" "
1239 "or missing value", p);
1da177e4
LT
1240 return 0;
1241 }
1242 }
1243#ifdef CONFIG_QUOTA
8fc2751b 1244 if (sbi->s_qf_names[USRQUOTA] || sbi->s_qf_names[GRPQUOTA]) {
e3c96435 1245 if (test_opt(sb, USRQUOTA) && sbi->s_qf_names[USRQUOTA])
8fc2751b 1246 clear_opt(sbi->s_mount_opt, USRQUOTA);
e3c96435 1247 if (test_opt(sb, GRPQUOTA) && sbi->s_qf_names[GRPQUOTA])
8fc2751b
MB
1248 clear_opt(sbi->s_mount_opt, GRPQUOTA);
1249
e3c96435
DM
1250 if ((sbi->s_qf_names[USRQUOTA] && test_opt(sb, GRPQUOTA)) ||
1251 (sbi->s_qf_names[GRPQUOTA] && test_opt(sb, USRQUOTA))) {
4cf46b67
AF
1252 ext3_msg(sb, KERN_ERR, "error: old and new quota "
1253 "format mixing.");
8fc2751b
MB
1254 return 0;
1255 }
1256
1257 if (!sbi->s_jquota_fmt) {
4cf46b67
AF
1258 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
1259 "not specified.");
8fc2751b
MB
1260 return 0;
1261 }
1262 } else {
1263 if (sbi->s_jquota_fmt) {
4cf46b67 1264 ext3_msg(sb, KERN_ERR, "error: journaled quota format "
99aeaf63 1265 "specified with no journaling "
4cf46b67 1266 "enabled.");
8fc2751b
MB
1267 return 0;
1268 }
1da177e4
LT
1269 }
1270#endif
1da177e4
LT
1271 return 1;
1272}
1273
1274static int ext3_setup_super(struct super_block *sb, struct ext3_super_block *es,
1275 int read_only)
1276{
1277 struct ext3_sb_info *sbi = EXT3_SB(sb);
1278 int res = 0;
1279
1280 if (le32_to_cpu(es->s_rev_level) > EXT3_MAX_SUPP_REV) {
4cf46b67
AF
1281 ext3_msg(sb, KERN_ERR,
1282 "error: revision level too high, "
1283 "forcing read-only mode");
1da177e4
LT
1284 res = MS_RDONLY;
1285 }
1286 if (read_only)
1287 return res;
1288 if (!(sbi->s_mount_state & EXT3_VALID_FS))
4cf46b67
AF
1289 ext3_msg(sb, KERN_WARNING,
1290 "warning: mounting unchecked fs, "
1291 "running e2fsck is recommended");
1da177e4 1292 else if ((sbi->s_mount_state & EXT3_ERROR_FS))
4cf46b67
AF
1293 ext3_msg(sb, KERN_WARNING,
1294 "warning: mounting fs with errors, "
1295 "running e2fsck is recommended");
1da177e4
LT
1296 else if ((__s16) le16_to_cpu(es->s_max_mnt_count) >= 0 &&
1297 le16_to_cpu(es->s_mnt_count) >=
1298 (unsigned short) (__s16) le16_to_cpu(es->s_max_mnt_count))
4cf46b67
AF
1299 ext3_msg(sb, KERN_WARNING,
1300 "warning: maximal mount count reached, "
1301 "running e2fsck is recommended");
1da177e4
LT
1302 else if (le32_to_cpu(es->s_checkinterval) &&
1303 (le32_to_cpu(es->s_lastcheck) +
1304 le32_to_cpu(es->s_checkinterval) <= get_seconds()))
4cf46b67
AF
1305 ext3_msg(sb, KERN_WARNING,
1306 "warning: checktime reached, "
1307 "running e2fsck is recommended");
1da177e4
LT
1308#if 0
1309 /* @@@ We _will_ want to clear the valid bit if we find
1310 inconsistencies, to force a fsck at reboot. But for
1311 a plain journaled filesystem we can keep it set as
1312 valid forever! :) */
e7f23ebd 1313 es->s_state &= cpu_to_le16(~EXT3_VALID_FS);
1da177e4
LT
1314#endif
1315 if (!(__s16) le16_to_cpu(es->s_max_mnt_count))
1316 es->s_max_mnt_count = cpu_to_le16(EXT3_DFL_MAX_MNT_COUNT);
50e8a289 1317 le16_add_cpu(&es->s_mnt_count, 1);
1da177e4
LT
1318 es->s_mtime = cpu_to_le32(get_seconds());
1319 ext3_update_dynamic_rev(sb);
1320 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
1321
1322 ext3_commit_super(sb, es, 1);
1323 if (test_opt(sb, DEBUG))
4cf46b67
AF
1324 ext3_msg(sb, KERN_INFO, "[bs=%lu, gc=%lu, "
1325 "bpg=%lu, ipg=%lu, mo=%04lx]",
1da177e4
LT
1326 sb->s_blocksize,
1327 sbi->s_groups_count,
1328 EXT3_BLOCKS_PER_GROUP(sb),
1329 EXT3_INODES_PER_GROUP(sb),
1330 sbi->s_mount_opt);
1331
1da177e4
LT
1332 if (EXT3_SB(sb)->s_journal->j_inode == NULL) {
1333 char b[BDEVNAME_SIZE];
4cf46b67 1334 ext3_msg(sb, KERN_INFO, "using external journal on %s",
1da177e4
LT
1335 bdevname(EXT3_SB(sb)->s_journal->j_dev, b));
1336 } else {
4cf46b67 1337 ext3_msg(sb, KERN_INFO, "using internal journal");
1da177e4 1338 }
1da177e4
LT
1339 return res;
1340}
1341
1342/* Called at mount-time, super-block is locked */
197cd65a 1343static int ext3_check_descriptors(struct super_block *sb)
1da177e4
LT
1344{
1345 struct ext3_sb_info *sbi = EXT3_SB(sb);
1da177e4
LT
1346 int i;
1347
1348 ext3_debug ("Checking group descriptors");
1349
197cd65a
AM
1350 for (i = 0; i < sbi->s_groups_count; i++) {
1351 struct ext3_group_desc *gdp = ext3_get_group_desc(sb, i, NULL);
1eaafeae
AM
1352 ext3_fsblk_t first_block = ext3_group_first_block_no(sb, i);
1353 ext3_fsblk_t last_block;
197cd65a 1354
855565e8
ES
1355 if (i == sbi->s_groups_count - 1)
1356 last_block = le32_to_cpu(sbi->s_es->s_blocks_count) - 1;
1357 else
1358 last_block = first_block +
1359 (EXT3_BLOCKS_PER_GROUP(sb) - 1);
1360
855565e8
ES
1361 if (le32_to_cpu(gdp->bg_block_bitmap) < first_block ||
1362 le32_to_cpu(gdp->bg_block_bitmap) > last_block)
1da177e4
LT
1363 {
1364 ext3_error (sb, "ext3_check_descriptors",
1365 "Block bitmap for group %d"
1366 " not in group (block %lu)!",
1367 i, (unsigned long)
1368 le32_to_cpu(gdp->bg_block_bitmap));
1369 return 0;
1370 }
855565e8
ES
1371 if (le32_to_cpu(gdp->bg_inode_bitmap) < first_block ||
1372 le32_to_cpu(gdp->bg_inode_bitmap) > last_block)
1da177e4
LT
1373 {
1374 ext3_error (sb, "ext3_check_descriptors",
1375 "Inode bitmap for group %d"
1376 " not in group (block %lu)!",
1377 i, (unsigned long)
1378 le32_to_cpu(gdp->bg_inode_bitmap));
1379 return 0;
1380 }
855565e8 1381 if (le32_to_cpu(gdp->bg_inode_table) < first_block ||
780dcdb2 1382 le32_to_cpu(gdp->bg_inode_table) + sbi->s_itb_per_group - 1 >
855565e8 1383 last_block)
1da177e4
LT
1384 {
1385 ext3_error (sb, "ext3_check_descriptors",
1386 "Inode table for group %d"
1387 " not in group (block %lu)!",
1388 i, (unsigned long)
1389 le32_to_cpu(gdp->bg_inode_table));
1390 return 0;
1391 }
1da177e4
LT
1392 }
1393
1394 sbi->s_es->s_free_blocks_count=cpu_to_le32(ext3_count_free_blocks(sb));
1395 sbi->s_es->s_free_inodes_count=cpu_to_le32(ext3_count_free_inodes(sb));
1396 return 1;
1397}
1398
1399
1400/* ext3_orphan_cleanup() walks a singly-linked list of inodes (starting at
1401 * the superblock) which were deleted from all directories, but held open by
1402 * a process at the time of a crash. We walk the list and try to delete these
1403 * inodes at recovery time (only with a read-write filesystem).
1404 *
1405 * In order to keep the orphan inode chain consistent during traversal (in
1406 * case of crash during recovery), we link each inode into the superblock
1407 * orphan list_head and handle it the same way as an inode deletion during
1408 * normal operation (which journals the operations for us).
1409 *
1410 * We only do an iget() and an iput() on each inode, which is very safe if we
1411 * accidentally point at an in-use or already deleted inode. The worst that
1412 * can happen in this case is that we get a "bit already cleared" message from
1413 * ext3_free_inode(). The only reason we would point at a wrong inode is if
1414 * e2fsck was run on this filesystem, and it must have already done the orphan
1415 * inode cleanup for us, so we can safely abort without any further action.
1416 */
1417static void ext3_orphan_cleanup (struct super_block * sb,
1418 struct ext3_super_block * es)
1419{
1420 unsigned int s_flags = sb->s_flags;
1421 int nr_orphans = 0, nr_truncates = 0;
1422#ifdef CONFIG_QUOTA
1423 int i;
1424#endif
1425 if (!es->s_last_orphan) {
1426 jbd_debug(4, "no orphan inodes to clean up\n");
1427 return;
1428 }
1429
a8f48a95 1430 if (bdev_read_only(sb->s_bdev)) {
4cf46b67
AF
1431 ext3_msg(sb, KERN_ERR, "error: write access "
1432 "unavailable, skipping orphan cleanup.");
a8f48a95
ES
1433 return;
1434 }
1435
1da177e4
LT
1436 if (EXT3_SB(sb)->s_mount_state & EXT3_ERROR_FS) {
1437 if (es->s_last_orphan)
1438 jbd_debug(1, "Errors on filesystem, "
1439 "clearing orphan list.\n");
1440 es->s_last_orphan = 0;
1441 jbd_debug(1, "Skipping orphan recovery on fs with errors.\n");
1442 return;
1443 }
1444
1445 if (s_flags & MS_RDONLY) {
4cf46b67 1446 ext3_msg(sb, KERN_INFO, "orphan cleanup on readonly fs");
1da177e4
LT
1447 sb->s_flags &= ~MS_RDONLY;
1448 }
1449#ifdef CONFIG_QUOTA
1450 /* Needed for iput() to work correctly and not trash data */
1451 sb->s_flags |= MS_ACTIVE;
1452 /* Turn on quotas so that they are updated correctly */
1453 for (i = 0; i < MAXQUOTAS; i++) {
1454 if (EXT3_SB(sb)->s_qf_names[i]) {
1455 int ret = ext3_quota_on_mount(sb, i);
1456 if (ret < 0)
4cf46b67
AF
1457 ext3_msg(sb, KERN_ERR,
1458 "error: cannot turn on journaled "
1459 "quota: %d", ret);
1da177e4
LT
1460 }
1461 }
1462#endif
1463
1464 while (es->s_last_orphan) {
1465 struct inode *inode;
1466
473043dc
DH
1467 inode = ext3_orphan_get(sb, le32_to_cpu(es->s_last_orphan));
1468 if (IS_ERR(inode)) {
1da177e4
LT
1469 es->s_last_orphan = 0;
1470 break;
1471 }
1472
1473 list_add(&EXT3_I(inode)->i_orphan, &EXT3_SB(sb)->s_orphan);
81a05227 1474 vfs_dq_init(inode);
1da177e4
LT
1475 if (inode->i_nlink) {
1476 printk(KERN_DEBUG
eee194e7 1477 "%s: truncating inode %lu to %Ld bytes\n",
e05b6b52 1478 __func__, inode->i_ino, inode->i_size);
eee194e7 1479 jbd_debug(2, "truncating inode %lu to %Ld bytes\n",
1da177e4
LT
1480 inode->i_ino, inode->i_size);
1481 ext3_truncate(inode);
1482 nr_truncates++;
1483 } else {
1484 printk(KERN_DEBUG
eee194e7 1485 "%s: deleting unreferenced inode %lu\n",
e05b6b52 1486 __func__, inode->i_ino);
eee194e7 1487 jbd_debug(2, "deleting unreferenced inode %lu\n",
1da177e4
LT
1488 inode->i_ino);
1489 nr_orphans++;
1490 }
1491 iput(inode); /* The delete magic happens here! */
1492 }
1493
1494#define PLURAL(x) (x), ((x)==1) ? "" : "s"
1495
1496 if (nr_orphans)
4cf46b67
AF
1497 ext3_msg(sb, KERN_INFO, "%d orphan inode%s deleted",
1498 PLURAL(nr_orphans));
1da177e4 1499 if (nr_truncates)
4cf46b67
AF
1500 ext3_msg(sb, KERN_INFO, "%d truncate%s cleaned up",
1501 PLURAL(nr_truncates));
1da177e4
LT
1502#ifdef CONFIG_QUOTA
1503 /* Turn quotas off */
1504 for (i = 0; i < MAXQUOTAS; i++) {
1505 if (sb_dqopt(sb)->files[i])
2fd83a4f 1506 vfs_quota_off(sb, i, 0);
1da177e4
LT
1507 }
1508#endif
1509 sb->s_flags = s_flags; /* Restore MS_RDONLY status */
1510}
1511
1da177e4
LT
1512/*
1513 * Maximal file size. There is a direct, and {,double-,triple-}indirect
1514 * block limit, and also a limit of (2^32 - 1) 512-byte sectors in i_blocks.
1515 * We need to be 1 filesystem block less than the 2^32 sector limit.
1516 */
1517static loff_t ext3_max_size(int bits)
1518{
1519 loff_t res = EXT3_NDIR_BLOCKS;
fe7fdc37
AK
1520 int meta_blocks;
1521 loff_t upper_limit;
1522
1523 /* This is calculated to be the largest file size for a
1524 * dense, file such that the total number of
1da177e4 1525 * sectors in the file, including data and all indirect blocks,
fe7fdc37
AK
1526 * does not exceed 2^32 -1
1527 * __u32 i_blocks representing the total number of
1528 * 512 bytes blocks of the file
1529 */
1530 upper_limit = (1LL << 32) - 1;
1531
1532 /* total blocks in file system block size */
1533 upper_limit >>= (bits - 9);
1534
1535
1536 /* indirect blocks */
1537 meta_blocks = 1;
1538 /* double indirect blocks */
1539 meta_blocks += 1 + (1LL << (bits-2));
1540 /* tripple indirect blocks */
1541 meta_blocks += 1 + (1LL << (bits-2)) + (1LL << (2*(bits-2)));
1542
1543 upper_limit -= meta_blocks;
1544 upper_limit <<= bits;
1da177e4
LT
1545
1546 res += 1LL << (bits-2);
1547 res += 1LL << (2*(bits-2));
1548 res += 1LL << (3*(bits-2));
1549 res <<= bits;
1550 if (res > upper_limit)
1551 res = upper_limit;
fe7fdc37
AK
1552
1553 if (res > MAX_LFS_FILESIZE)
1554 res = MAX_LFS_FILESIZE;
1555
1da177e4
LT
1556 return res;
1557}
1558
43d23f90
MC
1559static ext3_fsblk_t descriptor_loc(struct super_block *sb,
1560 ext3_fsblk_t logic_sb_block,
1da177e4
LT
1561 int nr)
1562{
1563 struct ext3_sb_info *sbi = EXT3_SB(sb);
43d23f90 1564 unsigned long bg, first_meta_bg;
1da177e4
LT
1565 int has_super = 0;
1566
1da177e4
LT
1567 first_meta_bg = le32_to_cpu(sbi->s_es->s_first_meta_bg);
1568
1569 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_META_BG) ||
1570 nr < first_meta_bg)
1571 return (logic_sb_block + nr + 1);
1572 bg = sbi->s_desc_per_block * nr;
1573 if (ext3_bg_has_super(sb, bg))
1574 has_super = 1;
43d23f90 1575 return (has_super + ext3_group_first_block_no(sb, bg));
1da177e4
LT
1576}
1577
1578
1579static int ext3_fill_super (struct super_block *sb, void *data, int silent)
1580{
1581 struct buffer_head * bh;
1582 struct ext3_super_block *es = NULL;
1583 struct ext3_sb_info *sbi;
43d23f90 1584 ext3_fsblk_t block;
4cf46b67 1585 ext3_fsblk_t sb_block = get_sb_block(&data, sb);
43d23f90 1586 ext3_fsblk_t logic_sb_block;
1da177e4 1587 unsigned long offset = 0;
eee194e7 1588 unsigned int journal_inum = 0;
71b96257 1589 unsigned long journal_devnum = 0;
1da177e4
LT
1590 unsigned long def_mount_opts;
1591 struct inode *root;
1592 int blocksize;
1593 int hblock;
1594 int db_count;
1595 int i;
1596 int needs_recovery;
473043dc 1597 int ret = -EINVAL;
1da177e4 1598 __le32 features;
833f4077 1599 int err;
1da177e4 1600
f8314dc6 1601 sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
1da177e4
LT
1602 if (!sbi)
1603 return -ENOMEM;
5df096d6
PE
1604
1605 sbi->s_blockgroup_lock =
1606 kzalloc(sizeof(struct blockgroup_lock), GFP_KERNEL);
1607 if (!sbi->s_blockgroup_lock) {
1608 kfree(sbi);
1609 return -ENOMEM;
1610 }
1da177e4 1611 sb->s_fs_info = sbi;
1da177e4
LT
1612 sbi->s_mount_opt = 0;
1613 sbi->s_resuid = EXT3_DEF_RESUID;
1614 sbi->s_resgid = EXT3_DEF_RESGID;
571beed8 1615 sbi->s_sb_block = sb_block;
1da177e4
LT
1616
1617 unlock_kernel();
1618
1619 blocksize = sb_min_blocksize(sb, EXT3_MIN_BLOCK_SIZE);
1620 if (!blocksize) {
4cf46b67 1621 ext3_msg(sb, KERN_ERR, "error: unable to set blocksize");
1da177e4
LT
1622 goto out_fail;
1623 }
1624
1625 /*
1626 * The ext3 superblock will not be buffer aligned for other than 1kB
1627 * block sizes. We need to calculate the offset from buffer start.
1628 */
1629 if (blocksize != EXT3_MIN_BLOCK_SIZE) {
1630 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1631 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1632 } else {
1633 logic_sb_block = sb_block;
1634 }
1635
1636 if (!(bh = sb_bread(sb, logic_sb_block))) {
4cf46b67 1637 ext3_msg(sb, KERN_ERR, "error: unable to read superblock");
1da177e4
LT
1638 goto out_fail;
1639 }
1640 /*
1641 * Note: s_es must be initialized as soon as possible because
1642 * some ext3 macro-instructions depend on its value
1643 */
1644 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
1645 sbi->s_es = es;
1646 sb->s_magic = le16_to_cpu(es->s_magic);
1647 if (sb->s_magic != EXT3_SUPER_MAGIC)
1648 goto cantfind_ext3;
1649
1650 /* Set defaults before we parse the mount options */
1651 def_mount_opts = le32_to_cpu(es->s_default_mount_opts);
1652 if (def_mount_opts & EXT3_DEFM_DEBUG)
1653 set_opt(sbi->s_mount_opt, DEBUG);
1654 if (def_mount_opts & EXT3_DEFM_BSDGROUPS)
1655 set_opt(sbi->s_mount_opt, GRPID);
1656 if (def_mount_opts & EXT3_DEFM_UID16)
1657 set_opt(sbi->s_mount_opt, NO_UID32);
2e7842b8 1658#ifdef CONFIG_EXT3_FS_XATTR
1da177e4
LT
1659 if (def_mount_opts & EXT3_DEFM_XATTR_USER)
1660 set_opt(sbi->s_mount_opt, XATTR_USER);
2e7842b8
HD
1661#endif
1662#ifdef CONFIG_EXT3_FS_POSIX_ACL
1da177e4
LT
1663 if (def_mount_opts & EXT3_DEFM_ACL)
1664 set_opt(sbi->s_mount_opt, POSIX_ACL);
2e7842b8 1665#endif
1da177e4 1666 if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_DATA)
e3c96435 1667 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1da177e4 1668 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_ORDERED)
e3c96435 1669 set_opt(sbi->s_mount_opt, ORDERED_DATA);
1da177e4 1670 else if ((def_mount_opts & EXT3_DEFM_JMODE) == EXT3_DEFM_JMODE_WBACK)
e3c96435 1671 set_opt(sbi->s_mount_opt, WRITEBACK_DATA);
1da177e4
LT
1672
1673 if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_PANIC)
1674 set_opt(sbi->s_mount_opt, ERRORS_PANIC);
1eca93f9 1675 else if (le16_to_cpu(sbi->s_es->s_errors) == EXT3_ERRORS_CONTINUE)
2245d7c2 1676 set_opt(sbi->s_mount_opt, ERRORS_CONT);
1eca93f9
AK
1677 else
1678 set_opt(sbi->s_mount_opt, ERRORS_RO);
1da177e4
LT
1679
1680 sbi->s_resuid = le16_to_cpu(es->s_def_resuid);
1681 sbi->s_resgid = le16_to_cpu(es->s_def_resgid);
1682
1683 set_opt(sbi->s_mount_opt, RESERVATION);
1684
71b96257
JL
1685 if (!parse_options ((char *) data, sb, &journal_inum, &journal_devnum,
1686 NULL, 0))
1da177e4
LT
1687 goto failed_mount;
1688
1689 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
e3c96435 1690 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
1da177e4
LT
1691
1692 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV &&
1693 (EXT3_HAS_COMPAT_FEATURE(sb, ~0U) ||
1694 EXT3_HAS_RO_COMPAT_FEATURE(sb, ~0U) ||
1695 EXT3_HAS_INCOMPAT_FEATURE(sb, ~0U)))
4cf46b67
AF
1696 ext3_msg(sb, KERN_WARNING,
1697 "warning: feature flags set on rev 0 fs, "
1698 "running e2fsck is recommended");
1da177e4
LT
1699 /*
1700 * Check feature flags regardless of the revision level, since we
1701 * previously didn't change the revision level when setting the flags,
1702 * so there is a chance incompat flags are set on a rev 0 filesystem.
1703 */
1704 features = EXT3_HAS_INCOMPAT_FEATURE(sb, ~EXT3_FEATURE_INCOMPAT_SUPP);
1705 if (features) {
4cf46b67
AF
1706 ext3_msg(sb, KERN_ERR,
1707 "error: couldn't mount because of unsupported "
1708 "optional features (%x)", le32_to_cpu(features));
1da177e4
LT
1709 goto failed_mount;
1710 }
1711 features = EXT3_HAS_RO_COMPAT_FEATURE(sb, ~EXT3_FEATURE_RO_COMPAT_SUPP);
1712 if (!(sb->s_flags & MS_RDONLY) && features) {
4cf46b67
AF
1713 ext3_msg(sb, KERN_ERR,
1714 "error: couldn't mount RDWR because of unsupported "
1715 "optional features (%x)", le32_to_cpu(features));
1da177e4
LT
1716 goto failed_mount;
1717 }
1718 blocksize = BLOCK_SIZE << le32_to_cpu(es->s_log_block_size);
1719
1720 if (blocksize < EXT3_MIN_BLOCK_SIZE ||
1721 blocksize > EXT3_MAX_BLOCK_SIZE) {
4cf46b67
AF
1722 ext3_msg(sb, KERN_ERR,
1723 "error: couldn't mount because of unsupported "
1724 "filesystem blocksize %d", blocksize);
1da177e4
LT
1725 goto failed_mount;
1726 }
1727
e1defc4f 1728 hblock = bdev_logical_block_size(sb->s_bdev);
1da177e4
LT
1729 if (sb->s_blocksize != blocksize) {
1730 /*
1731 * Make sure the blocksize for the filesystem is larger
1732 * than the hardware sectorsize for the machine.
1733 */
1734 if (blocksize < hblock) {
4cf46b67
AF
1735 ext3_msg(sb, KERN_ERR,
1736 "error: fsblocksize %d too small for "
1737 "hardware sectorsize %d", blocksize, hblock);
1da177e4
LT
1738 goto failed_mount;
1739 }
1740
1741 brelse (bh);
0f0a89eb 1742 if (!sb_set_blocksize(sb, blocksize)) {
4cf46b67
AF
1743 ext3_msg(sb, KERN_ERR,
1744 "error: bad blocksize %d", blocksize);
0f0a89eb
TS
1745 goto out_fail;
1746 }
1da177e4
LT
1747 logic_sb_block = (sb_block * EXT3_MIN_BLOCK_SIZE) / blocksize;
1748 offset = (sb_block * EXT3_MIN_BLOCK_SIZE) % blocksize;
1749 bh = sb_bread(sb, logic_sb_block);
1750 if (!bh) {
4cf46b67
AF
1751 ext3_msg(sb, KERN_ERR,
1752 "error: can't read superblock on 2nd try");
1da177e4
LT
1753 goto failed_mount;
1754 }
1755 es = (struct ext3_super_block *)(((char *)bh->b_data) + offset);
1756 sbi->s_es = es;
1757 if (es->s_magic != cpu_to_le16(EXT3_SUPER_MAGIC)) {
4cf46b67
AF
1758 ext3_msg(sb, KERN_ERR,
1759 "error: magic mismatch");
1da177e4
LT
1760 goto failed_mount;
1761 }
1762 }
1763
1764 sb->s_maxbytes = ext3_max_size(sb->s_blocksize_bits);
1765
1766 if (le32_to_cpu(es->s_rev_level) == EXT3_GOOD_OLD_REV) {
1767 sbi->s_inode_size = EXT3_GOOD_OLD_INODE_SIZE;
1768 sbi->s_first_ino = EXT3_GOOD_OLD_FIRST_INO;
1769 } else {
1770 sbi->s_inode_size = le16_to_cpu(es->s_inode_size);
1771 sbi->s_first_ino = le32_to_cpu(es->s_first_ino);
1772 if ((sbi->s_inode_size < EXT3_GOOD_OLD_INODE_SIZE) ||
3fc74269 1773 (!is_power_of_2(sbi->s_inode_size)) ||
1da177e4 1774 (sbi->s_inode_size > blocksize)) {
4cf46b67
AF
1775 ext3_msg(sb, KERN_ERR,
1776 "error: unsupported inode size: %d",
1da177e4
LT
1777 sbi->s_inode_size);
1778 goto failed_mount;
1779 }
1780 }
1781 sbi->s_frag_size = EXT3_MIN_FRAG_SIZE <<
1782 le32_to_cpu(es->s_log_frag_size);
1783 if (blocksize != sbi->s_frag_size) {
4cf46b67
AF
1784 ext3_msg(sb, KERN_ERR,
1785 "error: fragsize %lu != blocksize %u (unsupported)",
1da177e4
LT
1786 sbi->s_frag_size, blocksize);
1787 goto failed_mount;
1788 }
1789 sbi->s_frags_per_block = 1;
1790 sbi->s_blocks_per_group = le32_to_cpu(es->s_blocks_per_group);
1791 sbi->s_frags_per_group = le32_to_cpu(es->s_frags_per_group);
1792 sbi->s_inodes_per_group = le32_to_cpu(es->s_inodes_per_group);
b47b6f38 1793 if (EXT3_INODE_SIZE(sb) == 0 || EXT3_INODES_PER_GROUP(sb) == 0)
1da177e4
LT
1794 goto cantfind_ext3;
1795 sbi->s_inodes_per_block = blocksize / EXT3_INODE_SIZE(sb);
1796 if (sbi->s_inodes_per_block == 0)
1797 goto cantfind_ext3;
1798 sbi->s_itb_per_group = sbi->s_inodes_per_group /
1799 sbi->s_inodes_per_block;
1800 sbi->s_desc_per_block = blocksize / sizeof(struct ext3_group_desc);
1801 sbi->s_sbh = bh;
1802 sbi->s_mount_state = le16_to_cpu(es->s_state);
f0d1b0b3
DH
1803 sbi->s_addr_per_block_bits = ilog2(EXT3_ADDR_PER_BLOCK(sb));
1804 sbi->s_desc_per_block_bits = ilog2(EXT3_DESC_PER_BLOCK(sb));
1da177e4
LT
1805 for (i=0; i < 4; i++)
1806 sbi->s_hash_seed[i] = le32_to_cpu(es->s_hash_seed[i]);
1807 sbi->s_def_hash_version = es->s_def_hash_version;
5e1f8c9e
TT
1808 i = le32_to_cpu(es->s_flags);
1809 if (i & EXT2_FLAGS_UNSIGNED_HASH)
1810 sbi->s_hash_unsigned = 3;
1811 else if ((i & EXT2_FLAGS_SIGNED_HASH) == 0) {
1812#ifdef __CHAR_UNSIGNED__
1813 es->s_flags |= cpu_to_le32(EXT2_FLAGS_UNSIGNED_HASH);
1814 sbi->s_hash_unsigned = 3;
1815#else
1816 es->s_flags |= cpu_to_le32(EXT2_FLAGS_SIGNED_HASH);
1817#endif
5e1f8c9e 1818 }
1da177e4
LT
1819
1820 if (sbi->s_blocks_per_group > blocksize * 8) {
4cf46b67
AF
1821 ext3_msg(sb, KERN_ERR,
1822 "#blocks per group too big: %lu",
1da177e4
LT
1823 sbi->s_blocks_per_group);
1824 goto failed_mount;
1825 }
1826 if (sbi->s_frags_per_group > blocksize * 8) {
4cf46b67
AF
1827 ext3_msg(sb, KERN_ERR,
1828 "error: #fragments per group too big: %lu",
1da177e4
LT
1829 sbi->s_frags_per_group);
1830 goto failed_mount;
1831 }
1832 if (sbi->s_inodes_per_group > blocksize * 8) {
4cf46b67
AF
1833 ext3_msg(sb, KERN_ERR,
1834 "error: #inodes per group too big: %lu",
1da177e4
LT
1835 sbi->s_inodes_per_group);
1836 goto failed_mount;
1837 }
1838
fcd5df35
MC
1839 if (le32_to_cpu(es->s_blocks_count) >
1840 (sector_t)(~0ULL) >> (sb->s_blocksize_bits - 9)) {
4cf46b67
AF
1841 ext3_msg(sb, KERN_ERR,
1842 "error: filesystem is too large to mount safely");
fcd5df35 1843 if (sizeof(sector_t) < 8)
4cf46b67
AF
1844 ext3_msg(sb, KERN_ERR,
1845 "error: CONFIG_LBDAF not enabled");
fcd5df35
MC
1846 goto failed_mount;
1847 }
1848
1da177e4
LT
1849 if (EXT3_BLOCKS_PER_GROUP(sb) == 0)
1850 goto cantfind_ext3;
855565e8
ES
1851 sbi->s_groups_count = ((le32_to_cpu(es->s_blocks_count) -
1852 le32_to_cpu(es->s_first_data_block) - 1)
1853 / EXT3_BLOCKS_PER_GROUP(sb)) + 1;
1da177e4
LT
1854 db_count = (sbi->s_groups_count + EXT3_DESC_PER_BLOCK(sb) - 1) /
1855 EXT3_DESC_PER_BLOCK(sb);
1856 sbi->s_group_desc = kmalloc(db_count * sizeof (struct buffer_head *),
1857 GFP_KERNEL);
1858 if (sbi->s_group_desc == NULL) {
4cf46b67
AF
1859 ext3_msg(sb, KERN_ERR,
1860 "error: not enough memory");
1da177e4
LT
1861 goto failed_mount;
1862 }
1863
5df096d6 1864 bgl_lock_init(sbi->s_blockgroup_lock);
1da177e4
LT
1865
1866 for (i = 0; i < db_count; i++) {
1867 block = descriptor_loc(sb, logic_sb_block, i);
1868 sbi->s_group_desc[i] = sb_bread(sb, block);
1869 if (!sbi->s_group_desc[i]) {
4cf46b67
AF
1870 ext3_msg(sb, KERN_ERR,
1871 "error: can't read group descriptor %d", i);
1da177e4
LT
1872 db_count = i;
1873 goto failed_mount2;
1874 }
1875 }
1876 if (!ext3_check_descriptors (sb)) {
4cf46b67
AF
1877 ext3_msg(sb, KERN_ERR,
1878 "error: group descriptors corrupted");
1da177e4
LT
1879 goto failed_mount2;
1880 }
1881 sbi->s_gdb_count = db_count;
1882 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
1883 spin_lock_init(&sbi->s_next_gen_lock);
0216bfcf 1884
833f4077
PZ
1885 err = percpu_counter_init(&sbi->s_freeblocks_counter,
1886 ext3_count_free_blocks(sb));
1887 if (!err) {
1888 err = percpu_counter_init(&sbi->s_freeinodes_counter,
1889 ext3_count_free_inodes(sb));
1890 }
1891 if (!err) {
1892 err = percpu_counter_init(&sbi->s_dirs_counter,
1893 ext3_count_dirs(sb));
1894 }
1895 if (err) {
4cf46b67 1896 ext3_msg(sb, KERN_ERR, "error: insufficient memory");
833f4077
PZ
1897 goto failed_mount3;
1898 }
0216bfcf 1899
1da177e4
LT
1900 /* per fileystem reservation list head & lock */
1901 spin_lock_init(&sbi->s_rsv_window_lock);
1902 sbi->s_rsv_window_root = RB_ROOT;
1903 /* Add a single, static dummy reservation to the start of the
1904 * reservation window list --- it gives us a placeholder for
1905 * append-at-start-of-list which makes the allocation logic
1906 * _much_ simpler. */
1907 sbi->s_rsv_window_head.rsv_start = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1908 sbi->s_rsv_window_head.rsv_end = EXT3_RESERVE_WINDOW_NOT_ALLOCATED;
1909 sbi->s_rsv_window_head.rsv_alloc_hit = 0;
1910 sbi->s_rsv_window_head.rsv_goal_size = 0;
1911 ext3_rsv_window_add(sb, &sbi->s_rsv_window_head);
1912
1913 /*
1914 * set up enough so that it can read an inode
1915 */
1916 sb->s_op = &ext3_sops;
1917 sb->s_export_op = &ext3_export_ops;
1918 sb->s_xattr = ext3_xattr_handlers;
1919#ifdef CONFIG_QUOTA
1920 sb->s_qcop = &ext3_qctl_operations;
1921 sb->dq_op = &ext3_quota_operations;
1922#endif
1923 INIT_LIST_HEAD(&sbi->s_orphan); /* unlinked but open files */
b8a052d0 1924 mutex_init(&sbi->s_orphan_lock);
96d2a495 1925 mutex_init(&sbi->s_resize_lock);
1da177e4
LT
1926
1927 sb->s_root = NULL;
1928
1929 needs_recovery = (es->s_last_orphan != 0 ||
1930 EXT3_HAS_INCOMPAT_FEATURE(sb,
1931 EXT3_FEATURE_INCOMPAT_RECOVER));
1932
1933 /*
1934 * The first inode we look at is the journal inode. Don't try
1935 * root first: it may be modified in the journal!
1936 */
1937 if (!test_opt(sb, NOLOAD) &&
1938 EXT3_HAS_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL)) {
71b96257 1939 if (ext3_load_journal(sb, es, journal_devnum))
0216bfcf 1940 goto failed_mount3;
1da177e4
LT
1941 } else if (journal_inum) {
1942 if (ext3_create_journal(sb, es, journal_inum))
0216bfcf 1943 goto failed_mount3;
1da177e4
LT
1944 } else {
1945 if (!silent)
4cf46b67
AF
1946 ext3_msg(sb, KERN_ERR,
1947 "error: no journal found. "
1948 "mounting ext3 over ext2?");
0216bfcf 1949 goto failed_mount3;
1da177e4
LT
1950 }
1951
1952 /* We have now updated the journal if required, so we can
1953 * validate the data journaling mode. */
1954 switch (test_opt(sb, DATA_FLAGS)) {
1955 case 0:
1956 /* No mode set, assume a default based on the journal
1957 capabilities: ORDERED_DATA if the journal can
1958 cope, else JOURNAL_DATA */
1959 if (journal_check_available_features
1960 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE))
bbae8bcc 1961 set_opt(sbi->s_mount_opt, DEFAULT_DATA_MODE);
1da177e4
LT
1962 else
1963 set_opt(sbi->s_mount_opt, JOURNAL_DATA);
1964 break;
1965
1966 case EXT3_MOUNT_ORDERED_DATA:
1967 case EXT3_MOUNT_WRITEBACK_DATA:
1968 if (!journal_check_available_features
1969 (sbi->s_journal, 0, 0, JFS_FEATURE_INCOMPAT_REVOKE)) {
4cf46b67
AF
1970 ext3_msg(sb, KERN_ERR,
1971 "error: journal does not support "
1972 "requested data journaling mode");
0216bfcf 1973 goto failed_mount4;
1da177e4
LT
1974 }
1975 default:
1976 break;
1977 }
1978
1979 if (test_opt(sb, NOBH)) {
1da177e4 1980 if (!(test_opt(sb, DATA_FLAGS) == EXT3_MOUNT_WRITEBACK_DATA)) {
4cf46b67
AF
1981 ext3_msg(sb, KERN_WARNING,
1982 "warning: ignoring nobh option - "
1983 "it is supported only with writeback mode");
1da177e4
LT
1984 clear_opt(sbi->s_mount_opt, NOBH);
1985 }
1986 }
1987 /*
1988 * The journal_load will have done any necessary log recovery,
1989 * so we can safely mount the rest of the filesystem now.
1990 */
1991
473043dc
DH
1992 root = ext3_iget(sb, EXT3_ROOT_INO);
1993 if (IS_ERR(root)) {
4cf46b67 1994 ext3_msg(sb, KERN_ERR, "error: get root inode failed");
473043dc 1995 ret = PTR_ERR(root);
0216bfcf 1996 goto failed_mount4;
1da177e4
LT
1997 }
1998 if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) {
473043dc 1999 iput(root);
4cf46b67 2000 ext3_msg(sb, KERN_ERR, "error: corrupt root inode, run e2fsck");
0216bfcf 2001 goto failed_mount4;
1da177e4 2002 }
473043dc
DH
2003 sb->s_root = d_alloc_root(root);
2004 if (!sb->s_root) {
4cf46b67 2005 ext3_msg(sb, KERN_ERR, "error: get root dentry failed");
473043dc
DH
2006 iput(root);
2007 ret = -ENOMEM;
2008 goto failed_mount4;
2009 }
1da177e4
LT
2010
2011 ext3_setup_super (sb, es, sb->s_flags & MS_RDONLY);
ed505ee4 2012
1da177e4
LT
2013 EXT3_SB(sb)->s_mount_state |= EXT3_ORPHAN_FS;
2014 ext3_orphan_cleanup(sb, es);
2015 EXT3_SB(sb)->s_mount_state &= ~EXT3_ORPHAN_FS;
2016 if (needs_recovery)
4cf46b67 2017 ext3_msg(sb, KERN_INFO, "recovery complete");
1da177e4 2018 ext3_mark_recovery_complete(sb, es);
4cf46b67 2019 ext3_msg(sb, KERN_INFO, "mounted filesystem with %s data mode",
1da177e4
LT
2020 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_JOURNAL_DATA ? "journal":
2021 test_opt(sb,DATA_FLAGS) == EXT3_MOUNT_ORDERED_DATA ? "ordered":
2022 "writeback");
2023
1da177e4
LT
2024 lock_kernel();
2025 return 0;
2026
2027cantfind_ext3:
2028 if (!silent)
4cf46b67
AF
2029 ext3_msg(sb, KERN_INFO,
2030 "error: can't find ext3 filesystem on dev %s.",
1da177e4
LT
2031 sb->s_id);
2032 goto failed_mount;
2033
0216bfcf 2034failed_mount4:
1da177e4 2035 journal_destroy(sbi->s_journal);
0216bfcf
MC
2036failed_mount3:
2037 percpu_counter_destroy(&sbi->s_freeblocks_counter);
2038 percpu_counter_destroy(&sbi->s_freeinodes_counter);
2039 percpu_counter_destroy(&sbi->s_dirs_counter);
1da177e4
LT
2040failed_mount2:
2041 for (i = 0; i < db_count; i++)
2042 brelse(sbi->s_group_desc[i]);
2043 kfree(sbi->s_group_desc);
2044failed_mount:
2045#ifdef CONFIG_QUOTA
2046 for (i = 0; i < MAXQUOTAS; i++)
2047 kfree(sbi->s_qf_names[i]);
2048#endif
2049 ext3_blkdev_remove(sbi);
2050 brelse(bh);
2051out_fail:
2052 sb->s_fs_info = NULL;
de5ce037 2053 kfree(sbi->s_blockgroup_lock);
1da177e4
LT
2054 kfree(sbi);
2055 lock_kernel();
473043dc 2056 return ret;
1da177e4
LT
2057}
2058
2059/*
2060 * Setup any per-fs journal parameters now. We'll do this both on
2061 * initial mount, once the journal has been initialised but before we've
ae6ddcc5 2062 * done any recovery; and again on any subsequent remount.
1da177e4
LT
2063 */
2064static void ext3_init_journal_params(struct super_block *sb, journal_t *journal)
2065{
2066 struct ext3_sb_info *sbi = EXT3_SB(sb);
2067
2068 if (sbi->s_commit_interval)
2069 journal->j_commit_interval = sbi->s_commit_interval;
2070 /* We could also set up an ext3-specific default for the commit
2071 * interval here, but for now we'll just fall back to the jbd
2072 * default. */
2073
2074 spin_lock(&journal->j_state_lock);
2075 if (test_opt(sb, BARRIER))
2076 journal->j_flags |= JFS_BARRIER;
2077 else
2078 journal->j_flags &= ~JFS_BARRIER;
0e4fb5e2
HK
2079 if (test_opt(sb, DATA_ERR_ABORT))
2080 journal->j_flags |= JFS_ABORT_ON_SYNCDATA_ERR;
2081 else
2082 journal->j_flags &= ~JFS_ABORT_ON_SYNCDATA_ERR;
1da177e4
LT
2083 spin_unlock(&journal->j_state_lock);
2084}
2085
eee194e7
ES
2086static journal_t *ext3_get_journal(struct super_block *sb,
2087 unsigned int journal_inum)
1da177e4
LT
2088{
2089 struct inode *journal_inode;
2090 journal_t *journal;
2091
2092 /* First, test for the existence of a valid inode on disk. Bad
2093 * things happen if we iget() an unused inode, as the subsequent
2094 * iput() will try to delete it. */
2095
473043dc
DH
2096 journal_inode = ext3_iget(sb, journal_inum);
2097 if (IS_ERR(journal_inode)) {
4cf46b67 2098 ext3_msg(sb, KERN_ERR, "error: no journal found");
1da177e4
LT
2099 return NULL;
2100 }
2101 if (!journal_inode->i_nlink) {
2102 make_bad_inode(journal_inode);
2103 iput(journal_inode);
4cf46b67 2104 ext3_msg(sb, KERN_ERR, "error: journal inode is deleted");
1da177e4
LT
2105 return NULL;
2106 }
2107
2108 jbd_debug(2, "Journal inode found at %p: %Ld bytes\n",
2109 journal_inode, journal_inode->i_size);
473043dc 2110 if (!S_ISREG(journal_inode->i_mode)) {
4cf46b67 2111 ext3_msg(sb, KERN_ERR, "error: invalid journal inode");
1da177e4
LT
2112 iput(journal_inode);
2113 return NULL;
2114 }
2115
2116 journal = journal_init_inode(journal_inode);
2117 if (!journal) {
4cf46b67 2118 ext3_msg(sb, KERN_ERR, "error: could not load journal inode");
1da177e4
LT
2119 iput(journal_inode);
2120 return NULL;
2121 }
2122 journal->j_private = sb;
2123 ext3_init_journal_params(sb, journal);
2124 return journal;
2125}
2126
2127static journal_t *ext3_get_dev_journal(struct super_block *sb,
2128 dev_t j_dev)
2129{
2130 struct buffer_head * bh;
2131 journal_t *journal;
43d23f90 2132 ext3_fsblk_t start;
1c2bf374 2133 ext3_fsblk_t len;
1da177e4 2134 int hblock, blocksize;
43d23f90 2135 ext3_fsblk_t sb_block;
1da177e4
LT
2136 unsigned long offset;
2137 struct ext3_super_block * es;
2138 struct block_device *bdev;
2139
4cf46b67 2140 bdev = ext3_blkdev_get(j_dev, sb);
1da177e4
LT
2141 if (bdev == NULL)
2142 return NULL;
2143
2144 if (bd_claim(bdev, sb)) {
4cf46b67
AF
2145 ext3_msg(sb, KERN_ERR,
2146 "error: failed to claim external journal device");
9a1c3542 2147 blkdev_put(bdev, FMODE_READ|FMODE_WRITE);
1da177e4
LT
2148 return NULL;
2149 }
2150
2151 blocksize = sb->s_blocksize;
e1defc4f 2152 hblock = bdev_logical_block_size(bdev);
1da177e4 2153 if (blocksize < hblock) {
4cf46b67
AF
2154 ext3_msg(sb, KERN_ERR,
2155 "error: blocksize too small for journal device");
1da177e4
LT
2156 goto out_bdev;
2157 }
2158
2159 sb_block = EXT3_MIN_BLOCK_SIZE / blocksize;
2160 offset = EXT3_MIN_BLOCK_SIZE % blocksize;
2161 set_blocksize(bdev, blocksize);
2162 if (!(bh = __bread(bdev, sb_block, blocksize))) {
4cf46b67
AF
2163 ext3_msg(sb, KERN_ERR, "error: couldn't read superblock of "
2164 "external journal");
1da177e4
LT
2165 goto out_bdev;
2166 }
2167
2168 es = (struct ext3_super_block *) (((char *)bh->b_data) + offset);
2169 if ((le16_to_cpu(es->s_magic) != EXT3_SUPER_MAGIC) ||
2170 !(le32_to_cpu(es->s_feature_incompat) &
2171 EXT3_FEATURE_INCOMPAT_JOURNAL_DEV)) {
4cf46b67
AF
2172 ext3_msg(sb, KERN_ERR, "error: external journal has "
2173 "bad superblock");
1da177e4
LT
2174 brelse(bh);
2175 goto out_bdev;
2176 }
2177
2178 if (memcmp(EXT3_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4cf46b67 2179 ext3_msg(sb, KERN_ERR, "error: journal UUID does not match");
1da177e4
LT
2180 brelse(bh);
2181 goto out_bdev;
2182 }
2183
2184 len = le32_to_cpu(es->s_blocks_count);
2185 start = sb_block + 1;
2186 brelse(bh); /* we're done with the superblock */
2187
2188 journal = journal_init_dev(bdev, sb->s_bdev,
2189 start, len, blocksize);
2190 if (!journal) {
4cf46b67
AF
2191 ext3_msg(sb, KERN_ERR,
2192 "error: failed to create device journal");
1da177e4
LT
2193 goto out_bdev;
2194 }
2195 journal->j_private = sb;
2196 ll_rw_block(READ, 1, &journal->j_sb_buffer);
2197 wait_on_buffer(journal->j_sb_buffer);
2198 if (!buffer_uptodate(journal->j_sb_buffer)) {
4cf46b67 2199 ext3_msg(sb, KERN_ERR, "I/O error on journal device");
1da177e4
LT
2200 goto out_journal;
2201 }
2202 if (be32_to_cpu(journal->j_superblock->s_nr_users) != 1) {
4cf46b67
AF
2203 ext3_msg(sb, KERN_ERR,
2204 "error: external journal has more than one "
2205 "user (unsupported) - %d",
1da177e4
LT
2206 be32_to_cpu(journal->j_superblock->s_nr_users));
2207 goto out_journal;
2208 }
2209 EXT3_SB(sb)->journal_bdev = bdev;
2210 ext3_init_journal_params(sb, journal);
2211 return journal;
2212out_journal:
2213 journal_destroy(journal);
2214out_bdev:
2215 ext3_blkdev_put(bdev);
2216 return NULL;
2217}
2218
71b96257
JL
2219static int ext3_load_journal(struct super_block *sb,
2220 struct ext3_super_block *es,
2221 unsigned long journal_devnum)
1da177e4
LT
2222{
2223 journal_t *journal;
eee194e7 2224 unsigned int journal_inum = le32_to_cpu(es->s_journal_inum);
71b96257 2225 dev_t journal_dev;
1da177e4
LT
2226 int err = 0;
2227 int really_read_only;
2228
71b96257
JL
2229 if (journal_devnum &&
2230 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
4cf46b67
AF
2231 ext3_msg(sb, KERN_INFO, "external journal device major/minor "
2232 "numbers have changed");
71b96257
JL
2233 journal_dev = new_decode_dev(journal_devnum);
2234 } else
2235 journal_dev = new_decode_dev(le32_to_cpu(es->s_journal_dev));
2236
1da177e4
LT
2237 really_read_only = bdev_read_only(sb->s_bdev);
2238
2239 /*
2240 * Are we loading a blank journal or performing recovery after a
2241 * crash? For recovery, we need to check in advance whether we
2242 * can get read-write access to the device.
2243 */
2244
2245 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER)) {
2246 if (sb->s_flags & MS_RDONLY) {
4cf46b67
AF
2247 ext3_msg(sb, KERN_INFO,
2248 "recovery required on readonly filesystem");
1da177e4 2249 if (really_read_only) {
4cf46b67
AF
2250 ext3_msg(sb, KERN_ERR, "error: write access "
2251 "unavailable, cannot proceed");
1da177e4
LT
2252 return -EROFS;
2253 }
4cf46b67
AF
2254 ext3_msg(sb, KERN_INFO,
2255 "write access will be enabled during recovery");
1da177e4
LT
2256 }
2257 }
2258
2259 if (journal_inum && journal_dev) {
4cf46b67
AF
2260 ext3_msg(sb, KERN_ERR, "error: filesystem has both journal "
2261 "and inode journals");
1da177e4
LT
2262 return -EINVAL;
2263 }
2264
2265 if (journal_inum) {
2266 if (!(journal = ext3_get_journal(sb, journal_inum)))
2267 return -EINVAL;
2268 } else {
2269 if (!(journal = ext3_get_dev_journal(sb, journal_dev)))
2270 return -EINVAL;
2271 }
2272
2273 if (!really_read_only && test_opt(sb, UPDATE_JOURNAL)) {
2274 err = journal_update_format(journal);
2275 if (err) {
4cf46b67 2276 ext3_msg(sb, KERN_ERR, "error updating journal");
1da177e4
LT
2277 journal_destroy(journal);
2278 return err;
2279 }
2280 }
2281
2282 if (!EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER))
2283 err = journal_wipe(journal, !really_read_only);
2284 if (!err)
2285 err = journal_load(journal);
2286
2287 if (err) {
4cf46b67 2288 ext3_msg(sb, KERN_ERR, "error loading journal");
1da177e4
LT
2289 journal_destroy(journal);
2290 return err;
2291 }
2292
2293 EXT3_SB(sb)->s_journal = journal;
2294 ext3_clear_journal_err(sb, es);
71b96257
JL
2295
2296 if (journal_devnum &&
2297 journal_devnum != le32_to_cpu(es->s_journal_dev)) {
2298 es->s_journal_dev = cpu_to_le32(journal_devnum);
71b96257
JL
2299
2300 /* Make sure we flush the recovery flag to disk. */
2301 ext3_commit_super(sb, es, 1);
2302 }
2303
1da177e4
LT
2304 return 0;
2305}
2306
4cf46b67
AF
2307static int ext3_create_journal(struct super_block *sb,
2308 struct ext3_super_block *es,
eee194e7 2309 unsigned int journal_inum)
1da177e4
LT
2310{
2311 journal_t *journal;
952d9de1 2312 int err;
1da177e4
LT
2313
2314 if (sb->s_flags & MS_RDONLY) {
4cf46b67
AF
2315 ext3_msg(sb, KERN_ERR,
2316 "error: readonly filesystem when trying to "
2317 "create journal");
1da177e4
LT
2318 return -EROFS;
2319 }
2320
952d9de1
BP
2321 journal = ext3_get_journal(sb, journal_inum);
2322 if (!journal)
1da177e4
LT
2323 return -EINVAL;
2324
4cf46b67 2325 ext3_msg(sb, KERN_INFO, "creating new journal on inode %u",
1da177e4
LT
2326 journal_inum);
2327
952d9de1
BP
2328 err = journal_create(journal);
2329 if (err) {
4cf46b67 2330 ext3_msg(sb, KERN_ERR, "error creating journal");
1da177e4
LT
2331 journal_destroy(journal);
2332 return -EIO;
2333 }
2334
2335 EXT3_SB(sb)->s_journal = journal;
2336
2337 ext3_update_dynamic_rev(sb);
2338 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2339 EXT3_SET_COMPAT_FEATURE(sb, EXT3_FEATURE_COMPAT_HAS_JOURNAL);
2340
2341 es->s_journal_inum = cpu_to_le32(journal_inum);
1da177e4
LT
2342
2343 /* Make sure we flush the recovery flag to disk. */
2344 ext3_commit_super(sb, es, 1);
2345
2346 return 0;
2347}
2348
c4be0c1d
TS
2349static int ext3_commit_super(struct super_block *sb,
2350 struct ext3_super_block *es,
1da177e4
LT
2351 int sync)
2352{
2353 struct buffer_head *sbh = EXT3_SB(sb)->s_sbh;
c4be0c1d 2354 int error = 0;
1da177e4
LT
2355
2356 if (!sbh)
c4be0c1d 2357 return error;
96ec2e0a
TT
2358 /*
2359 * If the file system is mounted read-only, don't update the
2360 * superblock write time. This avoids updating the superblock
2361 * write time when we are mounting the root file system
2362 * read/only but we need to replay the journal; at that point,
2363 * for people who are east of GMT and who make their clock
2364 * tick in localtime for Windows bug-for-bug compatibility,
2365 * the clock is set in the future, and this will cause e2fsck
2366 * to complain and force a full file system check.
2367 */
2368 if (!(sb->s_flags & MS_RDONLY))
2369 es->s_wtime = cpu_to_le32(get_seconds());
1da177e4
LT
2370 es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
2371 es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
2372 BUFFER_TRACE(sbh, "marking dirty");
2373 mark_buffer_dirty(sbh);
2374 if (sync)
c4be0c1d
TS
2375 error = sync_dirty_buffer(sbh);
2376 return error;
1da177e4
LT
2377}
2378
2379
2380/*
2381 * Have we just finished recovery? If so, and if we are mounting (or
2382 * remounting) the filesystem readonly, then we will end up with a
2383 * consistent fs on disk. Record that fact.
2384 */
2385static void ext3_mark_recovery_complete(struct super_block * sb,
2386 struct ext3_super_block * es)
2387{
2388 journal_t *journal = EXT3_SB(sb)->s_journal;
2389
2390 journal_lock_updates(journal);
2d7c820e
HK
2391 if (journal_flush(journal) < 0)
2392 goto out;
2393
1da177e4
LT
2394 if (EXT3_HAS_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER) &&
2395 sb->s_flags & MS_RDONLY) {
2396 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
1da177e4
LT
2397 ext3_commit_super(sb, es, 1);
2398 }
2d7c820e
HK
2399
2400out:
1da177e4
LT
2401 journal_unlock_updates(journal);
2402}
2403
2404/*
2405 * If we are mounting (or read-write remounting) a filesystem whose journal
2406 * has recorded an error from a previous lifetime, move that error to the
2407 * main filesystem now.
2408 */
4cf46b67
AF
2409static void ext3_clear_journal_err(struct super_block *sb,
2410 struct ext3_super_block *es)
1da177e4
LT
2411{
2412 journal_t *journal;
2413 int j_errno;
2414 const char *errstr;
2415
2416 journal = EXT3_SB(sb)->s_journal;
2417
2418 /*
2419 * Now check for any error status which may have been recorded in the
2420 * journal by a prior ext3_error() or ext3_abort()
2421 */
2422
2423 j_errno = journal_errno(journal);
2424 if (j_errno) {
2425 char nbuf[16];
2426
2427 errstr = ext3_decode_error(sb, j_errno, nbuf);
e05b6b52 2428 ext3_warning(sb, __func__, "Filesystem error recorded "
1da177e4 2429 "from previous mount: %s", errstr);
e05b6b52 2430 ext3_warning(sb, __func__, "Marking fs in need of "
1da177e4
LT
2431 "filesystem check.");
2432
2433 EXT3_SB(sb)->s_mount_state |= EXT3_ERROR_FS;
2434 es->s_state |= cpu_to_le16(EXT3_ERROR_FS);
2435 ext3_commit_super (sb, es, 1);
2436
2437 journal_clear_err(journal);
2438 }
2439}
2440
2441/*
2442 * Force the running and committing transactions to commit,
2443 * and wait on the commit.
2444 */
2445int ext3_force_commit(struct super_block *sb)
2446{
2447 journal_t *journal;
2448 int ret;
2449
2450 if (sb->s_flags & MS_RDONLY)
2451 return 0;
2452
2453 journal = EXT3_SB(sb)->s_journal;
1da177e4
LT
2454 ret = ext3_journal_force_commit(journal);
2455 return ret;
2456}
2457
1da177e4
LT
2458static int ext3_sync_fs(struct super_block *sb, int wait)
2459{
02ac597c 2460 tid_t target;
c87591b7 2461
02ac597c
JK
2462 if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
2463 if (wait)
2464 log_wait_commit(EXT3_SB(sb)->s_journal, target);
2465 }
1da177e4
LT
2466 return 0;
2467}
2468
2469/*
2470 * LVM calls this function before a (read-only) snapshot is created. This
2471 * gives us a chance to flush the journal completely and mark the fs clean.
2472 */
c4be0c1d 2473static int ext3_freeze(struct super_block *sb)
1da177e4 2474{
c4be0c1d
TS
2475 int error = 0;
2476 journal_t *journal;
1da177e4
LT
2477
2478 if (!(sb->s_flags & MS_RDONLY)) {
c4be0c1d 2479 journal = EXT3_SB(sb)->s_journal;
1da177e4
LT
2480
2481 /* Now we set up the journal barrier. */
2482 journal_lock_updates(journal);
2d7c820e
HK
2483
2484 /*
2485 * We don't want to clear needs_recovery flag when we failed
2486 * to flush the journal.
2487 */
c4be0c1d
TS
2488 error = journal_flush(journal);
2489 if (error < 0)
2490 goto out;
1da177e4
LT
2491
2492 /* Journal blocked and flushed, clear needs_recovery flag. */
2493 EXT3_CLEAR_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
c4be0c1d
TS
2494 error = ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2495 if (error)
2496 goto out;
1da177e4 2497 }
c4be0c1d
TS
2498 return 0;
2499
2500out:
2501 journal_unlock_updates(journal);
2502 return error;
1da177e4
LT
2503}
2504
2505/*
2506 * Called by LVM after the snapshot is done. We need to reset the RECOVER
2507 * flag here, even though the filesystem is not technically dirty yet.
2508 */
c4be0c1d 2509static int ext3_unfreeze(struct super_block *sb)
1da177e4
LT
2510{
2511 if (!(sb->s_flags & MS_RDONLY)) {
2512 lock_super(sb);
2513 /* Reser the needs_recovery flag before the fs is unlocked. */
2514 EXT3_SET_INCOMPAT_FEATURE(sb, EXT3_FEATURE_INCOMPAT_RECOVER);
2515 ext3_commit_super(sb, EXT3_SB(sb)->s_es, 1);
2516 unlock_super(sb);
2517 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2518 }
c4be0c1d 2519 return 0;
1da177e4
LT
2520}
2521
2522static int ext3_remount (struct super_block * sb, int * flags, char * data)
2523{
2524 struct ext3_super_block * es;
2525 struct ext3_sb_info *sbi = EXT3_SB(sb);
43d23f90 2526 ext3_fsblk_t n_blocks_count = 0;
08c6a96f
JK
2527 unsigned long old_sb_flags;
2528 struct ext3_mount_options old_opts;
2529 int err;
2530#ifdef CONFIG_QUOTA
2531 int i;
2532#endif
2533
337eb00a
AIB
2534 lock_kernel();
2535
08c6a96f 2536 /* Store the original options */
bbd6851a 2537 lock_super(sb);
08c6a96f
JK
2538 old_sb_flags = sb->s_flags;
2539 old_opts.s_mount_opt = sbi->s_mount_opt;
2540 old_opts.s_resuid = sbi->s_resuid;
2541 old_opts.s_resgid = sbi->s_resgid;
2542 old_opts.s_commit_interval = sbi->s_commit_interval;
2543#ifdef CONFIG_QUOTA
2544 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
2545 for (i = 0; i < MAXQUOTAS; i++)
2546 old_opts.s_qf_names[i] = sbi->s_qf_names[i];
2547#endif
1da177e4
LT
2548
2549 /*
2550 * Allow the "check" option to be passed as a remount option.
2551 */
71b96257 2552 if (!parse_options(data, sb, NULL, NULL, &n_blocks_count, 1)) {
08c6a96f
JK
2553 err = -EINVAL;
2554 goto restore_opts;
2555 }
1da177e4 2556
e3c96435 2557 if (test_opt(sb, ABORT))
e05b6b52 2558 ext3_abort(sb, __func__, "Abort forced by user");
1da177e4
LT
2559
2560 sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
e3c96435 2561 (test_opt(sb, POSIX_ACL) ? MS_POSIXACL : 0);
1da177e4
LT
2562
2563 es = sbi->s_es;
2564
2565 ext3_init_journal_params(sb, sbi->s_journal);
2566
2567 if ((*flags & MS_RDONLY) != (sb->s_flags & MS_RDONLY) ||
2568 n_blocks_count > le32_to_cpu(es->s_blocks_count)) {
e3c96435 2569 if (test_opt(sb, ABORT)) {
08c6a96f
JK
2570 err = -EROFS;
2571 goto restore_opts;
2572 }
1da177e4
LT
2573
2574 if (*flags & MS_RDONLY) {
2575 /*
2576 * First of all, the unconditional stuff we have to do
2577 * to disable replay of the journal when we next remount
2578 */
2579 sb->s_flags |= MS_RDONLY;
2580
2581 /*
2582 * OK, test if we are remounting a valid rw partition
2583 * readonly, and if so set the rdonly flag and then
2584 * mark the partition as valid again.
2585 */
2586 if (!(es->s_state & cpu_to_le16(EXT3_VALID_FS)) &&
2587 (sbi->s_mount_state & EXT3_VALID_FS))
2588 es->s_state = cpu_to_le16(sbi->s_mount_state);
2589
2590 ext3_mark_recovery_complete(sb, es);
2591 } else {
2592 __le32 ret;
2593 if ((ret = EXT3_HAS_RO_COMPAT_FEATURE(sb,
2594 ~EXT3_FEATURE_RO_COMPAT_SUPP))) {
4cf46b67
AF
2595 ext3_msg(sb, KERN_WARNING,
2596 "warning: couldn't remount RDWR "
2597 "because of unsupported optional "
2598 "features (%x)", le32_to_cpu(ret));
08c6a96f
JK
2599 err = -EROFS;
2600 goto restore_opts;
1da177e4 2601 }
ea9a05a1
ES
2602
2603 /*
2604 * If we have an unprocessed orphan list hanging
2605 * around from a previously readonly bdev mount,
2606 * require a full umount/remount for now.
2607 */
2608 if (es->s_last_orphan) {
4cf46b67 2609 ext3_msg(sb, KERN_WARNING, "warning: couldn't "
ea9a05a1
ES
2610 "remount RDWR because of unprocessed "
2611 "orphan inode list. Please "
4cf46b67 2612 "umount/remount instead.");
ea9a05a1
ES
2613 err = -EINVAL;
2614 goto restore_opts;
2615 }
2616
1da177e4
LT
2617 /*
2618 * Mounting a RDONLY partition read-write, so reread
2619 * and store the current valid flag. (It may have
2620 * been changed by e2fsck since we originally mounted
2621 * the partition.)
2622 */
2623 ext3_clear_journal_err(sb, es);
2624 sbi->s_mount_state = le16_to_cpu(es->s_state);
a4e4de36 2625 if ((err = ext3_group_extend(sb, es, n_blocks_count)))
08c6a96f 2626 goto restore_opts;
1da177e4
LT
2627 if (!ext3_setup_super (sb, es, 0))
2628 sb->s_flags &= ~MS_RDONLY;
2629 }
2630 }
08c6a96f
JK
2631#ifdef CONFIG_QUOTA
2632 /* Release old quota file names */
2633 for (i = 0; i < MAXQUOTAS; i++)
2634 if (old_opts.s_qf_names[i] &&
2635 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2636 kfree(old_opts.s_qf_names[i]);
2637#endif
bbd6851a 2638 unlock_super(sb);
337eb00a 2639 unlock_kernel();
1da177e4 2640 return 0;
08c6a96f
JK
2641restore_opts:
2642 sb->s_flags = old_sb_flags;
2643 sbi->s_mount_opt = old_opts.s_mount_opt;
2644 sbi->s_resuid = old_opts.s_resuid;
2645 sbi->s_resgid = old_opts.s_resgid;
2646 sbi->s_commit_interval = old_opts.s_commit_interval;
2647#ifdef CONFIG_QUOTA
2648 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
2649 for (i = 0; i < MAXQUOTAS; i++) {
2650 if (sbi->s_qf_names[i] &&
2651 old_opts.s_qf_names[i] != sbi->s_qf_names[i])
2652 kfree(sbi->s_qf_names[i]);
2653 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
2654 }
2655#endif
bbd6851a 2656 unlock_super(sb);
337eb00a 2657 unlock_kernel();
08c6a96f 2658 return err;
1da177e4
LT
2659}
2660
726c3342 2661static int ext3_statfs (struct dentry * dentry, struct kstatfs * buf)
1da177e4 2662{
726c3342 2663 struct super_block *sb = dentry->d_sb;
09fe316a
AT
2664 struct ext3_sb_info *sbi = EXT3_SB(sb);
2665 struct ext3_super_block *es = sbi->s_es;
50ee0a32 2666 u64 fsid;
1da177e4 2667
a71ce8c6
BP
2668 if (test_opt(sb, MINIX_DF)) {
2669 sbi->s_overhead_last = 0;
2670 } else if (sbi->s_blocks_last != le32_to_cpu(es->s_blocks_count)) {
2671 unsigned long ngroups = sbi->s_groups_count, i;
2672 ext3_fsblk_t overhead = 0;
1da177e4
LT
2673 smp_rmb();
2674
2675 /*
a71ce8c6
BP
2676 * Compute the overhead (FS structures). This is constant
2677 * for a given filesystem unless the number of block groups
2678 * changes so we cache the previous value until it does.
1da177e4
LT
2679 */
2680
2681 /*
2682 * All of the blocks before first_data_block are
2683 * overhead
2684 */
2685 overhead = le32_to_cpu(es->s_first_data_block);
2686
2687 /*
2688 * Add the overhead attributed to the superblock and
2689 * block group descriptors. If the sparse superblocks
2690 * feature is turned on, then not all groups have this.
2691 */
2692 for (i = 0; i < ngroups; i++) {
2693 overhead += ext3_bg_has_super(sb, i) +
2694 ext3_bg_num_gdb(sb, i);
2695 cond_resched();
2696 }
2697
2698 /*
2699 * Every block group has an inode bitmap, a block
2700 * bitmap, and an inode table.
2701 */
a71ce8c6
BP
2702 overhead += ngroups * (2 + sbi->s_itb_per_group);
2703 sbi->s_overhead_last = overhead;
2704 smp_wmb();
2705 sbi->s_blocks_last = le32_to_cpu(es->s_blocks_count);
1da177e4
LT
2706 }
2707
2708 buf->f_type = EXT3_SUPER_MAGIC;
2709 buf->f_bsize = sb->s_blocksize;
a71ce8c6 2710 buf->f_blocks = le32_to_cpu(es->s_blocks_count) - sbi->s_overhead_last;
52d9f3b4 2711 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter);
1da177e4
LT
2712 buf->f_bavail = buf->f_bfree - le32_to_cpu(es->s_r_blocks_count);
2713 if (buf->f_bfree < le32_to_cpu(es->s_r_blocks_count))
2714 buf->f_bavail = 0;
2715 buf->f_files = le32_to_cpu(es->s_inodes_count);
52d9f3b4 2716 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
1da177e4 2717 buf->f_namelen = EXT3_NAME_LEN;
50ee0a32
PE
2718 fsid = le64_to_cpup((void *)es->s_uuid) ^
2719 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
2720 buf->f_fsid.val[0] = fsid & 0xFFFFFFFFUL;
2721 buf->f_fsid.val[1] = (fsid >> 32) & 0xFFFFFFFFUL;
1da177e4
LT
2722 return 0;
2723}
2724
/* Helper function for writing quotas on sync - we need to start transaction before quota file
 * is locked for write. Otherwise there are possible deadlocks:
 * Process 1                         Process 2
 * ext3_create()                     quota_sync()
 *   journal_start()                   write_dquot()
 *   vfs_dq_init()                       down(dqio_mutex)
 *     down(dqio_mutex)                    journal_start()
 *
 */
2734
2735#ifdef CONFIG_QUOTA
2736
2737static inline struct inode *dquot_to_inode(struct dquot *dquot)
2738{
2739 return sb_dqopt(dquot->dq_sb)->files[dquot->dq_type];
2740}
2741
1da177e4
LT
2742static int ext3_write_dquot(struct dquot *dquot)
2743{
2744 int ret, err;
2745 handle_t *handle;
2746 struct inode *inode;
2747
2748 inode = dquot_to_inode(dquot);
2749 handle = ext3_journal_start(inode,
1f54587b 2750 EXT3_QUOTA_TRANS_BLOCKS(dquot->dq_sb));
1da177e4
LT
2751 if (IS_ERR(handle))
2752 return PTR_ERR(handle);
2753 ret = dquot_commit(dquot);
2754 err = ext3_journal_stop(handle);
2755 if (!ret)
2756 ret = err;
2757 return ret;
2758}
2759
2760static int ext3_acquire_dquot(struct dquot *dquot)
2761{
2762 int ret, err;
2763 handle_t *handle;
2764
2765 handle = ext3_journal_start(dquot_to_inode(dquot),
1f54587b 2766 EXT3_QUOTA_INIT_BLOCKS(dquot->dq_sb));
1da177e4
LT
2767 if (IS_ERR(handle))
2768 return PTR_ERR(handle);
2769 ret = dquot_acquire(dquot);
2770 err = ext3_journal_stop(handle);
2771 if (!ret)
2772 ret = err;
2773 return ret;
2774}
2775
2776static int ext3_release_dquot(struct dquot *dquot)
2777{
2778 int ret, err;
2779 handle_t *handle;
2780
2781 handle = ext3_journal_start(dquot_to_inode(dquot),
1f54587b 2782 EXT3_QUOTA_DEL_BLOCKS(dquot->dq_sb));
9c3013e9
JK
2783 if (IS_ERR(handle)) {
2784 /* Release dquot anyway to avoid endless cycle in dqput() */
2785 dquot_release(dquot);
1da177e4 2786 return PTR_ERR(handle);
9c3013e9 2787 }
1da177e4
LT
2788 ret = dquot_release(dquot);
2789 err = ext3_journal_stop(handle);
2790 if (!ret)
2791 ret = err;
2792 return ret;
2793}
2794
2795static int ext3_mark_dquot_dirty(struct dquot *dquot)
2796{
99aeaf63 2797 /* Are we journaling quotas? */
1da177e4
LT
2798 if (EXT3_SB(dquot->dq_sb)->s_qf_names[USRQUOTA] ||
2799 EXT3_SB(dquot->dq_sb)->s_qf_names[GRPQUOTA]) {
2800 dquot_mark_dquot_dirty(dquot);
2801 return ext3_write_dquot(dquot);
2802 } else {
2803 return dquot_mark_dquot_dirty(dquot);
2804 }
2805}
2806
2807static int ext3_write_info(struct super_block *sb, int type)
2808{
2809 int ret, err;
2810 handle_t *handle;
2811
2812 /* Data block + inode block */
2813 handle = ext3_journal_start(sb->s_root->d_inode, 2);
2814 if (IS_ERR(handle))
2815 return PTR_ERR(handle);
2816 ret = dquot_commit_info(sb, type);
2817 err = ext3_journal_stop(handle);
2818 if (!ret)
2819 ret = err;
2820 return ret;
2821}
2822
2823/*
2824 * Turn on quotas during mount time - we need to find
2825 * the quota file and such...
2826 */
2827static int ext3_quota_on_mount(struct super_block *sb, int type)
2828{
84de856e
CH
2829 return vfs_quota_on_mount(sb, EXT3_SB(sb)->s_qf_names[type],
2830 EXT3_SB(sb)->s_jquota_fmt, type);
1da177e4
LT
2831}
2832
2833/*
2834 * Standard function to be called on quota_on
2835 */
2836static int ext3_quota_on(struct super_block *sb, int type, int format_id,
8264613d 2837 char *name, int remount)
1da177e4
LT
2838{
2839 int err;
8264613d 2840 struct path path;
1da177e4 2841
1f54587b
JK
2842 if (!test_opt(sb, QUOTA))
2843 return -EINVAL;
8264613d 2844 /* When remounting, no checks are needed and in fact, name is NULL */
9cfe7b90 2845 if (remount)
8264613d 2846 return vfs_quota_on(sb, type, format_id, name, remount);
9cfe7b90 2847
8264613d 2848 err = kern_path(name, LOOKUP_FOLLOW, &path);
1da177e4
LT
2849 if (err)
2850 return err;
9cfe7b90 2851
1da177e4 2852 /* Quotafile not on the same filesystem? */
8264613d
AV
2853 if (path.mnt->mnt_sb != sb) {
2854 path_put(&path);
1da177e4
LT
2855 return -EXDEV;
2856 }
9cfe7b90
JK
2857 /* Journaling quota? */
2858 if (EXT3_SB(sb)->s_qf_names[type]) {
2859 /* Quotafile not of fs root? */
8264613d 2860 if (path.dentry->d_parent != sb->s_root)
4cf46b67
AF
2861 ext3_msg(sb, KERN_WARNING,
2862 "warning: Quota file not on filesystem root. "
2863 "Journaled quota will not work.");
9cfe7b90
JK
2864 }
2865
2866 /*
2867 * When we journal data on quota file, we have to flush journal to see
2868 * all updates to the file when we bypass pagecache...
2869 */
8264613d 2870 if (ext3_should_journal_data(path.dentry->d_inode)) {
9cfe7b90
JK
2871 /*
2872 * We don't need to lock updates but journal_flush() could
2873 * otherwise be livelocked...
2874 */
2875 journal_lock_updates(EXT3_SB(sb)->s_journal);
2d7c820e 2876 err = journal_flush(EXT3_SB(sb)->s_journal);
9cfe7b90 2877 journal_unlock_updates(EXT3_SB(sb)->s_journal);
2d7c820e 2878 if (err) {
12e1ec9f 2879 path_put(&path);
2d7c820e
HK
2880 return err;
2881 }
9cfe7b90
JK
2882 }
2883
8264613d
AV
2884 err = vfs_quota_on_path(sb, type, format_id, &path);
2885 path_put(&path);
77e69dac 2886 return err;
1da177e4
LT
2887}
2888
2889/* Read data from quotafile - avoid pagecache and such because we cannot afford
2890 * acquiring the locks... As quota files are never truncated and quota code
2891 * itself serializes the operations (and noone else should touch the files)
2892 * we don't have to be afraid of races */
2893static ssize_t ext3_quota_read(struct super_block *sb, int type, char *data,
2894 size_t len, loff_t off)
2895{
2896 struct inode *inode = sb_dqopt(sb)->files[type];
2897 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2898 int err = 0;
2899 int offset = off & (sb->s_blocksize - 1);
2900 int tocopy;
2901 size_t toread;
2902 struct buffer_head *bh;
2903 loff_t i_size = i_size_read(inode);
2904
2905 if (off > i_size)
2906 return 0;
2907 if (off+len > i_size)
2908 len = i_size-off;
2909 toread = len;
2910 while (toread > 0) {
2911 tocopy = sb->s_blocksize - offset < toread ?
2912 sb->s_blocksize - offset : toread;
2913 bh = ext3_bread(NULL, inode, blk, 0, &err);
2914 if (err)
2915 return err;
2916 if (!bh) /* A hole? */
2917 memset(data, 0, tocopy);
2918 else
2919 memcpy(data, bh->b_data+offset, tocopy);
2920 brelse(bh);
2921 offset = 0;
2922 toread -= tocopy;
2923 data += tocopy;
2924 blk++;
2925 }
2926 return len;
2927}
2928
2929/* Write to quotafile (we know the transaction is already started and has
2930 * enough credits) */
2931static ssize_t ext3_quota_write(struct super_block *sb, int type,
2932 const char *data, size_t len, loff_t off)
2933{
2934 struct inode *inode = sb_dqopt(sb)->files[type];
2935 sector_t blk = off >> EXT3_BLOCK_SIZE_BITS(sb);
2936 int err = 0;
2937 int offset = off & (sb->s_blocksize - 1);
2938 int tocopy;
2939 int journal_quota = EXT3_SB(sb)->s_qf_names[type] != NULL;
2940 size_t towrite = len;
2941 struct buffer_head *bh;
2942 handle_t *handle = journal_current_handle();
2943
9c3013e9 2944 if (!handle) {
4cf46b67
AF
2945 ext3_msg(sb, KERN_WARNING,
2946 "warning: quota write (off=%llu, len=%llu)"
2947 " cancelled because transaction is not started.",
9c3013e9
JK
2948 (unsigned long long)off, (unsigned long long)len);
2949 return -EIO;
2950 }
5c81a419 2951 mutex_lock_nested(&inode->i_mutex, I_MUTEX_QUOTA);
1da177e4
LT
2952 while (towrite > 0) {
2953 tocopy = sb->s_blocksize - offset < towrite ?
2954 sb->s_blocksize - offset : towrite;
2955 bh = ext3_bread(handle, inode, blk, 1, &err);
2956 if (!bh)
2957 goto out;
2958 if (journal_quota) {
2959 err = ext3_journal_get_write_access(handle, bh);
2960 if (err) {
2961 brelse(bh);
2962 goto out;
2963 }
2964 }
2965 lock_buffer(bh);
2966 memcpy(bh->b_data+offset, data, tocopy);
2967 flush_dcache_page(bh->b_page);
2968 unlock_buffer(bh);
2969 if (journal_quota)
2970 err = ext3_journal_dirty_metadata(handle, bh);
2971 else {
2972 /* Always do at least ordered writes for quotas */
2973 err = ext3_journal_dirty_data(handle, bh);
2974 mark_buffer_dirty(bh);
2975 }
2976 brelse(bh);
2977 if (err)
2978 goto out;
2979 offset = 0;
2980 towrite -= tocopy;
2981 data += tocopy;
2982 blk++;
2983 }
2984out:
f5c8f7da
JK
2985 if (len == towrite) {
2986 mutex_unlock(&inode->i_mutex);
1da177e4 2987 return err;
f5c8f7da 2988 }
1da177e4
LT
2989 if (inode->i_size < off+len-towrite) {
2990 i_size_write(inode, off+len-towrite);
2991 EXT3_I(inode)->i_disksize = inode->i_size;
2992 }
2993 inode->i_version++;
2994 inode->i_mtime = inode->i_ctime = CURRENT_TIME;
2995 ext3_mark_inode_dirty(handle, inode);
1b1dcc1b 2996 mutex_unlock(&inode->i_mutex);
1da177e4
LT
2997 return len - towrite;
2998}
2999
3000#endif
3001
454e2398
DH
3002static int ext3_get_sb(struct file_system_type *fs_type,
3003 int flags, const char *dev_name, void *data, struct vfsmount *mnt)
1da177e4 3004{
454e2398 3005 return get_sb_bdev(fs_type, flags, dev_name, data, ext3_fill_super, mnt);
1da177e4
LT
3006}
3007
3008static struct file_system_type ext3_fs_type = {
3009 .owner = THIS_MODULE,
3010 .name = "ext3",
3011 .get_sb = ext3_get_sb,
3012 .kill_sb = kill_block_super,
3013 .fs_flags = FS_REQUIRES_DEV,
3014};
3015
3016static int __init init_ext3_fs(void)
3017{
3018 int err = init_ext3_xattr();
3019 if (err)
3020 return err;
3021 err = init_inodecache();
3022 if (err)
3023 goto out1;
3024 err = register_filesystem(&ext3_fs_type);
3025 if (err)
3026 goto out;
3027 return 0;
3028out:
3029 destroy_inodecache();
3030out1:
e9ad5620 3031 exit_ext3_xattr();
1da177e4
LT
3032 return err;
3033}
3034
3035static void __exit exit_ext3_fs(void)
3036{
3037 unregister_filesystem(&ext3_fs_type);
3038 destroy_inodecache();
3039 exit_ext3_xattr();
3040}
3041
3042MODULE_AUTHOR("Remy Card, Stephen Tweedie, Andrew Morton, Andreas Dilger, Theodore Ts'o and others");
3043MODULE_DESCRIPTION("Second Extended Filesystem with journaling extensions");
3044MODULE_LICENSE("GPL");
3045module_init(init_ext3_fs)
3046module_exit(exit_ext3_fs)