]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/logfs/super.c
Fix double-free in logfs
[net-next-2.6.git] / fs / logfs / super.c
CommitLineData
5db53f3e
JE
1/*
2 * fs/logfs/super.c
3 *
4 * As should be obvious for Linux kernel code, license is GPLv2
5 *
6 * Copyright (c) 2005-2008 Joern Engel <joern@logfs.org>
7 *
8 * Generally contains mount/umount code and also serves as a dump area for
9 * any functions that don't fit elsewhere and neither justify a file of their
10 * own.
11 */
12#include "logfs.h"
13#include <linux/bio.h>
5a0e3ad6 14#include <linux/slab.h>
b8639077 15#include <linux/blkdev.h>
5db53f3e
JE
16#include <linux/mtd/mtd.h>
17#include <linux/statfs.h>
18#include <linux/buffer_head.h>
19
/* Held from emergency_read_begin() until emergency_read_end() whenever the
 * emergency page is handed out. */
static DEFINE_MUTEX(emergency_mutex);
/* Single fallback page, allocated in logfs_init() and freed in logfs_exit(). */
static struct page *emergency_page;
22
23struct page *emergency_read_begin(struct address_space *mapping, pgoff_t index)
24{
25 filler_t *filler = (filler_t *)mapping->a_ops->readpage;
26 struct page *page;
27 int err;
28
29 page = read_cache_page(mapping, index, filler, NULL);
30 if (page)
31 return page;
32
33 /* No more pages available, switch to emergency page */
34 printk(KERN_INFO"Logfs: Using emergency page\n");
35 mutex_lock(&emergency_mutex);
36 err = filler(NULL, emergency_page);
37 if (err) {
38 mutex_unlock(&emergency_mutex);
39 printk(KERN_EMERG"Logfs: Error reading emergency page\n");
40 return ERR_PTR(err);
41 }
42 return emergency_page;
43}
44
45void emergency_read_end(struct page *page)
46{
47 if (page == emergency_page)
48 mutex_unlock(&emergency_mutex);
49 else
50 page_cache_release(page);
51}
52
53static void dump_segfile(struct super_block *sb)
54{
55 struct logfs_super *super = logfs_super(sb);
56 struct logfs_segment_entry se;
57 u32 segno;
58
59 for (segno = 0; segno < super->s_no_segs; segno++) {
60 logfs_get_segment_entry(sb, segno, &se);
61 printk("%3x: %6x %8x", segno, be32_to_cpu(se.ec_level),
62 be32_to_cpu(se.valid));
63 if (++segno < super->s_no_segs) {
64 logfs_get_segment_entry(sb, segno, &se);
65 printk(" %6x %8x", be32_to_cpu(se.ec_level),
66 be32_to_cpu(se.valid));
67 }
68 if (++segno < super->s_no_segs) {
69 logfs_get_segment_entry(sb, segno, &se);
70 printk(" %6x %8x", be32_to_cpu(se.ec_level),
71 be32_to_cpu(se.valid));
72 }
73 if (++segno < super->s_no_segs) {
74 logfs_get_segment_entry(sb, segno, &se);
75 printk(" %6x %8x", be32_to_cpu(se.ec_level),
76 be32_to_cpu(se.valid));
77 }
78 printk("\n");
79 }
80}
81
/*
 * logfs_crash_dump - dump debug information
 *
 * The LogFS superblock only occupies part of a segment, and the stated
 * intent is to write debug information into the spare space.  As implemented
 * here, the function only prints the segment file through dump_segfile().
 */
void logfs_crash_dump(struct super_block *sb)
{
	dump_segfile(sb);
}
92
/*
 * TODO: move to lib/string.c
 */
/**
 * memchr_inv - Find the first byte in an area of memory that differs from @c.
 * @s: The memory area
 * @c: The byte expected throughout the area (compared as unsigned char)
 * @n: The size of the area.
 *
 * returns the address of the first byte other than @c, or %NULL
 * if the whole buffer contains just @c (including when @n is 0).
 */
void *memchr_inv(const void *s, int c, size_t n)
{
	const unsigned char *bytes = s;
	const unsigned char want = (unsigned char)c;
	size_t i;

	for (i = 0; i < n; i++) {
		if (bytes[i] != want)
			return (void *)(bytes + i);
	}
	return NULL;
}
114
115/*
116 * FIXME: There should be a reserve for root, similar to ext2.
117 */
118int logfs_statfs(struct dentry *dentry, struct kstatfs *stats)
119{
120 struct super_block *sb = dentry->d_sb;
121 struct logfs_super *super = logfs_super(sb);
122
123 stats->f_type = LOGFS_MAGIC_U32;
124 stats->f_bsize = sb->s_blocksize;
125 stats->f_blocks = super->s_size >> LOGFS_BLOCK_BITS >> 3;
126 stats->f_bfree = super->s_free_bytes >> sb->s_blocksize_bits;
127 stats->f_bavail = super->s_free_bytes >> sb->s_blocksize_bits;
128 stats->f_files = 0;
129 stats->f_ffree = 0;
130 stats->f_namelen = LOGFS_MAX_NAMELEN;
131 return 0;
132}
133
134static int logfs_sb_set(struct super_block *sb, void *_super)
135{
136 struct logfs_super *super = _super;
137
138 sb->s_fs_info = super;
139 sb->s_mtd = super->s_mtd;
140 sb->s_bdev = super->s_bdev;
b8639077
JE
141 if (sb->s_bdev)
142 sb->s_bdi = &bdev_get_queue(sb->s_bdev)->backing_dev_info;
143 if (sb->s_mtd)
144 sb->s_bdi = sb->s_mtd->backing_dev_info;
5db53f3e
JE
145 return 0;
146}
147
148static int logfs_sb_test(struct super_block *sb, void *_super)
149{
150 struct logfs_super *super = _super;
151 struct mtd_info *mtd = super->s_mtd;
152
153 if (mtd && sb->s_mtd == mtd)
154 return 1;
155 if (super->s_bdev && sb->s_bdev == super->s_bdev)
156 return 1;
157 return 0;
158}
159
160static void set_segment_header(struct logfs_segment_header *sh, u8 type,
161 u8 level, u32 segno, u32 ec)
162{
163 sh->pad = 0;
164 sh->type = type;
165 sh->level = level;
166 sh->segno = cpu_to_be32(segno);
167 sh->ec = cpu_to_be32(ec);
168 sh->gec = cpu_to_be64(segno);
169 sh->crc = logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4);
170}
171
172static void logfs_write_ds(struct super_block *sb, struct logfs_disk_super *ds,
173 u32 segno, u32 ec)
174{
175 struct logfs_super *super = logfs_super(sb);
176 struct logfs_segment_header *sh = &ds->ds_sh;
177 int i;
178
179 memset(ds, 0, sizeof(*ds));
180 set_segment_header(sh, SEG_SUPER, 0, segno, ec);
181
182 ds->ds_ifile_levels = super->s_ifile_levels;
183 ds->ds_iblock_levels = super->s_iblock_levels;
184 ds->ds_data_levels = super->s_data_levels; /* XXX: Remove */
185 ds->ds_segment_shift = super->s_segshift;
186 ds->ds_block_shift = sb->s_blocksize_bits;
187 ds->ds_write_shift = super->s_writeshift;
188 ds->ds_filesystem_size = cpu_to_be64(super->s_size);
189 ds->ds_segment_size = cpu_to_be32(super->s_segsize);
190 ds->ds_bad_seg_reserve = cpu_to_be32(super->s_bad_seg_reserve);
191 ds->ds_feature_incompat = cpu_to_be64(super->s_feature_incompat);
192 ds->ds_feature_ro_compat= cpu_to_be64(super->s_feature_ro_compat);
193 ds->ds_feature_compat = cpu_to_be64(super->s_feature_compat);
194 ds->ds_feature_flags = cpu_to_be64(super->s_feature_flags);
195 ds->ds_root_reserve = cpu_to_be64(super->s_root_reserve);
196 ds->ds_speed_reserve = cpu_to_be64(super->s_speed_reserve);
197 journal_for_each(i)
198 ds->ds_journal_seg[i] = cpu_to_be32(super->s_journal_seg[i]);
199 ds->ds_magic = cpu_to_be64(LOGFS_MAGIC);
200 ds->ds_crc = logfs_crc32(ds, sizeof(*ds),
201 LOGFS_SEGMENT_HEADERSIZE + 12);
202}
203
204static int write_one_sb(struct super_block *sb,
205 struct page *(*find_sb)(struct super_block *sb, u64 *ofs))
206{
207 struct logfs_super *super = logfs_super(sb);
208 struct logfs_disk_super *ds;
209 struct logfs_segment_entry se;
210 struct page *page;
211 u64 ofs;
212 u32 ec, segno;
213 int err;
214
215 page = find_sb(sb, &ofs);
216 if (!page)
217 return -EIO;
218 ds = page_address(page);
219 segno = seg_no(sb, ofs);
220 logfs_get_segment_entry(sb, segno, &se);
221 ec = be32_to_cpu(se.ec_level) >> 4;
222 ec++;
223 logfs_set_segment_erased(sb, segno, ec, 0);
224 logfs_write_ds(sb, ds, segno, ec);
225 err = super->s_devops->write_sb(sb, page);
226 page_cache_release(page);
227 return err;
228}
229
230int logfs_write_sb(struct super_block *sb)
231{
232 struct logfs_super *super = logfs_super(sb);
233 int err;
234
235 /* First superblock */
236 err = write_one_sb(sb, super->s_devops->find_first_sb);
237 if (err)
238 return err;
239
240 /* Last superblock */
241 err = write_one_sb(sb, super->s_devops->find_last_sb);
242 if (err)
243 return err;
244 return 0;
245}
246
247static int ds_cmp(const void *ds0, const void *ds1)
248{
249 size_t len = sizeof(struct logfs_disk_super);
250
251 /* We know the segment headers differ, so ignore them */
252 len -= LOGFS_SEGMENT_HEADERSIZE;
253 ds0 += LOGFS_SEGMENT_HEADERSIZE;
254 ds1 += LOGFS_SEGMENT_HEADERSIZE;
255 return memcmp(ds0, ds1, len);
256}
257
258static int logfs_recover_sb(struct super_block *sb)
259{
260 struct logfs_super *super = logfs_super(sb);
261 struct logfs_disk_super _ds0, *ds0 = &_ds0;
262 struct logfs_disk_super _ds1, *ds1 = &_ds1;
263 int err, valid0, valid1;
264
265 /* read first superblock */
266 err = wbuf_read(sb, super->s_sb_ofs[0], sizeof(*ds0), ds0);
267 if (err)
268 return err;
269 /* read last superblock */
270 err = wbuf_read(sb, super->s_sb_ofs[1], sizeof(*ds1), ds1);
271 if (err)
272 return err;
273 valid0 = logfs_check_ds(ds0) == 0;
274 valid1 = logfs_check_ds(ds1) == 0;
275
276 if (!valid0 && valid1) {
277 printk(KERN_INFO"First superblock is invalid - fixing.\n");
278 return write_one_sb(sb, super->s_devops->find_first_sb);
279 }
280 if (valid0 && !valid1) {
281 printk(KERN_INFO"Last superblock is invalid - fixing.\n");
282 return write_one_sb(sb, super->s_devops->find_last_sb);
283 }
284 if (valid0 && valid1 && ds_cmp(ds0, ds1)) {
285 printk(KERN_INFO"Superblocks don't match - fixing.\n");
faaa27ab 286 return logfs_write_sb(sb);
5db53f3e
JE
287 }
288 /* If neither is valid now, something's wrong. Didn't we properly
289 * check them before?!? */
290 BUG_ON(!valid0 && !valid1);
291 return 0;
292}
293
/*
 * logfs_make_writeable - run the steps needed before the fs may be written
 *
 * In order: open the segment file, repair superblock copies, check areas,
 * run one GC pass and replay the journal.  Returns the first error
 * encountered, or 0.  The GC pass has no checked return value.
 */
static int logfs_make_writeable(struct super_block *sb)
{
	int ret;

	ret = logfs_open_segfile(sb);
	if (ret)
		return ret;

	/* Repair any broken superblock copies */
	ret = logfs_recover_sb(sb);
	if (ret)
		return ret;

	/* Check areas for trailing unaccounted data */
	ret = logfs_check_areas(sb);
	if (ret)
		return ret;

	/* Do one GC pass before any data gets dirtied */
	logfs_gc_pass(sb);

	/* after all initializations are done, replay the journal
	 * for rw-mounts, if necessary */
	return logfs_replay_journal(sb);
}
323
324static int logfs_get_sb_final(struct super_block *sb, struct vfsmount *mnt)
325{
9421502b 326 struct logfs_super *super = logfs_super(sb);
5db53f3e
JE
327 struct inode *rootdir;
328 int err;
329
330 /* root dir */
331 rootdir = logfs_iget(sb, LOGFS_INO_ROOT);
332 if (IS_ERR(rootdir))
333 goto fail;
334
335 sb->s_root = d_alloc_root(rootdir);
26562449
AV
336 if (!sb->s_root) {
337 iput(rootdir);
338 goto fail;
339 }
5db53f3e 340
9421502b
JE
341 super->s_erase_page = alloc_pages(GFP_KERNEL, 0);
342 if (!super->s_erase_page)
26562449 343 goto fail;
9421502b
JE
344 memset(page_address(super->s_erase_page), 0xFF, PAGE_SIZE);
345
5db53f3e
JE
346 /* FIXME: check for read-only mounts */
347 err = logfs_make_writeable(sb);
348 if (err)
26562449 349 goto fail1;
5db53f3e
JE
350
351 log_super("LogFS: Finished mounting\n");
352 simple_set_mnt(mnt, sb);
353 return 0;
354
26562449 355fail1:
9421502b 356 __free_page(super->s_erase_page);
5db53f3e
JE
357fail:
358 iput(logfs_super(sb)->s_master_inode);
359 return -EIO;
360}
361
362int logfs_check_ds(struct logfs_disk_super *ds)
363{
364 struct logfs_segment_header *sh = &ds->ds_sh;
365
366 if (ds->ds_magic != cpu_to_be64(LOGFS_MAGIC))
367 return -EINVAL;
368 if (sh->crc != logfs_crc32(sh, LOGFS_SEGMENT_HEADERSIZE, 4))
369 return -EINVAL;
370 if (ds->ds_crc != logfs_crc32(ds, sizeof(*ds),
371 LOGFS_SEGMENT_HEADERSIZE + 12))
372 return -EINVAL;
373 return 0;
374}
375
376static struct page *find_super_block(struct super_block *sb)
377{
378 struct logfs_super *super = logfs_super(sb);
379 struct page *first, *last;
380
381 first = super->s_devops->find_first_sb(sb, &super->s_sb_ofs[0]);
382 if (!first || IS_ERR(first))
383 return NULL;
384 last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
385 if (!last || IS_ERR(first)) {
386 page_cache_release(first);
387 return NULL;
388 }
389
390 if (!logfs_check_ds(page_address(first))) {
391 page_cache_release(last);
392 return first;
393 }
394
395 /* First one didn't work, try the second superblock */
396 if (!logfs_check_ds(page_address(last))) {
397 page_cache_release(first);
398 return last;
399 }
400
401 /* Neither worked, sorry folks */
402 page_cache_release(first);
403 page_cache_release(last);
404 return NULL;
405}
406
407static int __logfs_read_sb(struct super_block *sb)
408{
409 struct logfs_super *super = logfs_super(sb);
410 struct page *page;
411 struct logfs_disk_super *ds;
412 int i;
413
414 page = find_super_block(sb);
415 if (!page)
416 return -EIO;
417
418 ds = page_address(page);
419 super->s_size = be64_to_cpu(ds->ds_filesystem_size);
420 super->s_root_reserve = be64_to_cpu(ds->ds_root_reserve);
421 super->s_speed_reserve = be64_to_cpu(ds->ds_speed_reserve);
422 super->s_bad_seg_reserve = be32_to_cpu(ds->ds_bad_seg_reserve);
423 super->s_segsize = 1 << ds->ds_segment_shift;
424 super->s_segmask = (1 << ds->ds_segment_shift) - 1;
425 super->s_segshift = ds->ds_segment_shift;
426 sb->s_blocksize = 1 << ds->ds_block_shift;
427 sb->s_blocksize_bits = ds->ds_block_shift;
428 super->s_writesize = 1 << ds->ds_write_shift;
429 super->s_writeshift = ds->ds_write_shift;
430 super->s_no_segs = super->s_size >> super->s_segshift;
431 super->s_no_blocks = super->s_segsize >> sb->s_blocksize_bits;
432 super->s_feature_incompat = be64_to_cpu(ds->ds_feature_incompat);
433 super->s_feature_ro_compat = be64_to_cpu(ds->ds_feature_ro_compat);
434 super->s_feature_compat = be64_to_cpu(ds->ds_feature_compat);
435 super->s_feature_flags = be64_to_cpu(ds->ds_feature_flags);
436
437 journal_for_each(i)
438 super->s_journal_seg[i] = be32_to_cpu(ds->ds_journal_seg[i]);
439
440 super->s_ifile_levels = ds->ds_ifile_levels;
441 super->s_iblock_levels = ds->ds_iblock_levels;
442 super->s_data_levels = ds->ds_data_levels;
443 super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
444 + super->s_data_levels;
445 page_cache_release(page);
446 return 0;
447}
448
6a08ab84 449static int logfs_read_sb(struct super_block *sb, int read_only)
5db53f3e
JE
450{
451 struct logfs_super *super = logfs_super(sb);
452 int ret;
453
454 super->s_btree_pool = mempool_create(32, btree_alloc, btree_free, NULL);
455 if (!super->s_btree_pool)
456 return -ENOMEM;
457
458 btree_init_mempool64(&super->s_shadow_tree.new, super->s_btree_pool);
459 btree_init_mempool64(&super->s_shadow_tree.old, super->s_btree_pool);
032d8f72
JE
460 btree_init_mempool32(&super->s_shadow_tree.segment_map,
461 super->s_btree_pool);
5db53f3e
JE
462
463 ret = logfs_init_mapping(sb);
464 if (ret)
465 return ret;
466
467 ret = __logfs_read_sb(sb);
468 if (ret)
469 return ret;
470
6a08ab84
JE
471 if (super->s_feature_incompat & ~LOGFS_FEATURES_INCOMPAT)
472 return -EIO;
473 if ((super->s_feature_ro_compat & ~LOGFS_FEATURES_RO_COMPAT) &&
474 !read_only)
475 return -EIO;
476
5db53f3e
JE
477 mutex_init(&super->s_dirop_mutex);
478 mutex_init(&super->s_object_alias_mutex);
479 INIT_LIST_HEAD(&super->s_freeing_list);
480
481 ret = logfs_init_rw(sb);
482 if (ret)
483 return ret;
484
485 ret = logfs_init_areas(sb);
486 if (ret)
487 return ret;
488
489 ret = logfs_init_gc(sb);
490 if (ret)
491 return ret;
492
493 ret = logfs_init_journal(sb);
494 if (ret)
495 return ret;
496
497 return 0;
498}
499
500static void logfs_kill_sb(struct super_block *sb)
501{
502 struct logfs_super *super = logfs_super(sb);
503
504 log_super("LogFS: Start unmounting\n");
505 /* Alias entries slow down mount, so evict as many as possible */
506 sync_filesystem(sb);
c6d38301 507 logfs_write_anchor(sb);
5db53f3e
JE
508
509 /*
510 * From this point on alias entries are simply dropped - and any
511 * writes to the object store are considered bugs.
512 */
513 super->s_flags |= LOGFS_SB_FLAG_SHUTDOWN;
514 log_super("LogFS: Now in shutdown\n");
515 generic_shutdown_super(sb);
516
517 BUG_ON(super->s_dirty_used_bytes || super->s_dirty_free_bytes);
518
519 logfs_cleanup_gc(sb);
520 logfs_cleanup_journal(sb);
521 logfs_cleanup_areas(sb);
522 logfs_cleanup_rw(sb);
9421502b
JE
523 if (super->s_erase_page)
524 __free_page(super->s_erase_page);
5db53f3e 525 super->s_devops->put_device(sb);
1f1b0008
JE
526 logfs_mempool_destroy(super->s_btree_pool);
527 logfs_mempool_destroy(super->s_alias_pool);
5db53f3e
JE
528 kfree(super);
529 log_super("LogFS: Finished unmounting\n");
530}
531
532int logfs_get_sb_device(struct file_system_type *type, int flags,
533 struct mtd_info *mtd, struct block_device *bdev,
534 const struct logfs_device_ops *devops, struct vfsmount *mnt)
535{
536 struct logfs_super *super;
537 struct super_block *sb;
538 int err = -ENOMEM;
539 static int mount_count;
540
541 log_super("LogFS: Start mount %x\n", mount_count++);
542 super = kzalloc(sizeof(*super), GFP_KERNEL);
543 if (!super)
544 goto err0;
545
546 super->s_mtd = mtd;
547 super->s_bdev = bdev;
548 err = -EINVAL;
549 sb = sget(type, logfs_sb_test, logfs_sb_set, super);
550 if (IS_ERR(sb))
551 goto err0;
552
553 if (sb->s_root) {
554 /* Device is already in use */
555 err = 0;
556 simple_set_mnt(mnt, sb);
557 goto err0;
558 }
559
560 super->s_devops = devops;
561
562 /*
563 * sb->s_maxbytes is limited to 8TB. On 32bit systems, the page cache
564 * only covers 16TB and the upper 8TB are used for indirect blocks.
565 * On 64bit system we could bump up the limit, but that would make
566 * the filesystem incompatible with 32bit systems.
567 */
568 sb->s_maxbytes = (1ull << 43) - 1;
569 sb->s_op = &logfs_super_operations;
570 sb->s_flags = flags | MS_NOATIME;
571
6a08ab84 572 err = logfs_read_sb(sb, sb->s_flags & MS_RDONLY);
5db53f3e
JE
573 if (err)
574 goto err1;
575
576 sb->s_flags |= MS_ACTIVE;
577 err = logfs_get_sb_final(sb, mnt);
578 if (err)
579 goto err1;
580 return 0;
581
582err1:
6f2e9e6a 583 deactivate_locked_super(sb);
5db53f3e
JE
584 return err;
585err0:
586 kfree(super);
587 //devops->put_device(sb);
588 return err;
589}
590
591static int logfs_get_sb(struct file_system_type *type, int flags,
592 const char *devname, void *data, struct vfsmount *mnt)
593{
594 ulong mtdnr;
595
596 if (!devname)
597 return logfs_get_sb_bdev(type, flags, devname, mnt);
598 if (strncmp(devname, "mtd", 3))
599 return logfs_get_sb_bdev(type, flags, devname, mnt);
600
601 {
602 char *garbage;
603 mtdnr = simple_strtoul(devname+3, &garbage, 0);
604 if (*garbage)
605 return -EINVAL;
606 }
607
608 return logfs_get_sb_mtd(type, flags, mtdnr, mnt);
609}
610
611static struct file_system_type logfs_fs_type = {
612 .owner = THIS_MODULE,
613 .name = "logfs",
614 .get_sb = logfs_get_sb,
615 .kill_sb = logfs_kill_sb,
616 .fs_flags = FS_REQUIRES_DEV,
617
618};
619
620static int __init logfs_init(void)
621{
622 int ret;
623
624 emergency_page = alloc_pages(GFP_KERNEL, 0);
625 if (!emergency_page)
626 return -ENOMEM;
627
628 ret = logfs_compr_init();
629 if (ret)
630 goto out1;
631
632 ret = logfs_init_inode_cache();
633 if (ret)
634 goto out2;
635
636 return register_filesystem(&logfs_fs_type);
637out2:
638 logfs_compr_exit();
639out1:
640 __free_pages(emergency_page, 0);
641 return ret;
642}
643
644static void __exit logfs_exit(void)
645{
646 unregister_filesystem(&logfs_fs_type);
647 logfs_destroy_inode_cache();
648 logfs_compr_exit();
649 __free_pages(emergency_page, 0);
650}
651
652module_init(logfs_init);
653module_exit(logfs_exit);
654
655MODULE_LICENSE("GPL v2");
656MODULE_AUTHOR("Joern Engel <joern@logfs.org>");
657MODULE_DESCRIPTION("scalable flash filesystem");