Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs-2.6

author Linus Torvalds <torvalds@linux-foundation.org>

Wed, 27 Oct 2010 00:58:44 +0000 (17:58 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Wed, 27 Oct 2010 00:58:44 +0000 (17:58 -0700)
author Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Oct 2010 00:58:44 +0000 (17:58 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Wed, 27 Oct 2010 00:58:44 +0000 (17:58 -0700)
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking

index 2db4283efa8dbf99e0e6576edc8d0a054d3b299d..8a817f656f0a808dffb344c6a46bab960cb8a0ae 100644 (file)
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -349,21 +349,36 @@ call this method upon the IO completion.
  
  --------------------------- block_device_operations -----------------------
  prototypes:
-       int (*open) (struct inode *, struct file *);
-       int (*release) (struct inode *, struct file *);
-       int (*ioctl) (struct inode *, struct file *, unsigned, unsigned long);
+       int (*open) (struct block_device *, fmode_t);
+       int (*release) (struct gendisk *, fmode_t);
+       int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
+       int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long);
+       int (*direct_access) (struct block_device *, sector_t, void **, unsigned long *);
         int (*media_changed) (struct gendisk *);
+       void (*unlock_native_capacity) (struct gendisk *);
         int (*revalidate_disk) (struct gendisk *);
+       int (*getgeo)(struct block_device *, struct hd_geometry *);
+       void (*swap_slot_free_notify) (struct block_device *, unsigned long);
  
  locking rules:
-                       BKL     bd_sem
-open:                  yes     yes
-release:               yes     yes
-ioctl:                 yes     no
+                       BKL     bd_mutex
+open:                  no      yes
+release:               no      yes
+ioctl:                 no      no
+compat_ioctl:          no      no
+direct_access:         no      no
  media_changed:         no      no
+unlock_native_capacity:        no      no
  revalidate_disk:       no      no
+getgeo:                        no      no
+swap_slot_free_notify: no      no      (see below)
+
+media_changed, unlock_native_capacity and revalidate_disk are called only from
+check_disk_change().
+
+swap_slot_free_notify is called with swap_lock and sometimes the page lock
+held.
  
-The last two are called only from check_disk_change().
  
  --------------------------- file_operations -------------------------------
  prototypes:
diff --git a/Documentation/filesystems/sharedsubtree.txt b/Documentation/filesystems/sharedsubtree.txt

index fc0e39af43c32a42343f97daed5a5d285f844863..4ede421c9687a71d33027b95118c90dc743da0fe 100644 (file)
--- a/Documentation/filesystems/sharedsubtree.txt
+++ b/Documentation/filesystems/sharedsubtree.txt
@@ -62,10 +62,10 @@ replicas continue to be exactly same.
         # mount /dev/sd0  /tmp/a
  
         #ls /tmp/a
-       t1 t2 t2
+       t1 t2 t3
  
         #ls /mnt/a
-       t1 t2 t2
+       t1 t2 t3
  
         Note that the mount has propagated to the mount at /mnt as well.
  
diff --git a/drivers/char/mem.c b/drivers/char/mem.c

index e985b1c2730e800db599ad445371a8b100eee506..1256454b2d4364f7fa6ad95b661d30ad62b673b4 100644 (file)
--- a/drivers/char/mem.c
+++ b/drivers/char/mem.c
@@ -876,6 +876,10 @@ static int memory_open(struct inode *inode, struct file *filp)
         if (dev->dev_info)
                 filp->f_mapping->backing_dev_info = dev->dev_info;
  
+       /* Is /dev/mem or /dev/kmem ? */
+       if (dev->dev_info == &directly_mappable_cdev_bdi)
+               filp->f_mode |= FMODE_UNSIGNED_OFFSET;
+
         if (dev->fops->open)
                 return dev->fops->open(inode, filp);
  
diff --git a/drivers/infiniband/hw/ipath/ipath_fs.c b/drivers/infiniband/hw/ipath/ipath_fs.c

index d13e72685dcfa17388c274a62df80f88f68b558f..12d5bf76302cc0eff403b5538607350f4e554e42 100644 (file)
--- a/drivers/infiniband/hw/ipath/ipath_fs.c
+++ b/drivers/infiniband/hw/ipath/ipath_fs.c
@@ -57,6 +57,7 @@ static int ipathfs_mknod(struct inode *dir, struct dentry *dentry,
                 goto bail;
         }
  
+       inode->i_ino = get_next_ino();
         inode->i_mode = mode;
         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
         inode->i_private = data;
diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c

index a0e6613e8be6151d3fdc00efc3005ad8c6f22ebd..7e433d75c775a963fdedb38612ef734a6211ce79 100644 (file)
--- a/drivers/infiniband/hw/qib/qib_fs.c
+++ b/drivers/infiniband/hw/qib/qib_fs.c
@@ -58,6 +58,7 @@ static int qibfs_mknod(struct inode *dir, struct dentry *dentry,
                 goto bail;
         }
  
+       inode->i_ino = get_next_ino();
         inode->i_mode = mode;
         inode->i_uid = 0;
         inode->i_gid = 0;
diff --git a/drivers/misc/ibmasm/ibmasmfs.c b/drivers/misc/ibmasm/ibmasmfs.c

index af2497ae5fe32be990aabdb6732c44eaaf0591fb..0a53500636c9d3f4c4d1f886d386f1b1b675f95d 100644 (file)
--- a/drivers/misc/ibmasm/ibmasmfs.c
+++ b/drivers/misc/ibmasm/ibmasmfs.c
@@ -146,6 +146,7 @@ static struct inode *ibmasmfs_make_inode(struct super_block *sb, int mode)
         struct inode *ret = new_inode(sb);
  
         if (ret) {
+               ret->i_ino = get_next_ino();
                 ret->i_mode = mode;
                 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
         }
diff --git a/drivers/oprofile/oprofilefs.c b/drivers/oprofile/oprofilefs.c

index 95f711b251adf33a80dd69eab2a6b792a11313ac..449de59bf35bb8a255d73b1a77eff0d040f953be 100644 (file)
--- a/drivers/oprofile/oprofilefs.c
+++ b/drivers/oprofile/oprofilefs.c
@@ -28,6 +28,7 @@ static struct inode *oprofilefs_get_inode(struct super_block *sb, int mode)
         struct inode *inode = new_inode(sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
         }
diff --git a/drivers/staging/pohmelfs/inode.c b/drivers/staging/pohmelfs/inode.c

index 97dae297ca3c1483006c05b52f25b92386971330..c62d30017c07257bb7ac073a465f3e1adfa148ce 100644 (file)
--- a/drivers/staging/pohmelfs/inode.c
+++ b/drivers/staging/pohmelfs/inode.c
@@ -882,12 +882,8 @@ static struct inode *pohmelfs_alloc_inode(struct super_block *sb)
  static int pohmelfs_fsync(struct file *file, int datasync)
  {
         struct inode *inode = file->f_mapping->host;
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_ALL,
-               .nr_to_write = 0,       /* sys_fsync did this */
-       };
  
-       return sync_inode(inode, &wbc);
+       return sync_inode_metadata(inode, 1);
  }
  
  ssize_t pohmelfs_write(struct file *file, const char __user *buf,
diff --git a/drivers/usb/core/inode.c b/drivers/usb/core/inode.c

index 095fa53666909e02e2325badb15bfe5f63dc139b..e2f63c0ea09df16286ea8c4ad04a743ff3d0a9b5 100644 (file)
--- a/drivers/usb/core/inode.c
+++ b/drivers/usb/core/inode.c
@@ -276,6 +276,7 @@ static struct inode *usbfs_get_inode (struct super_block *sb, int mode, dev_t de
         struct inode *inode = new_inode(sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_uid = current_fsuid();
                 inode->i_gid = current_fsgid();
diff --git a/drivers/usb/gadget/f_fs.c b/drivers/usb/gadget/f_fs.c

index e4f59505520889e86ecd7d131e9ba283cdbf3dc9..e093fd8d04d3587170d6cfa1edb5a50ee3637a3c 100644 (file)
--- a/drivers/usb/gadget/f_fs.c
+++ b/drivers/usb/gadget/f_fs.c
@@ -980,6 +980,7 @@ ffs_sb_make_inode(struct super_block *sb, void *data,
         if (likely(inode)) {
                 struct timespec current_time = CURRENT_TIME;
  
+               inode->i_ino     = get_next_ino();
                 inode->i_mode    = perms->mode;
                 inode->i_uid     = perms->uid;
                 inode->i_gid     = perms->gid;
diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c

index d1d72d946b04d465014b593e7cd6c39604a57f4e..ba145e7fbe03ef27fcd37598dc46a9f1fb31eb27 100644 (file)
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -1991,6 +1991,7 @@ gadgetfs_make_inode (struct super_block *sb,
         struct inode *inode = new_inode (sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_uid = default_uid;
                 inode->i_gid = default_gid;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c

index 9e670d527646fc4abe2be6f0dfc992f4a3178042..ef5905f7c8a39c9395362d5e368a846d43fcedbd 100644 (file)
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -1789,9 +1789,10 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
                 kfree(st);
         } else {
                 /* Caching disabled. No need to get upto date stat info.
-                * This dentry will be released immediately. So, just i_count++
+                * This dentry will be released immediately. So, just hold the
+                * inode
                  */
-               atomic_inc(&old_dentry->d_inode->i_count);
+               ihold(old_dentry->d_inode);
         }
  
         dentry->d_op = old_dentry->d_op;
diff --git a/fs/affs/file.c b/fs/affs/file.c

index c4a9875bd1a60520c682bed74f7a27bc3d2e1da0..0a90dcd46de28d33f2768fde829ac7800abdc9e4 100644 (file)
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -894,9 +894,9 @@ affs_truncate(struct inode *inode)
                 if (AFFS_SB(sb)->s_flags & SF_OFS) {
                         struct buffer_head *bh = affs_bread_ino(inode, last_blk, 0);
                         u32 tmp;
-                       if (IS_ERR(ext_bh)) {
+                       if (IS_ERR(bh)) {
                                 affs_warning(sb, "truncate", "unexpected read error for last block %u (%d)",
-                                            ext, PTR_ERR(ext_bh));
+                                            ext, PTR_ERR(bh));
                                 return;
                         }
                         tmp = be32_to_cpu(AFFS_DATA_HEAD(bh)->next);
diff --git a/fs/affs/inode.c b/fs/affs/inode.c

index 3a0fdec175ba6d5d84ea8ae27f117d0e2cc46644..5d828903ac69ced919eaa3d6eb2429a49922d0e2 100644 (file)
--- a/fs/affs/inode.c
+++ b/fs/affs/inode.c
@@ -388,7 +388,7 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
                 affs_adjust_checksum(inode_bh, block - be32_to_cpu(chain));
                 mark_buffer_dirty_inode(inode_bh, inode);
                 inode->i_nlink = 2;
-               atomic_inc(&inode->i_count);
+               ihold(inode);
         }
         affs_fix_checksum(sb, bh);
         mark_buffer_dirty_inode(bh, inode);
diff --git a/fs/afs/dir.c b/fs/afs/dir.c

index 0d38c09bd55e10f4eaf5f0e25ef11fa3d9d82919..5439e1bc9a86a5702094eaea35eb4c6ebd8e67fe 100644 (file)
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -1045,7 +1045,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
         if (ret < 0)
                 goto link_error;
  
-       atomic_inc(&vnode->vfs_inode.i_count);
+       ihold(&vnode->vfs_inode);
         d_instantiate(dentry, &vnode->vfs_inode);
         key_put(key);
         _leave(" = 0");
diff --git a/fs/aio.c b/fs/aio.c

index 250b0a73c8a8ca92b78c3a0282d2425ce7649dcf..8c8f6c5b6d7930657f7a3aadf44fe4b4dc96b738 100644 (file)
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1543,7 +1543,19 @@ static void aio_batch_add(struct address_space *mapping,
         }
  
         abe = mempool_alloc(abe_pool, GFP_KERNEL);
-       BUG_ON(!igrab(mapping->host));
+
+       /*
+        * we should be using igrab here, but
+        * we don't want to hammer on the global
+        * inode spinlock just to take an extra
+        * reference on a file that we must already
+        * have a reference to.
+        *
+        * When we're called, we always have a reference
+        * on the file, so we must always have a reference
+        * on the inode, so ihold() is safe here.
+        */
+       ihold(mapping->host);
         abe->mapping = mapping;
         hlist_add_head(&abe->list, &batch_hash[bucket]);
         return;
diff --git a/fs/anon_inodes.c b/fs/anon_inodes.c

index e4b75d6eda839d6fddae166904d1002f85a4d09f..5365527ca43fec37cdc021afbc21acf76c321889 100644 (file)
--- a/fs/anon_inodes.c
+++ b/fs/anon_inodes.c
@@ -111,10 +111,9 @@ struct file *anon_inode_getfile(const char *name,
         path.mnt = mntget(anon_inode_mnt);
         /*
          * We know the anon_inode inode count is always greater than zero,
-        * so we can avoid doing an igrab() and we can use an open-coded
-        * atomic_inc().
+        * so ihold() is safe.
          */
-       atomic_inc(&anon_inode_inode->i_count);
+       ihold(anon_inode_inode);
  
         path.dentry->d_op = &anon_inodefs_dentry_operations;
         d_instantiate(path.dentry, anon_inode_inode);
@@ -194,6 +193,7 @@ static struct inode *anon_inode_mkinode(void)
         if (!inode)
                 return ERR_PTR(-ENOMEM);
  
+       inode->i_ino = get_next_ino();
         inode->i_fop = &anon_inode_fops;
  
         inode->i_mapping->a_ops = &anon_aops;
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c

index 821b2b955dac2c7b847a91d7abc91d4e0162c838..ac87e49fa70604e4b8238cd95fbae7a3cc1dcb07 100644 (file)
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -398,6 +398,7 @@ struct inode *autofs4_get_inode(struct super_block *sb,
                 inode->i_gid = sb->s_root->d_inode->i_gid;
         }
         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+       inode->i_ino = get_next_ino();
  
         if (S_ISDIR(inf->mode)) {
                 inode->i_nlink = 2;
diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c

index d967e052b779d80490e08e097f5849ddc88ae48d..685ecff3ab31728d2362a9be39f98dfbc76947c4 100644 (file)
--- a/fs/bfs/dir.c
+++ b/fs/bfs/dir.c
@@ -176,7 +176,7 @@ static int bfs_link(struct dentry *old, struct inode *dir,
         inc_nlink(inode);
         inode->i_ctime = CURRENT_TIME_SEC;
         mark_inode_dirty(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         d_instantiate(new, inode);
         mutex_unlock(&info->bfs_lock);
         return 0;
diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c

index 139fc8083f53cbd9351b3c4ebeeedc79f30102b4..29990f0eee0c90adb18771c2c327fc99ee32c765 100644 (file)
--- a/fs/binfmt_misc.c
+++ b/fs/binfmt_misc.c
@@ -495,6 +495,7 @@ static struct inode *bm_get_inode(struct super_block *sb, int mode)
         struct inode * inode = new_inode(sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_atime = inode->i_mtime = inode->i_ctime =
                         current_fs_time(inode->i_sb);
diff --git a/fs/block_dev.c b/fs/block_dev.c

index b737451e2e9dde95ae0d098b0443f44af1d21899..dea3b628a6ce9a9058c24d2b13c364b7c6adb3c3 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -48,6 +48,21 @@ inline struct block_device *I_BDEV(struct inode *inode)
  
  EXPORT_SYMBOL(I_BDEV);
  
+/*
+ * move the inode from its current bdi to a new bdi. if the inode is dirty
+ * we need to move it onto the dirty list of @dst so that the inode is always
+ * on the right list.
+ */
+static void bdev_inode_switch_bdi(struct inode *inode,
+                       struct backing_dev_info *dst)
+{
+       spin_lock(&inode_lock);
+       inode->i_data.backing_dev_info = dst;
+       if (inode->i_state & I_DIRTY)
+               list_move(&inode->i_wb_list, &dst->wb.b_dirty);
+       spin_unlock(&inode_lock);
+}
+
  static sector_t max_block(struct block_device *bdev)
  {
         sector_t retval = ~((sector_t)0);
@@ -550,7 +565,7 @@ EXPORT_SYMBOL(bdget);
   */
  struct block_device *bdgrab(struct block_device *bdev)
  {
-       atomic_inc(&bdev->bd_inode->i_count);
+       ihold(bdev->bd_inode);
         return bdev;
  }
  
@@ -580,7 +595,7 @@ static struct block_device *bd_acquire(struct inode *inode)
         spin_lock(&bdev_lock);
         bdev = inode->i_bdev;
         if (bdev) {
-               atomic_inc(&bdev->bd_inode->i_count);
+               ihold(bdev->bd_inode);
                 spin_unlock(&bdev_lock);
                 return bdev;
         }
@@ -591,12 +606,12 @@ static struct block_device *bd_acquire(struct inode *inode)
                 spin_lock(&bdev_lock);
                 if (!inode->i_bdev) {
                         /*
-                        * We take an additional bd_inode->i_count for inode,
+                        * We take an additional reference to bd_inode,
                          * and it's released in clear_inode() of inode.
                          * So, we can access it via ->i_mapping always
                          * without igrab().
                          */
-                       atomic_inc(&bdev->bd_inode->i_count);
+                       ihold(bdev->bd_inode);
                         inode->i_bdev = bdev;
                         inode->i_mapping = bdev->bd_inode->i_mapping;
                         list_add(&inode->i_devices, &bdev->bd_inodes);
@@ -1390,7 +1405,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                                 bdi = blk_get_backing_dev_info(bdev);
                                 if (bdi == NULL)
                                         bdi = &default_backing_dev_info;
-                               bdev->bd_inode->i_data.backing_dev_info = bdi;
+                               bdev_inode_switch_bdi(bdev->bd_inode, bdi);
                         }
                         if (bdev->bd_invalidated)
                                 rescan_partitions(disk, bdev);
@@ -1405,8 +1420,8 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
                         if (ret)
                                 goto out_clear;
                         bdev->bd_contains = whole;
-                       bdev->bd_inode->i_data.backing_dev_info =
-                          whole->bd_inode->i_data.backing_dev_info;
+                       bdev_inode_switch_bdi(bdev->bd_inode,
+                               whole->bd_inode->i_data.backing_dev_info);
                         bdev->bd_part = disk_get_part(disk, partno);
                         if (!(disk->flags & GENHD_FL_UP) ||
                             !bdev->bd_part || !bdev->bd_part->nr_sects) {
@@ -1439,7 +1454,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
         disk_put_part(bdev->bd_part);
         bdev->bd_disk = NULL;
         bdev->bd_part = NULL;
-       bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+       bdev_inode_switch_bdi(bdev->bd_inode, &default_backing_dev_info);
         if (bdev != bdev->bd_contains)
                 __blkdev_put(bdev->bd_contains, mode, 1);
         bdev->bd_contains = NULL;
@@ -1533,7 +1548,8 @@ static int __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
                 disk_put_part(bdev->bd_part);
                 bdev->bd_part = NULL;
                 bdev->bd_disk = NULL;
-               bdev->bd_inode->i_data.backing_dev_info = &default_backing_dev_info;
+               bdev_inode_switch_bdi(bdev->bd_inode,
+                                       &default_backing_dev_info);
                 if (bdev != bdev->bd_contains)
                         victim = bdev->bd_contains;
                 bdev->bd_contains = NULL;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c

index c03864406af3ed49b66599fc1cdace0b2bbd2e22..64f99cf69ce0c6b2da2cb62c4884fa44c150bdda 100644 (file)
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3849,7 +3849,7 @@ again:
         p = &root->inode_tree.rb_node;
         parent = NULL;
  
-       if (hlist_unhashed(&inode->i_hash))
+       if (inode_unhashed(inode))
                 return;
  
         spin_lock(&root->inode_lock);
@@ -4758,7 +4758,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
         }
  
         btrfs_set_trans_block_group(trans, dir);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         err = btrfs_add_nondir(trans, dentry, inode, 1, index);
  
diff --git a/fs/buffer.c b/fs/buffer.c

index 8d595ab2aed1a75bb79a3450ff6205d88dd359ba..5930e382959bc504c58bbb428588a372742d4aa4 100644 (file)
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1833,9 +1833,11 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
  }
  EXPORT_SYMBOL(page_zero_new_buffers);
  
-int block_prepare_write(struct page *page, unsigned from, unsigned to,
+int __block_write_begin(struct page *page, loff_t pos, unsigned len,
                 get_block_t *get_block)
  {
+       unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+       unsigned to = from + len;
         struct inode *inode = page->mapping->host;
         unsigned block_start, block_end;
         sector_t block;
@@ -1915,7 +1917,7 @@ int block_prepare_write(struct page *page, unsigned from, unsigned to,
         }
         return err;
  }
-EXPORT_SYMBOL(block_prepare_write);
+EXPORT_SYMBOL(__block_write_begin);
  
  static int __block_commit_write(struct inode *inode, struct page *page,
                 unsigned from, unsigned to)
@@ -1952,15 +1954,6 @@ static int __block_commit_write(struct inode *inode, struct page *page,
         return 0;
  }
  
-int __block_write_begin(struct page *page, loff_t pos, unsigned len,
-               get_block_t *get_block)
-{
-       unsigned start = pos & (PAGE_CACHE_SIZE - 1);
-
-       return block_prepare_write(page, start, start + len, get_block);
-}
-EXPORT_SYMBOL(__block_write_begin);
-
  /*
   * block_write_begin takes care of the basic task of block allocation and
   * bringing partial write blocks uptodate first.
@@ -2378,7 +2371,7 @@ block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
         else
                 end = PAGE_CACHE_SIZE;
  
-       ret = block_prepare_write(page, 0, end, get_block);
+       ret = __block_write_begin(page, 0, end, get_block);
         if (!ret)
                 ret = block_commit_write(page, 0, end);
  
@@ -2465,11 +2458,10 @@ int nobh_write_begin(struct address_space *mapping,
         *fsdata = NULL;
  
         if (page_has_buffers(page)) {
-               unlock_page(page);
-               page_cache_release(page);
-               *pagep = NULL;
-               return block_write_begin(mapping, pos, len, flags, pagep,
-                                        get_block);
+               ret = __block_write_begin(page, pos, len, get_block);
+               if (unlikely(ret))
+                       goto out_release;
+               return ret;
         }
  
         if (PageMappedToDisk(page))
diff --git a/fs/coda/dir.c b/fs/coda/dir.c

index 96fbeab77f2f42476683cd41ead808f519a14a1e..5d8b35539601b819389057b9114dd49a75828597 100644 (file)
--- a/fs/coda/dir.c
+++ b/fs/coda/dir.c
@@ -276,7 +276,7 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
         }
  
         coda_dir_update_mtime(dir_inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         d_instantiate(de, inode);
         inc_nlink(inode);
         return 0;
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c

index cf78d44a8d6a8bcff1a546e3606058de97e90722..253476d78ed86f4d07ec04e3ef63578fccdfa223 100644 (file)
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -135,6 +135,7 @@ struct inode * configfs_new_inode(mode_t mode, struct configfs_dirent * sd)
  {
         struct inode * inode = new_inode(configfs_sb);
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mapping->a_ops = &configfs_aops;
                 inode->i_mapping->backing_dev_info = &configfs_backing_dev_info;
                 inode->i_op = &configfs_inode_operations;
diff --git a/fs/dcache.c b/fs/dcache.c

index 83293be4814965373d4c81e5b7d91bc63f3a55c6..23702a9d4e6d130b6ef226d1157f1a484f93a282 100644 (file)
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -67,33 +67,43 @@ struct dentry_stat_t dentry_stat = {
         .age_limit = 45,
  };
  
-static void __d_free(struct dentry *dentry)
+static struct percpu_counter nr_dentry __cacheline_aligned_in_smp;
+static struct percpu_counter nr_dentry_unused __cacheline_aligned_in_smp;
+
+#if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
+int proc_nr_dentry(ctl_table *table, int write, void __user *buffer,
+                  size_t *lenp, loff_t *ppos)
+{
+       dentry_stat.nr_dentry = percpu_counter_sum_positive(&nr_dentry);
+       dentry_stat.nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+       return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
+static void __d_free(struct rcu_head *head)
  {
+       struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu);
+
         WARN_ON(!list_empty(&dentry->d_alias));
         if (dname_external(dentry))
                 kfree(dentry->d_name.name);
         kmem_cache_free(dentry_cache, dentry); 
  }
  
-static void d_callback(struct rcu_head *head)
-{
-       struct dentry * dentry = container_of(head, struct dentry, d_u.d_rcu);
-       __d_free(dentry);
-}
-
  /*
- * no dcache_lock, please.  The caller must decrement dentry_stat.nr_dentry
- * inside dcache_lock.
+ * no dcache_lock, please.
   */
  static void d_free(struct dentry *dentry)
  {
+       percpu_counter_dec(&nr_dentry);
         if (dentry->d_op && dentry->d_op->d_release)
                 dentry->d_op->d_release(dentry);
+
         /* if dentry was never inserted into hash, immediate free is OK */
         if (hlist_unhashed(&dentry->d_hash))
-               __d_free(dentry);
+               __d_free(&dentry->d_u.d_rcu);
         else
-               call_rcu(&dentry->d_u.d_rcu, d_callback);
+               call_rcu(&dentry->d_u.d_rcu, __d_free);
  }
  
  /*
@@ -123,37 +133,34 @@ static void dentry_iput(struct dentry * dentry)
  }
  
  /*
- * dentry_lru_(add|add_tail|del|del_init) must be called with dcache_lock held.
+ * dentry_lru_(add|del|move_tail) must be called with dcache_lock held.
   */
  static void dentry_lru_add(struct dentry *dentry)
  {
-       list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
-       dentry->d_sb->s_nr_dentry_unused++;
-       dentry_stat.nr_unused++;
-}
-
-static void dentry_lru_add_tail(struct dentry *dentry)
-{
-       list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
-       dentry->d_sb->s_nr_dentry_unused++;
-       dentry_stat.nr_unused++;
+       if (list_empty(&dentry->d_lru)) {
+               list_add(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+               dentry->d_sb->s_nr_dentry_unused++;
+               percpu_counter_inc(&nr_dentry_unused);
+       }
  }
  
  static void dentry_lru_del(struct dentry *dentry)
  {
         if (!list_empty(&dentry->d_lru)) {
-               list_del(&dentry->d_lru);
+               list_del_init(&dentry->d_lru);
                 dentry->d_sb->s_nr_dentry_unused--;
-               dentry_stat.nr_unused--;
+               percpu_counter_dec(&nr_dentry_unused);
         }
  }
  
-static void dentry_lru_del_init(struct dentry *dentry)
+static void dentry_lru_move_tail(struct dentry *dentry)
  {
-       if (likely(!list_empty(&dentry->d_lru))) {
-               list_del_init(&dentry->d_lru);
-               dentry->d_sb->s_nr_dentry_unused--;
-               dentry_stat.nr_unused--;
+       if (list_empty(&dentry->d_lru)) {
+               list_add_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
+               dentry->d_sb->s_nr_dentry_unused++;
+               percpu_counter_inc(&nr_dentry_unused);
+       } else {
+               list_move_tail(&dentry->d_lru, &dentry->d_sb->s_dentry_lru);
         }
  }
  
@@ -172,7 +179,6 @@ static struct dentry *d_kill(struct dentry *dentry)
         struct dentry *parent;
  
         list_del(&dentry->d_u.d_child);
-       dentry_stat.nr_dentry--;        /* For d_free, below */
         /*drops the locks, at that point nobody can reach this dentry */
         dentry_iput(dentry);
         if (IS_ROOT(dentry))
@@ -237,13 +243,15 @@ repeat:
                 if (dentry->d_op->d_delete(dentry))
                         goto unhash_it;
         }
+
         /* Unreachable? Get rid of it */
         if (d_unhashed(dentry))
                 goto kill_it;
-       if (list_empty(&dentry->d_lru)) {
-               dentry->d_flags |= DCACHE_REFERENCED;
-               dentry_lru_add(dentry);
-       }
+
+       /* Otherwise leave it cached and ensure it's on the LRU */
+       dentry->d_flags |= DCACHE_REFERENCED;
+       dentry_lru_add(dentry);
+
         spin_unlock(&dentry->d_lock);
         spin_unlock(&dcache_lock);
         return;
@@ -318,11 +326,10 @@ int d_invalidate(struct dentry * dentry)
  EXPORT_SYMBOL(d_invalidate);
  
  /* This should be called _only_ with dcache_lock held */
-
  static inline struct dentry * __dget_locked(struct dentry *dentry)
  {
         atomic_inc(&dentry->d_count);
-       dentry_lru_del_init(dentry);
+       dentry_lru_del(dentry);
         return dentry;
  }
  
@@ -441,73 +448,27 @@ static void prune_one_dentry(struct dentry * dentry)
  
                 if (dentry->d_op && dentry->d_op->d_delete)
                         dentry->d_op->d_delete(dentry);
-               dentry_lru_del_init(dentry);
+               dentry_lru_del(dentry);
                 __d_drop(dentry);
                 dentry = d_kill(dentry);
                 spin_lock(&dcache_lock);
         }
  }
  
-/*
- * Shrink the dentry LRU on a given superblock.
- * @sb   : superblock to shrink dentry LRU.
- * @count: If count is NULL, we prune all dentries on superblock.
- * @flags: If flags is non-zero, we need to do special processing based on
- * which flags are set. This means we don't need to maintain multiple
- * similar copies of this loop.
- */
-static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+static void shrink_dentry_list(struct list_head *list)
  {
-       LIST_HEAD(referenced);
-       LIST_HEAD(tmp);
         struct dentry *dentry;
-       int cnt = 0;
  
-       BUG_ON(!sb);
-       BUG_ON((flags & DCACHE_REFERENCED) && count == NULL);
-       spin_lock(&dcache_lock);
-       if (count != NULL)
-               /* called from prune_dcache() and shrink_dcache_parent() */
-               cnt = *count;
-restart:
-       if (count == NULL)
-               list_splice_init(&sb->s_dentry_lru, &tmp);
-       else {
-               while (!list_empty(&sb->s_dentry_lru)) {
-                       dentry = list_entry(sb->s_dentry_lru.prev,
-                                       struct dentry, d_lru);
-                       BUG_ON(dentry->d_sb != sb);
+       while (!list_empty(list)) {
+               dentry = list_entry(list->prev, struct dentry, d_lru);
+               dentry_lru_del(dentry);
  
-                       spin_lock(&dentry->d_lock);
-                       /*
-                        * If we are honouring the DCACHE_REFERENCED flag and
-                        * the dentry has this flag set, don't free it. Clear
-                        * the flag and put it back on the LRU.
-                        */
-                       if ((flags & DCACHE_REFERENCED)
-                               && (dentry->d_flags & DCACHE_REFERENCED)) {
-                               dentry->d_flags &= ~DCACHE_REFERENCED;
-                               list_move(&dentry->d_lru, &referenced);
-                               spin_unlock(&dentry->d_lock);
-                       } else {
-                               list_move_tail(&dentry->d_lru, &tmp);
-                               spin_unlock(&dentry->d_lock);
-                               cnt--;
-                               if (!cnt)
-                                       break;
-                       }
-                       cond_resched_lock(&dcache_lock);
-               }
-       }
-       while (!list_empty(&tmp)) {
-               dentry = list_entry(tmp.prev, struct dentry, d_lru);
-               dentry_lru_del_init(dentry);
-               spin_lock(&dentry->d_lock);
                 /*
                  * We found an inuse dentry which was not removed from
                  * the LRU because of laziness during lookup.  Do not free
                  * it - just keep it off the LRU list.
                  */
+               spin_lock(&dentry->d_lock);
                 if (atomic_read(&dentry->d_count)) {
                         spin_unlock(&dentry->d_lock);
                         continue;
@@ -516,13 +477,60 @@ restart:
                 /* dentry->d_lock was dropped in prune_one_dentry() */
                 cond_resched_lock(&dcache_lock);
         }
-       if (count == NULL && !list_empty(&sb->s_dentry_lru))
-               goto restart;
-       if (count != NULL)
-               *count = cnt;
+}
+
+/**
+ * __shrink_dcache_sb - shrink the dentry LRU on a given superblock
+ * @sb:                superblock to shrink dentry LRU.
+ * @count:     number of entries to prune
+ * @flags:     flags to control the dentry processing
+ *
+ * If flags contains DCACHE_REFERENCED, referenced dentries will not be pruned.
+ */
+static void __shrink_dcache_sb(struct super_block *sb, int *count, int flags)
+{
+       /* called from prune_dcache() and shrink_dcache_parent() */
+       struct dentry *dentry;
+       LIST_HEAD(referenced);
+       LIST_HEAD(tmp);
+       int cnt = *count;
+
+       spin_lock(&dcache_lock);
+       while (!list_empty(&sb->s_dentry_lru)) {
+               dentry = list_entry(sb->s_dentry_lru.prev,
+                               struct dentry, d_lru);
+               BUG_ON(dentry->d_sb != sb);
+
+               /*
+                * If we are honouring the DCACHE_REFERENCED flag and the
+                * dentry has this flag set, don't free it.  Clear the flag
+                * and put it back on the LRU.
+                */
+               if (flags & DCACHE_REFERENCED) {
+                       spin_lock(&dentry->d_lock);
+                       if (dentry->d_flags & DCACHE_REFERENCED) {
+                               dentry->d_flags &= ~DCACHE_REFERENCED;
+                               list_move(&dentry->d_lru, &referenced);
+                               spin_unlock(&dentry->d_lock);
+                               cond_resched_lock(&dcache_lock);
+                               continue;
+                       }
+                       spin_unlock(&dentry->d_lock);
+               }
+
+               list_move_tail(&dentry->d_lru, &tmp);
+               if (!--cnt)
+                       break;
+               cond_resched_lock(&dcache_lock);
+       }
+
+       *count = cnt;
+       shrink_dentry_list(&tmp);
+
         if (!list_empty(&referenced))
                 list_splice(&referenced, &sb->s_dentry_lru);
         spin_unlock(&dcache_lock);
+
  }
  
  /**
@@ -538,7 +546,7 @@ static void prune_dcache(int count)
  {
         struct super_block *sb, *p = NULL;
         int w_count;
-       int unused = dentry_stat.nr_unused;
+       int unused = percpu_counter_sum_positive(&nr_dentry_unused);
         int prune_ratio;
         int pruned;
  
@@ -608,13 +616,19 @@ static void prune_dcache(int count)
   * shrink_dcache_sb - shrink dcache for a superblock
   * @sb: superblock
   *
- * Shrink the dcache for the specified super block. This
- * is used to free the dcache before unmounting a file
- * system
+ * Shrink the dcache for the specified super block. This is used to free
+ * the dcache before unmounting a file system.
   */
-void shrink_dcache_sb(struct super_block * sb)
+void shrink_dcache_sb(struct super_block *sb)
  {
-       __shrink_dcache_sb(sb, NULL, 0);
+       LIST_HEAD(tmp);
+
+       spin_lock(&dcache_lock);
+       while (!list_empty(&sb->s_dentry_lru)) {
+               list_splice_init(&sb->s_dentry_lru, &tmp);
+               shrink_dentry_list(&tmp);
+       }
+       spin_unlock(&dcache_lock);
  }
  EXPORT_SYMBOL(shrink_dcache_sb);
  
@@ -632,7 +646,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
  
         /* detach this root from the system */
         spin_lock(&dcache_lock);
-       dentry_lru_del_init(dentry);
+       dentry_lru_del(dentry);
         __d_drop(dentry);
         spin_unlock(&dcache_lock);
  
@@ -646,7 +660,7 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                         spin_lock(&dcache_lock);
                         list_for_each_entry(loop, &dentry->d_subdirs,
                                             d_u.d_child) {
-                               dentry_lru_del_init(loop);
+                               dentry_lru_del(loop);
                                 __d_drop(loop);
                                 cond_resched_lock(&dcache_lock);
                         }
@@ -703,20 +717,13 @@ static void shrink_dcache_for_umount_subtree(struct dentry *dentry)
                          * otherwise we ascend to the parent and move to the
                          * next sibling if there is one */
                         if (!parent)
-                               goto out;
-
+                               return;
                         dentry = parent;
-
                 } while (list_empty(&dentry->d_subdirs));
  
                 dentry = list_entry(dentry->d_subdirs.next,
                                     struct dentry, d_u.d_child);
         }
-out:
-       /* several dentries were freed, need to correct nr_dentry */
-       spin_lock(&dcache_lock);
-       dentry_stat.nr_dentry -= detached;
-       spin_unlock(&dcache_lock);
  }
  
  /*
@@ -830,14 +837,15 @@ resume:
                 struct dentry *dentry = list_entry(tmp, struct dentry, d_u.d_child);
                 next = tmp->next;
  
-               dentry_lru_del_init(dentry);
                 /* 
                  * move only zero ref count dentries to the end 
                  * of the unused list for prune_dcache
                  */
                 if (!atomic_read(&dentry->d_count)) {
-                       dentry_lru_add_tail(dentry);
+                       dentry_lru_move_tail(dentry);
                         found++;
+               } else {
+                       dentry_lru_del(dentry);
                 }
  
                 /*
@@ -900,12 +908,16 @@ EXPORT_SYMBOL(shrink_dcache_parent);
   */
  static int shrink_dcache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
  {
+       int nr_unused;
+
         if (nr) {
                 if (!(gfp_mask & __GFP_FS))
                         return -1;
                 prune_dcache(nr);
         }
-       return (dentry_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+
+       nr_unused = percpu_counter_sum_positive(&nr_dentry_unused);
+       return (nr_unused / 100) * sysctl_vfs_cache_pressure;
  }
  
  static struct shrinker dcache_shrinker = {
@@ -972,9 +984,10 @@ struct dentry *d_alloc(struct dentry * parent, const struct qstr *name)
         spin_lock(&dcache_lock);
         if (parent)
                 list_add(&dentry->d_u.d_child, &parent->d_subdirs);
-       dentry_stat.nr_dentry++;
         spin_unlock(&dcache_lock);
  
+       percpu_counter_inc(&nr_dentry);
+
         return dentry;
  }
  EXPORT_SYMBOL(d_alloc);
@@ -1478,33 +1491,31 @@ out:
   * This is used by ncpfs in its readdir implementation.
   * Zero is returned in the dentry is invalid.
   */
- 
-int d_validate(struct dentry *dentry, struct dentry *dparent)
+int d_validate(struct dentry *dentry, struct dentry *parent)
  {
-       struct hlist_head *base;
-       struct hlist_node *lhp;
+       struct hlist_head *head;
+       struct hlist_node *node;
+       struct dentry *d;
  
         /* Check whether the ptr might be valid at all.. */
         if (!kmem_ptr_validate(dentry_cache, dentry))
-               goto out;
-
-       if (dentry->d_parent != dparent)
-               goto out;
+               return 0;
+       if (dentry->d_parent != parent)
+               return 0;
  
-       spin_lock(&dcache_lock);
-       base = d_hash(dparent, dentry->d_name.hash);
-       hlist_for_each(lhp,base) { 
-               /* hlist_for_each_entry_rcu() not required for d_hash list
-                * as it is parsed under dcache_lock
-                */
-               if (dentry == hlist_entry(lhp, struct dentry, d_hash)) {
-                       __dget_locked(dentry);
-                       spin_unlock(&dcache_lock);
+       /* Only read dentry->d_name once the pointer has passed the checks. */
+       head = d_hash(parent, dentry->d_name.hash);
+
+       rcu_read_lock();
+       hlist_for_each_entry_rcu(d, node, head, d_hash) {
+               if (d == dentry) {
+                       dget(dentry);
+                       /* The success path must leave the RCU section too. */
+                       rcu_read_unlock();
                         return 1;
                 }
         }
-       spin_unlock(&dcache_lock);
-out:
+       rcu_read_unlock();
         return 0;
  }
  EXPORT_SYMBOL(d_validate);
@@ -1994,7 +2000,7 @@ global_root:
   * Returns a pointer into the buffer or an error code if the
   * path was too long.
   *
- * "buflen" should be positive. Caller holds the dcache_lock.
+ * "buflen" should be positive.
   *
   * If path is not reachable from the supplied root, then the value of
   * root is changed (without modifying refcounts).
@@ -2006,10 +2012,12 @@ char *__d_path(const struct path *path, struct path *root,
         int error;
  
         prepend(&res, &buflen, "\0", 1);
+       spin_lock(&dcache_lock);
         error = prepend_path(path, root, &res, &buflen);
+       spin_unlock(&dcache_lock);
+
         if (error)
                 return ERR_PTR(error);
-
         return res;
  }
  
@@ -2419,6 +2427,9 @@ static void __init dcache_init(void)
  {
         int loop;
  
+       percpu_counter_init(&nr_dentry, 0);
+       percpu_counter_init(&nr_dentry_unused, 0);
+
         /* 
          * A constructor could be added for stable state like the lists,
          * but it is probably not worth it because of the cache nature
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c

index 30a87b3dbcac1286de7fa400f6855b2937796d5b..a4ed8380e98a630197edf4c556b8f4067a1c872f 100644 (file)
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -40,6 +40,7 @@ static struct inode *debugfs_get_inode(struct super_block *sb, int mode, dev_t d
         struct inode *inode = new_inode(sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                 switch (mode & S_IFMT) {
diff --git a/fs/exofs/file.c b/fs/exofs/file.c

index 68cb23e3bb9828e88c2647a7f028db6d2f94bd78..b905c79b4f0afc88b7a1997009960f09f50f595f 100644 (file)
--- a/fs/exofs/file.c
+++ b/fs/exofs/file.c
@@ -46,10 +46,6 @@ static int exofs_file_fsync(struct file *filp, int datasync)
  {
         int ret;
         struct inode *inode = filp->f_mapping->host;
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_ALL,
-               .nr_to_write = 0, /* metadata-only; caller takes care of data */
-       };
         struct super_block *sb;
  
         if (!(inode->i_state & I_DIRTY))
@@ -57,7 +53,7 @@ static int exofs_file_fsync(struct file *filp, int datasync)
         if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
                 return 0;
  
-       ret = sync_inode(inode, &wbc);
+       ret = sync_inode_metadata(inode, 1);
  
         /* This is a good place to write the sb */
         /* TODO: Sechedule an sb-sync on create */
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c

index b7dd0c23686376822ffd40c185bc9b1683796d31..264e95d02830f28d992eefb717460ccd0be09d4d 100644 (file)
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -153,7 +153,7 @@ static int exofs_link(struct dentry *old_dentry, struct inode *dir,
  
         inode->i_ctime = CURRENT_TIME;
         inode_inc_link_count(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         return exofs_add_nondir(dentry, inode);
  }
diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c

index e9e175949a637822a8689e757362673284af0d29..51b304056f10fcd47c18593a9a12bf56e8633063 100644 (file)
--- a/fs/exportfs/expfs.c
+++ b/fs/exportfs/expfs.c
@@ -74,21 +74,20 @@ static struct dentry *
  find_disconnected_root(struct dentry *dentry)
  {
         dget(dentry);
-       spin_lock(&dentry->d_lock);
-       while (!IS_ROOT(dentry) &&
-              (dentry->d_parent->d_flags & DCACHE_DISCONNECTED)) {
-               struct dentry *parent = dentry->d_parent;
-               dget(parent);
-               spin_unlock(&dentry->d_lock);
+       while (!IS_ROOT(dentry)) {
+               struct dentry *parent = dget_parent(dentry);
+
+               if (!(parent->d_flags & DCACHE_DISCONNECTED)) {
+                       dput(parent);
+                       break;
+               }
+
                 dput(dentry);
                 dentry = parent;
-               spin_lock(&dentry->d_lock);
         }
-       spin_unlock(&dentry->d_lock);
         return dentry;
  }
  
-
  /*
   * Make sure target_dir is fully connected to the dentry tree.
   *
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c

index 764109886ec00f8f8bfafed187e88d7ebed63202..2709b34206abdbc442377c2d02ed0d17e5959ec9 100644 (file)
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -98,7 +98,7 @@ static int ext2_commit_chunk(struct page *page, loff_t pos, unsigned len)
         if (IS_DIRSYNC(dir)) {
                 err = write_one_page(page, 1);
                 if (!err)
-                       err = ext2_sync_inode(dir);
+                       err = sync_inode_metadata(dir, 1);
         } else {
                 unlock_page(page);
         }
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h

index 416daa62242c5410e99ea455dca1854ddfbc0353..6346a2acf326075b2e23739a2ba9b55ce6d56c33 100644 (file)
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -120,7 +120,6 @@ extern unsigned long ext2_count_free (struct buffer_head *, unsigned);
  extern struct inode *ext2_iget (struct super_block *, unsigned long);
  extern int ext2_write_inode (struct inode *, struct writeback_control *);
  extern void ext2_evict_inode(struct inode *);
-extern int ext2_sync_inode (struct inode *);
  extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
  extern int ext2_setattr (struct dentry *, struct iattr *);
  extern void ext2_set_inode_flags(struct inode *inode);
diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c

index 533699c1604060417e2fb3cdc42d61f5c9b52bdc..40ad210a5049a6eed9b184b3c30750ce6951fa46 100644 (file)
--- a/fs/ext2/inode.c
+++ b/fs/ext2/inode.c
@@ -1203,7 +1203,7 @@ static int ext2_setsize(struct inode *inode, loff_t newsize)
         inode->i_mtime = inode->i_ctime = CURRENT_TIME_SEC;
         if (inode_needs_sync(inode)) {
                 sync_mapping_buffers(inode->i_mapping);
-               ext2_sync_inode (inode);
+               sync_inode_metadata(inode, 1);
         } else {
                 mark_inode_dirty(inode);
         }
@@ -1523,15 +1523,6 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
         return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
  }
  
-int ext2_sync_inode(struct inode *inode)
-{
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_ALL,
-               .nr_to_write = 0,       /* sys_fsync did this */
-       };
-       return sync_inode(inode, &wbc);
-}
-
  int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
  {
         struct inode *inode = dentry->d_inode;
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c

index 71efb0e9a3f2ababa1ae74033e5caa73332fba61..f8aecd2e32976c40b1af182a13a312ae0b8e23fe 100644 (file)
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -206,7 +206,7 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
  
         inode->i_ctime = CURRENT_TIME_SEC;
         inode_inc_link_count(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         err = ext2_add_link(dentry, inode);
         if (!err) {
diff --git a/fs/ext2/super.c b/fs/ext2/super.c

index 85df87d0f7b70a01dde7b39826f6d9abbeb08799..0901320671da20631092a1cc57113e9a0ef46d8d 100644 (file)
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -1221,9 +1221,7 @@ static int ext2_remount (struct super_block * sb, int * flags, char * data)
         }
  
         es = sbi->s_es;
-       if (((sbi->s_mount_opt & EXT2_MOUNT_XIP) !=
-           (old_mount_opt & EXT2_MOUNT_XIP)) &&
-           invalidate_inodes(sb)) {
+       if ((sbi->s_mount_opt ^ old_mount_opt) & EXT2_MOUNT_XIP) {
                 ext2_msg(sb, KERN_WARNING, "warning: refusing change of "
                          "xip flag with busy inodes while remounting");
                 sbi->s_mount_opt &= ~EXT2_MOUNT_XIP;
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c

index 8c29ae15129ed06bacb04727d4e2d6da17700960..f84700be3274292808c9cfa2b9517274d9b17fc1 100644 (file)
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -699,7 +699,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
         EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
         inode->i_ctime = CURRENT_TIME_SEC;
         if (IS_SYNC(inode)) {
-               error = ext2_sync_inode (inode);
+               error = sync_inode_metadata(inode, 1);
                 /* In case sync failed due to ENOSPC the inode was actually
                  * written (only some dirty data were not) so we just proceed
                  * as if nothing happened and cleanup the unused block */
diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c

index 5e0faf4cda797800713a335f3c6d3d5d64fcd2c2..ad05353040a12338c1a0c84626abcfc20f96408f 100644 (file)
--- a/fs/ext3/inode.c
+++ b/fs/ext3/inode.c
@@ -1696,8 +1696,8 @@ static int ext3_journalled_writepage(struct page *page,
                  * doesn't seem much point in redirtying the page here.
                  */
                 ClearPageChecked(page);
-               ret = block_prepare_write(page, 0, PAGE_CACHE_SIZE,
-                                       ext3_get_block);
+               ret = __block_write_begin(page, 0, PAGE_CACHE_SIZE,
+                                         ext3_get_block);
                 if (ret != 0) {
                         ext3_journal_stop(handle);
                         goto out_unlock;
diff --git a/fs/ext3/namei.c b/fs/ext3/namei.c

index 2b35ddb70d65535a53f98268310ab377ea94e1f2..bce9dce639b874989fc687173531728842557c24 100644 (file)
--- a/fs/ext3/namei.c
+++ b/fs/ext3/namei.c
@@ -2260,7 +2260,7 @@ retry:
  
         inode->i_ctime = CURRENT_TIME_SEC;
         inc_nlink(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         err = ext3_add_entry(handle, dentry, inode);
         if (!err) {
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c

index 4b8debeb39652fa0e10bcf36d397d6125c750d1a..49635ef236f84d40b0090a78a1a96860fae1fb6b 100644 (file)
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1538,10 +1538,10 @@ static int do_journal_get_write_access(handle_t *handle,
         if (!buffer_mapped(bh) || buffer_freed(bh))
                 return 0;
         /*
-        * __block_prepare_write() could have dirtied some buffers. Clean
+        * __block_write_begin() could have dirtied some buffers. Clean
          * the dirty bit as jbd2_journal_get_write_access() could complain
          * otherwise about fs integrity issues. Setting of the dirty bit
-        * by __block_prepare_write() isn't a real problem here as we clear
+        * by __block_write_begin() isn't a real problem here as we clear
          * the bit before releasing a page lock and thus writeback cannot
          * ever write the buffer.
          */
@@ -2550,8 +2550,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
                 if (buffer_delay(bh))
                         return 0; /* Not sure this could or should happen */
                 /*
-                * XXX: __block_prepare_write() unmaps passed block,
-                * is it OK?
+                * XXX: __block_write_begin() unmaps passed block, is it OK?
                  */
                 ret = ext4_da_reserve_space(inode, iblock);
                 if (ret)
@@ -2583,7 +2582,7 @@ static int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
  /*
   * This function is used as a standard get_block_t calback function
   * when there is no desire to allocate any blocks.  It is used as a
- * callback function for block_prepare_write() and block_write_full_page().
+ * callback function for block_write_begin() and block_write_full_page().
   * These functions should only try to map a single block at a time.
   *
   * Since this function doesn't do block allocations even if the caller
@@ -2743,7 +2742,7 @@ static int ext4_writepage(struct page *page,
                  * all are mapped and non delay. We don't want to
                  * do block allocation here.
                  */
-               ret = block_prepare_write(page, 0, len,
+               ret = __block_write_begin(page, 0, len,
                                           noalloc_get_block_write);
                 if (!ret) {
                         page_bufs = page_buffers(page);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c

index 19aa0d44d82283680f462f1cc2b76fe2fe220618..42f77b1dc72d810f48deb583baeb0f984c6fbc61 100644 (file)
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2373,6 +2373,7 @@ static int ext4_mb_init_backend(struct super_block *sb)
                 printk(KERN_ERR "EXT4-fs: can't get new inode\n");
                 goto err_freesgi;
         }
+       sbi->s_buddy_cache->i_ino = get_next_ino();
         EXT4_I(sbi->s_buddy_cache)->i_disksize = 0;
         for (i = 0; i < ngroups; i++) {
                 desc = ext4_get_group_desc(sb, i, NULL);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c

index 314c0d3b3fa9aece3015a113b6d348b7bc2be4aa..bd39885b599854329922fa443a016e405aeac1f9 100644 (file)
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2312,7 +2312,7 @@ retry:
  
         inode->i_ctime = ext4_current_time(inode);
         ext4_inc_count(handle, inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         err = ext4_add_entry(handle, dentry, inode);
         if (!err) {
diff --git a/fs/freevxfs/vxfs_inode.c b/fs/freevxfs/vxfs_inode.c

index 79d1b4ea13e79bc6531761ed817f0011653492bd..8c04eac5079d34d5ecdec820e23990294cc1d2ba 100644 (file)
--- a/fs/freevxfs/vxfs_inode.c
+++ b/fs/freevxfs/vxfs_inode.c
@@ -260,6 +260,7 @@ vxfs_get_fake_inode(struct super_block *sbp, struct vxfs_inode_info *vip)
         struct inode                    *ip = NULL;
  
         if ((ip = new_inode(sbp))) {
+               ip->i_ino = get_next_ino();
                 vxfs_iinit(ip, vip);
                 ip->i_mapping->a_ops = &vxfs_aops;
         }
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c

index 9e46aec10d1aaaec3a4f0fe968ebc99412f09713..aed881a76b229602ece291fb8c55fbd5ec45ee90 100644 (file)
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -79,6 +79,11 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode)
         return sb->s_bdi;
  }
  
+static inline struct inode *wb_inode(struct list_head *head)
+{
+       return list_entry(head, struct inode, i_wb_list);
+}
+
  static void bdi_queue_work(struct backing_dev_info *bdi,
                 struct wb_writeback_work *work)
  {
@@ -172,11 +177,11 @@ static void redirty_tail(struct inode *inode)
         if (!list_empty(&wb->b_dirty)) {
                 struct inode *tail;
  
-               tail = list_entry(wb->b_dirty.next, struct inode, i_list);
+               tail = wb_inode(wb->b_dirty.next);
                 if (time_before(inode->dirtied_when, tail->dirtied_when))
                         inode->dirtied_when = jiffies;
         }
-       list_move(&inode->i_list, &wb->b_dirty);
+       list_move(&inode->i_wb_list, &wb->b_dirty);
  }
  
  /*
@@ -186,7 +191,7 @@ static void requeue_io(struct inode *inode)
  {
         struct bdi_writeback *wb = &inode_to_bdi(inode)->wb;
  
-       list_move(&inode->i_list, &wb->b_more_io);
+       list_move(&inode->i_wb_list, &wb->b_more_io);
  }
  
  static void inode_sync_complete(struct inode *inode)
@@ -227,14 +232,14 @@ static void move_expired_inodes(struct list_head *delaying_queue,
         int do_sb_sort = 0;
  
         while (!list_empty(delaying_queue)) {
-               inode = list_entry(delaying_queue->prev, struct inode, i_list);
+               inode = wb_inode(delaying_queue->prev);
                 if (older_than_this &&
                     inode_dirtied_after(inode, *older_than_this))
                         break;
                 if (sb && sb != inode->i_sb)
                         do_sb_sort = 1;
                 sb = inode->i_sb;
-               list_move(&inode->i_list, &tmp);
+               list_move(&inode->i_wb_list, &tmp);
         }
  
         /* just one sb in list, splice to dispatch_queue and we're done */
@@ -245,12 +250,11 @@ static void move_expired_inodes(struct list_head *delaying_queue,
  
         /* Move inodes from one superblock together */
         while (!list_empty(&tmp)) {
-               inode = list_entry(tmp.prev, struct inode, i_list);
-               sb = inode->i_sb;
+               sb = wb_inode(tmp.prev)->i_sb;
                 list_for_each_prev_safe(pos, node, &tmp) {
-                       inode = list_entry(pos, struct inode, i_list);
+                       inode = wb_inode(pos);
                         if (inode->i_sb == sb)
-                               list_move(&inode->i_list, dispatch_queue);
+                               list_move(&inode->i_wb_list, dispatch_queue);
                 }
         }
  }
@@ -408,16 +412,13 @@ writeback_single_inode(struct inode *inode, struct writeback_control *wbc)
                          * completion.
                          */
                         redirty_tail(inode);
-               } else if (atomic_read(&inode->i_count)) {
-                       /*
-                        * The inode is clean, inuse
-                        */
-                       list_move(&inode->i_list, &inode_in_use);
                 } else {
                         /*
-                        * The inode is clean, unused
+                        * The inode is clean.  At this point we either have
+                        * a reference to the inode or it's on its way out.
+                        * No need to add it back to the LRU.
                          */
-                       list_move(&inode->i_list, &inode_unused);
+                       list_del_init(&inode->i_wb_list);
                 }
         }
         inode_sync_complete(inode);
@@ -465,8 +466,7 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
  {
         while (!list_empty(&wb->b_io)) {
                 long pages_skipped;
-               struct inode *inode = list_entry(wb->b_io.prev,
-                                                struct inode, i_list);
+               struct inode *inode = wb_inode(wb->b_io.prev);
  
                 if (inode->i_sb != sb) {
                         if (only_this_sb) {
@@ -487,10 +487,16 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                         return 0;
                 }
  
-               if (inode->i_state & (I_NEW | I_WILL_FREE)) {
+               /*
+                * Don't bother with new inodes or inodes being freed, first
+                * kind does not need periodic writeout yet, and for the latter
+                * kind writeout is handled by the freer.
+                */
+               if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
                         requeue_io(inode);
                         continue;
                 }
+
                 /*
                  * Was this inode dirtied after sync_sb_inodes was called?
                  * This keeps sync from extra jobs and livelock.
@@ -498,7 +504,6 @@ static int writeback_sb_inodes(struct super_block *sb, struct bdi_writeback *wb,
                 if (inode_dirtied_after(inode, wbc->wb_start))
                         return 1;
  
-               BUG_ON(inode->i_state & I_FREEING);
                 __iget(inode);
                 pages_skipped = wbc->pages_skipped;
                 writeback_single_inode(inode, wbc);
@@ -536,8 +541,7 @@ void writeback_inodes_wb(struct bdi_writeback *wb,
                 queue_io(wb, wbc->older_than_this);
  
         while (!list_empty(&wb->b_io)) {
-               struct inode *inode = list_entry(wb->b_io.prev,
-                                                struct inode, i_list);
+               struct inode *inode = wb_inode(wb->b_io.prev);
                 struct super_block *sb = inode->i_sb;
  
                 if (!pin_sb_for_writeback(sb)) {
@@ -675,8 +679,7 @@ static long wb_writeback(struct bdi_writeback *wb,
                  */
                 spin_lock(&inode_lock);
                 if (!list_empty(&wb->b_more_io))  {
-                       inode = list_entry(wb->b_more_io.prev,
-                                               struct inode, i_list);
+                       inode = wb_inode(wb->b_more_io.prev);
                         trace_wbc_writeback_wait(&wbc, wb->bdi);
                         inode_wait_for_writeback(inode);
                 }
@@ -727,7 +730,7 @@ static long wb_check_old_data_flush(struct bdi_writeback *wb)
          */
         nr_pages = global_page_state(NR_FILE_DIRTY) +
                         global_page_state(NR_UNSTABLE_NFS) +
-                       (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+                       get_nr_dirty_inodes();
  
         if (nr_pages) {
                 struct wb_writeback_work work = {
@@ -966,7 +969,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                  * dirty list.  Add blockdev inodes as well.
                  */
                 if (!S_ISBLK(inode->i_mode)) {
-                       if (hlist_unhashed(&inode->i_hash))
+                       if (inode_unhashed(inode))
                                 goto out;
                 }
                 if (inode->i_state & I_FREEING)
@@ -994,7 +997,7 @@ void __mark_inode_dirty(struct inode *inode, int flags)
                         }
  
                         inode->dirtied_when = jiffies;
-                       list_move(&inode->i_list, &bdi->wb.b_dirty);
+                       list_move(&inode->i_wb_list, &bdi->wb.b_dirty);
                 }
         }
  out:
@@ -1094,8 +1097,7 @@ void writeback_inodes_sb(struct super_block *sb)
  
         WARN_ON(!rwsem_is_locked(&sb->s_umount));
  
-       work.nr_pages = nr_dirty + nr_unstable +
-                       (inodes_stat.nr_inodes - inodes_stat.nr_unused);
+       work.nr_pages = nr_dirty + nr_unstable + get_nr_dirty_inodes();
  
         bdi_queue_work(sb->s_bdi, &work);
         wait_for_completion(&done);
@@ -1202,3 +1204,23 @@ int sync_inode(struct inode *inode, struct writeback_control *wbc)
         return ret;
  }
  EXPORT_SYMBOL(sync_inode);
+
+/**
+ * sync_inode_metadata - write an inode to disk
+ * @inode: the inode to sync
+ * @wait: wait for I/O to complete.
+ *
+ * Write an inode to disk and adjust its dirty state after completion.
+ *
+ * Note: only writes the actual inode, no associated data or other metadata.
+ */
+int sync_inode_metadata(struct inode *inode, int wait)
+{
+       struct writeback_control wbc = {
+               .sync_mode = wait ? WB_SYNC_ALL : WB_SYNC_NONE,
+               .nr_to_write = 0, /* metadata-only */
+       };
+
+       return sync_inode(inode, &wbc);
+}
+EXPORT_SYMBOL(sync_inode_metadata);
diff --git a/fs/fuse/control.c b/fs/fuse/control.c

index 7367e177186f4b0efb96d1281134860058050e6d..4eba07661e5c562b50462d8feee6982e39fe2c4b 100644 (file)
--- a/fs/fuse/control.c
+++ b/fs/fuse/control.c
@@ -222,6 +222,7 @@ static struct dentry *fuse_ctl_add_dentry(struct dentry *parent,
         if (!inode)
                 return NULL;
  
+       inode->i_ino = get_next_ino();
         inode->i_mode = mode;
         inode->i_uid = fc->user_id;
         inode->i_gid = fc->group_id;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c

index 6b24afb96aaedade304b48bb427e664eae8e6e53..4f36f8832b9b1b167321e119260da82d89b4cdf0 100644 (file)
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -618,7 +618,6 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
         struct gfs2_alloc *al = NULL;
         pgoff_t index = pos >> PAGE_CACHE_SHIFT;
         unsigned from = pos & (PAGE_CACHE_SIZE - 1);
-       unsigned to = from + len;
         struct page *page;
  
         gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
@@ -691,7 +690,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
         }
  
  prepare_write:
-       error = block_prepare_write(page, from, to, gfs2_block_map);
+       error = __block_write_begin(page, from, len, gfs2_block_map);
  out:
         if (error == 0)
                 return 0;
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c

index aeafc233dc897fdb102df29bf052fc040b61faef..cade1acbcea9420dc7c69622888d820af24eea99 100644 (file)
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -1219,7 +1219,6 @@ fail_sb:
  fail_locking:
         init_locking(sdp, &mount_gh, UNDO);
  fail_lm:
-       invalidate_inodes(sb);
         gfs2_gl_hash_clear(sdp);
         gfs2_lm_unmount(sdp);
  fail_sys:
diff --git a/fs/gfs2/ops_inode.c b/fs/gfs2/ops_inode.c

index 0534510200d5961887d3ee957d799e3b4669e76b..12cbea7502c26040fb90db5750e764bdd831079a 100644 (file)
--- a/fs/gfs2/ops_inode.c
+++ b/fs/gfs2/ops_inode.c
@@ -255,7 +255,7 @@ out_parent:
         gfs2_holder_uninit(ghs);
         gfs2_holder_uninit(ghs + 1);
         if (!error) {
-               atomic_inc(&inode->i_count);
+               ihold(inode);
                 d_instantiate(dentry, inode);
                 mark_inode_dirty(inode);
         }
@@ -1294,7 +1294,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
         int error;
  
         if (!page_has_buffers(page)) {
-               error = block_prepare_write(page, from, to, gfs2_block_map);
+               error = __block_write_begin(page, from, to - from, gfs2_block_map);
                 if (unlikely(error))
                         return error;
  
@@ -1313,7 +1313,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
                 next += bh->b_size;
                 if (buffer_mapped(bh)) {
                         if (end) {
-                               error = block_prepare_write(page, start, end,
+                               error = __block_write_begin(page, start, end - start,
                                                             gfs2_block_map);
                                 if (unlikely(error))
                                         return error;
@@ -1328,7 +1328,7 @@ static int write_empty_blocks(struct page *page, unsigned from, unsigned to)
         } while (next < to);
  
         if (end) {
-               error = block_prepare_write(page, start, end, gfs2_block_map);
+               error = __block_write_begin(page, start, end - start, gfs2_block_map);
                 if (unlikely(error))
                         return error;
                 empty_write_end(page, start, end);
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c

index 047d1176096c79d6f0ce50227b8c098755fb0150..2b2c4997430b708113af14a08d4c58449b97ccbe 100644 (file)
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -857,7 +857,6 @@ restart:
         gfs2_clear_rgrpd(sdp);
         gfs2_jindex_free(sdp);
         /*  Take apart glock structures and buffer lists  */
-       invalidate_inodes(sdp->sd_vfs);
         gfs2_gl_hash_clear(sdp);
         /*  Unmount the locking protocol  */
         gfs2_lm_unmount(sdp);
diff --git a/fs/hfs/hfs_fs.h b/fs/hfs/hfs_fs.h

index 4f55651aaa51e888e6c6dbcb19b78384e20caa61..c8cffb81e849e68e8e85d6e4c19666e6263e3f25 100644 (file)
--- a/fs/hfs/hfs_fs.h
+++ b/fs/hfs/hfs_fs.h
@@ -147,8 +147,6 @@ struct hfs_sb_info {
         u16 blockoffset;
  
         int fs_div;
-
-       struct hlist_head rsrc_inodes;
  };
  
  #define HFS_FLG_BITMAP_DIRTY   0
@@ -254,17 +252,6 @@ static inline void hfs_bitmap_dirty(struct super_block *sb)
         sb->s_dirt = 1;
  }
  
-static inline void hfs_buffer_sync(struct buffer_head *bh)
-{
-       while (buffer_locked(bh)) {
-               wait_on_buffer(bh);
-       }
-       if (buffer_dirty(bh)) {
-               ll_rw_block(WRITE, 1, &bh);
-               wait_on_buffer(bh);
-       }
-}
-
  #define sb_bread512(sb, sec, data) ({                  \
         struct buffer_head *__bh;                       \
         sector_t __block;                               \
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c

index 397b7adc7ce668dd2880fa73b4f5ee6cf1b050a2..dffb4e996643557f04ae1c83292bd729ef5adada 100644 (file)
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -524,7 +524,7 @@ static struct dentry *hfs_file_lookup(struct inode *dir, struct dentry *dentry,
         HFS_I(inode)->rsrc_inode = dir;
         HFS_I(dir)->rsrc_inode = inode;
         igrab(dir);
-       hlist_add_head(&inode->i_hash, &HFS_SB(dir->i_sb)->rsrc_inodes);
+       hlist_add_fake(&inode->i_hash);
         mark_inode_dirty(inode);
  out:
         d_add(dentry, inode);
diff --git a/fs/hfs/mdb.c b/fs/hfs/mdb.c

index 86428f5ac991c59dd46a088b25cffe12b5a13dee..1563d5ce57643e23ac08e3a7db02e0b336603c60 100644 (file)
--- a/fs/hfs/mdb.c
+++ b/fs/hfs/mdb.c
@@ -220,7 +220,7 @@ int hfs_mdb_get(struct super_block *sb)
                 mdb->drLsMod = hfs_mtime();
  
                 mark_buffer_dirty(HFS_SB(sb)->mdb_bh);
-               hfs_buffer_sync(HFS_SB(sb)->mdb_bh);
+               sync_dirty_buffer(HFS_SB(sb)->mdb_bh);
         }
  
         return 0;
@@ -287,7 +287,7 @@ void hfs_mdb_commit(struct super_block *sb)
                 HFS_SB(sb)->alt_mdb->drAtrb |= cpu_to_be16(HFS_SB_ATTRIB_UNMNT);
                 HFS_SB(sb)->alt_mdb->drAtrb &= cpu_to_be16(~HFS_SB_ATTRIB_INCNSTNT);
                 mark_buffer_dirty(HFS_SB(sb)->alt_mdb_bh);
-               hfs_buffer_sync(HFS_SB(sb)->alt_mdb_bh);
+               sync_dirty_buffer(HFS_SB(sb)->alt_mdb_bh);
         }
  
         if (test_and_clear_bit(HFS_FLG_BITMAP_DIRTY, &HFS_SB(sb)->flags)) {
diff --git a/fs/hfs/super.c b/fs/hfs/super.c

index 33254160f650e22da90ff1b0b153df229c81f0df..6ee1586f2334f0c05be1dd8a5305dc96be12674a 100644 (file)
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -382,7 +382,6 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
                 return -ENOMEM;
  
         sb->s_fs_info = sbi;
-       INIT_HLIST_HEAD(&sbi->rsrc_inodes);
  
         res = -EINVAL;
         if (!parse_options((char *)data, sbi)) {
diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c

index d236d85ec9d73f703384ecaa9d6522fe7433c775..e318bbc0daf6eb9ac4cd4dac18213cc7e7e89ea8 100644 (file)
--- a/fs/hfsplus/dir.c
+++ b/fs/hfsplus/dir.c
@@ -286,7 +286,7 @@ static int hfsplus_link(struct dentry *src_dentry, struct inode *dst_dir,
  
         inc_nlink(inode);
         hfsplus_instantiate(dst_dentry, inode, cnid);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         inode->i_ctime = CURRENT_TIME_SEC;
         mark_inode_dirty(inode);
         sbi->file_count++;
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c

index 78449280dae08471958a328afca7f225fe9d744f..8afd7e84f98d5d0417b8c256d60f0ff0329521db 100644 (file)
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -211,7 +211,7 @@ static struct dentry *hfsplus_file_lookup(struct inode *dir, struct dentry *dent
          * appear hashed, but do not put on any lists.  hlist_del()
          * will work fine and require no locking.
          */
-       inode->i_hash.pprev = &inode->i_hash.next;
+       hlist_add_fake(&inode->i_hash);
  
         mark_inode_dirty(inode);
  out:
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c

index a14328d270e855a4d16d3772b9ad464bab296bdc..b14be3f781c714129d4fafbd17a7ff9874193b3f 100644 (file)
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -456,6 +456,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb, uid_t uid,
         inode = new_inode(sb);
         if (inode) {
                 struct hugetlbfs_inode_info *info;
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_uid = uid;
                 inode->i_gid = gid;
diff --git a/fs/inode.c b/fs/inode.c

index 56d909d69bc88cda72bb70a528fbf0c70e877d0d..ae2727ab0c3ab7695b14f256da0bccdfd3668d81 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -29,7 +29,6 @@
  /*
   * This is needed for the following functions:
   *  - inode_has_buffers
- *  - invalidate_inode_buffers
   *  - invalidate_bdev
   *
   * FIXME: remove all knowledge of the buffer layer from this file
@@ -73,8 +72,7 @@ static unsigned int i_hash_shift __read_mostly;
   * allowing for low-overhead inode sync() operations.
   */
  
-LIST_HEAD(inode_in_use);
-LIST_HEAD(inode_unused);
+static LIST_HEAD(inode_lru);
  static struct hlist_head *inode_hashtable __read_mostly;
  
  /*
@@ -104,8 +102,41 @@ static DECLARE_RWSEM(iprune_sem);
   */
  struct inodes_stat_t inodes_stat;
  
+static struct percpu_counter nr_inodes __cacheline_aligned_in_smp;
+static struct percpu_counter nr_inodes_unused __cacheline_aligned_in_smp;
+
  static struct kmem_cache *inode_cachep __read_mostly;
  
+static inline int get_nr_inodes(void)
+{
+       return percpu_counter_sum_positive(&nr_inodes);
+}
+
+static inline int get_nr_inodes_unused(void)
+{
+       return percpu_counter_sum_positive(&nr_inodes_unused);
+}
+
+int get_nr_dirty_inodes(void)
+{
+       int nr_dirty = get_nr_inodes() - get_nr_inodes_unused();
+       return nr_dirty > 0 ? nr_dirty : 0;
+
+}
+
+/*
+ * Handle nr_inode sysctl
+ */
+#ifdef CONFIG_SYSCTL
+int proc_nr_inodes(ctl_table *table, int write,
+                  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+       inodes_stat.nr_inodes = get_nr_inodes();
+       inodes_stat.nr_unused = get_nr_inodes_unused();
+       return proc_dointvec(table, write, buffer, lenp, ppos);
+}
+#endif
+
  static void wake_up_inode(struct inode *inode)
  {
         /*
@@ -193,6 +224,8 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
         inode->i_fsnotify_mask = 0;
  #endif
  
+       percpu_counter_inc(&nr_inodes);
+
         return 0;
  out:
         return -ENOMEM;
@@ -233,11 +266,13 @@ void __destroy_inode(struct inode *inode)
         if (inode->i_default_acl && inode->i_default_acl != ACL_NOT_CACHED)
                 posix_acl_release(inode->i_default_acl);
  #endif
+       percpu_counter_dec(&nr_inodes);
  }
  EXPORT_SYMBOL(__destroy_inode);
  
-void destroy_inode(struct inode *inode)
+static void destroy_inode(struct inode *inode)
  {
+       BUG_ON(!list_empty(&inode->i_lru));
         __destroy_inode(inode);
         if (inode->i_sb->s_op->destroy_inode)
                 inode->i_sb->s_op->destroy_inode(inode);
@@ -256,6 +291,8 @@ void inode_init_once(struct inode *inode)
         INIT_HLIST_NODE(&inode->i_hash);
         INIT_LIST_HEAD(&inode->i_dentry);
         INIT_LIST_HEAD(&inode->i_devices);
+       INIT_LIST_HEAD(&inode->i_wb_list);
+       INIT_LIST_HEAD(&inode->i_lru);
         INIT_RADIX_TREE(&inode->i_data.page_tree, GFP_ATOMIC);
         spin_lock_init(&inode->i_data.tree_lock);
         spin_lock_init(&inode->i_data.i_mmap_lock);
@@ -282,14 +319,109 @@ static void init_once(void *foo)
   */
  void __iget(struct inode *inode)
  {
-       if (atomic_inc_return(&inode->i_count) != 1)
-               return;
+       atomic_inc(&inode->i_count);
+}
+
+/*
+ * get additional reference to inode; caller must already hold one.
+ */
+void ihold(struct inode *inode)
+{
+       WARN_ON(atomic_inc_return(&inode->i_count) < 2);
+}
+EXPORT_SYMBOL(ihold);
+
+static void inode_lru_list_add(struct inode *inode)
+{
+       if (list_empty(&inode->i_lru)) {
+               list_add(&inode->i_lru, &inode_lru);
+               percpu_counter_inc(&nr_inodes_unused);
+       }
+}
  
-       if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-               list_move(&inode->i_list, &inode_in_use);
-       inodes_stat.nr_unused--;
+static void inode_lru_list_del(struct inode *inode)
+{
+       if (!list_empty(&inode->i_lru)) {
+               list_del_init(&inode->i_lru);
+               percpu_counter_dec(&nr_inodes_unused);
+       }
+}
+
+static inline void __inode_sb_list_add(struct inode *inode)
+{
+       list_add(&inode->i_sb_list, &inode->i_sb->s_inodes);
  }
  
+/**
+ * inode_sb_list_add - add inode to the superblock list of inodes
+ * @inode: inode to add
+ */
+void inode_sb_list_add(struct inode *inode)
+{
+       spin_lock(&inode_lock);
+       __inode_sb_list_add(inode);
+       spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL_GPL(inode_sb_list_add);
+
+static inline void __inode_sb_list_del(struct inode *inode)
+{
+       list_del_init(&inode->i_sb_list);
+}
+
+static unsigned long hash(struct super_block *sb, unsigned long hashval)
+{
+       unsigned long tmp;
+
+       tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
+                       L1_CACHE_BYTES;
+       tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
+       return tmp & I_HASHMASK;
+}
+
+/**
+ *     __insert_inode_hash - hash an inode
+ *     @inode: unhashed inode
+ *     @hashval: unsigned long value used to locate this object in the
+ *             inode_hashtable.
+ *
+ *     Add an inode to the inode hash for this superblock.
+ */
+void __insert_inode_hash(struct inode *inode, unsigned long hashval)
+{
+       struct hlist_head *b = inode_hashtable + hash(inode->i_sb, hashval);
+
+       spin_lock(&inode_lock);
+       hlist_add_head(&inode->i_hash, b);
+       spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(__insert_inode_hash);
+
+/**
+ *     __remove_inode_hash - remove an inode from the hash
+ *     @inode: inode to unhash
+ *
+ *     Remove an inode from the inode hash.
+ */
+static void __remove_inode_hash(struct inode *inode)
+{
+       hlist_del_init(&inode->i_hash);
+}
+
+/**
+ *     remove_inode_hash - remove an inode from the hash
+ *     @inode: inode to unhash
+ *
+ *     Remove an inode from the inode hash.
+ */
+void remove_inode_hash(struct inode *inode)
+{
+       spin_lock(&inode_lock);
+       hlist_del_init(&inode->i_hash);
+       spin_unlock(&inode_lock);
+}
+EXPORT_SYMBOL(remove_inode_hash);
+
  void end_writeback(struct inode *inode)
  {
         might_sleep();
@@ -328,101 +460,113 @@ static void evict(struct inode *inode)
   */
  static void dispose_list(struct list_head *head)
  {
-       int nr_disposed = 0;
-
         while (!list_empty(head)) {
                 struct inode *inode;
  
-               inode = list_first_entry(head, struct inode, i_list);
-               list_del(&inode->i_list);
+               inode = list_first_entry(head, struct inode, i_lru);
+               list_del_init(&inode->i_lru);
  
                 evict(inode);
  
                 spin_lock(&inode_lock);
-               hlist_del_init(&inode->i_hash);
-               list_del_init(&inode->i_sb_list);
+               __remove_inode_hash(inode);
+               __inode_sb_list_del(inode);
                 spin_unlock(&inode_lock);
  
                 wake_up_inode(inode);
                 destroy_inode(inode);
-               nr_disposed++;
         }
-       spin_lock(&inode_lock);
-       inodes_stat.nr_inodes -= nr_disposed;
-       spin_unlock(&inode_lock);
  }
  
-/*
- * Invalidate all inodes for a device.
+/**
+ * evict_inodes        - evict all evictable inodes for a superblock
+ * @sb:                superblock to operate on
+ *
+ * Make sure that no inodes with zero refcount are retained.  This is
+ * called by superblock shutdown after having MS_ACTIVE flag removed,
+ * so any inode reaching zero refcount during or after that call will
+ * be immediately evicted.
   */
-static int invalidate_list(struct list_head *head, struct list_head *dispose)
+void evict_inodes(struct super_block *sb)
  {
-       struct list_head *next;
-       int busy = 0, count = 0;
-
-       next = head->next;
-       for (;;) {
-               struct list_head *tmp = next;
-               struct inode *inode;
+       struct inode *inode, *next;
+       LIST_HEAD(dispose);
  
-               /*
-                * We can reschedule here without worrying about the list's
-                * consistency because the per-sb list of inodes must not
-                * change during umount anymore, and because iprune_sem keeps
-                * shrink_icache_memory() away.
-                */
-               cond_resched_lock(&inode_lock);
+       down_write(&iprune_sem);
  
-               next = next->next;
-               if (tmp == head)
-                       break;
-               inode = list_entry(tmp, struct inode, i_sb_list);
-               if (inode->i_state & I_NEW)
+       spin_lock(&inode_lock);
+       list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
+               if (atomic_read(&inode->i_count))
                         continue;
-               invalidate_inode_buffers(inode);
-               if (!atomic_read(&inode->i_count)) {
-                       list_move(&inode->i_list, dispose);
-                       WARN_ON(inode->i_state & I_NEW);
-                       inode->i_state |= I_FREEING;
-                       count++;
+
+               if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
+                       WARN_ON(1);
                         continue;
                 }
-               busy = 1;
+
+               inode->i_state |= I_FREEING;
+
+               /*
+                * Move the inode off the IO lists and LRU once I_FREEING is
+                * set so that it won't get moved back on there if it is dirty.
+                */
+               list_move(&inode->i_lru, &dispose);
+               list_del_init(&inode->i_wb_list);
+               if (!(inode->i_state & (I_DIRTY | I_SYNC)))
+                       percpu_counter_dec(&nr_inodes_unused);
         }
-       /* only unused inodes may be cached with i_count zero */
-       inodes_stat.nr_unused -= count;
-       return busy;
+       spin_unlock(&inode_lock);
+
+       dispose_list(&dispose);
+       up_write(&iprune_sem);
  }
  
  /**
- *     invalidate_inodes       - discard the inodes on a device
- *     @sb: superblock
+ * invalidate_inodes   - attempt to free all inodes on a superblock
+ * @sb:                superblock to operate on
   *
- *     Discard all of the inodes for a given superblock. If the discard
- *     fails because there are busy inodes then a non zero value is returned.
- *     If the discard is successful all the inodes have been discarded.
+ * Attempts to free all inodes for a given superblock.  If there were any
+ * busy inodes return a non-zero value, else zero.
   */
  int invalidate_inodes(struct super_block *sb)
  {
-       int busy;
-       LIST_HEAD(throw_away);
+       int busy = 0;
+       struct inode *inode, *next;
+       LIST_HEAD(dispose);
  
         down_write(&iprune_sem);
+
         spin_lock(&inode_lock);
-       fsnotify_unmount_inodes(&sb->s_inodes);
-       busy = invalidate_list(&sb->s_inodes, &throw_away);
+       list_for_each_entry_safe(inode, next, &sb->s_inodes, i_sb_list) {
+               if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE))
+                       continue;
+               if (atomic_read(&inode->i_count)) {
+                       busy = 1;
+                       continue;
+               }
+
+               inode->i_state |= I_FREEING;
+
+               /*
+                * Move the inode off the IO lists and LRU once I_FREEING is
+                * set so that it won't get moved back on there if it is dirty.
+                */
+               list_move(&inode->i_lru, &dispose);
+               list_del_init(&inode->i_wb_list);
+               if (!(inode->i_state & (I_DIRTY | I_SYNC)))
+                       percpu_counter_dec(&nr_inodes_unused);
+       }
         spin_unlock(&inode_lock);
  
-       dispose_list(&throw_away);
+       dispose_list(&dispose);
         up_write(&iprune_sem);
  
         return busy;
  }
-EXPORT_SYMBOL(invalidate_inodes);
  
  static int can_unuse(struct inode *inode)
  {
-       if (inode->i_state)
+       if (inode->i_state & ~I_REFERENCED)
                 return 0;
         if (inode_has_buffers(inode))
                 return 0;
@@ -434,22 +578,24 @@ static int can_unuse(struct inode *inode)
  }
  
  /*
- * Scan `goal' inodes on the unused list for freeable ones. They are moved to
- * a temporary list and then are freed outside inode_lock by dispose_list().
+ * Scan `goal' inodes on the unused list for freeable ones. They are moved to a
+ * temporary list and then are freed outside inode_lock by dispose_list().
   *
   * Any inodes which are pinned purely because of attached pagecache have their
- * pagecache removed.  We expect the final iput() on that inode to add it to
- * the front of the inode_unused list.  So look for it there and if the
- * inode is still freeable, proceed.  The right inode is found 99.9% of the
- * time in testing on a 4-way.
+ * pagecache removed.  If the inode has metadata buffers attached to
+ * mapping->private_list then try to remove them.
   *
- * If the inode has metadata buffers attached to mapping->private_list then
- * try to remove them.
+ * If the inode has the I_REFERENCED flag set, then it means that it has been
+ * used recently - the flag is set in iput_final(). When we encounter such an
+ * inode, clear the flag and move it to the back of the LRU so it gets another
+ * pass through the LRU before it gets reclaimed. This is necessary because of
+ * the fact we are doing lazy LRU updates to minimise lock contention so the
+ * LRU does not have strict ordering. Hence we don't want to reclaim inodes
+ * with this flag set because they are the inodes that are out of order.
   */
  static void prune_icache(int nr_to_scan)
  {
         LIST_HEAD(freeable);
-       int nr_pruned = 0;
         int nr_scanned;
         unsigned long reap = 0;
  
@@ -458,13 +604,26 @@ static void prune_icache(int nr_to_scan)
         for (nr_scanned = 0; nr_scanned < nr_to_scan; nr_scanned++) {
                 struct inode *inode;
  
-               if (list_empty(&inode_unused))
+               if (list_empty(&inode_lru))
                         break;
  
-               inode = list_entry(inode_unused.prev, struct inode, i_list);
+               inode = list_entry(inode_lru.prev, struct inode, i_lru);
  
-               if (inode->i_state || atomic_read(&inode->i_count)) {
-                       list_move(&inode->i_list, &inode_unused);
+               /*
+                * Referenced or dirty inodes are still in use. Give them
+                * another pass through the LRU as we cannot reclaim them now.
+                */
+               if (atomic_read(&inode->i_count) ||
+                   (inode->i_state & ~I_REFERENCED)) {
+                       list_del_init(&inode->i_lru);
+                       percpu_counter_dec(&nr_inodes_unused);
+                       continue;
+               }
+
+               /* recently referenced inodes get one more pass */
+               if (inode->i_state & I_REFERENCED) {
+                       list_move(&inode->i_lru, &inode_lru);
+                       inode->i_state &= ~I_REFERENCED;
                         continue;
                 }
                 if (inode_has_buffers(inode) || inode->i_data.nrpages) {
@@ -476,18 +635,23 @@ static void prune_icache(int nr_to_scan)
                         iput(inode);
                         spin_lock(&inode_lock);
  
-                       if (inode != list_entry(inode_unused.next,
-                                               struct inode, i_list))
+                       if (inode != list_entry(inode_lru.next,
+                                               struct inode, i_lru))
                                 continue;       /* wrong inode or list_empty */
                         if (!can_unuse(inode))
                                 continue;
                 }
-               list_move(&inode->i_list, &freeable);
                 WARN_ON(inode->i_state & I_NEW);
                 inode->i_state |= I_FREEING;
-               nr_pruned++;
+
+               /*
+                * Move the inode off the IO lists and LRU once I_FREEING is
+                * set so that it won't get moved back on there if it is dirty.
+                */
+               list_move(&inode->i_lru, &freeable);
+               list_del_init(&inode->i_wb_list);
+               percpu_counter_dec(&nr_inodes_unused);
         }
-       inodes_stat.nr_unused -= nr_pruned;
         if (current_is_kswapd())
                 __count_vm_events(KSWAPD_INODESTEAL, reap);
         else
@@ -519,7 +683,7 @@ static int shrink_icache_memory(struct shrinker *shrink, int nr, gfp_t gfp_mask)
                         return -1;
                 prune_icache(nr);
         }
-       return (inodes_stat.nr_unused / 100) * sysctl_vfs_cache_pressure;
+       return (get_nr_inodes_unused() / 100) * sysctl_vfs_cache_pressure;
  }
  
  static struct shrinker icache_shrinker = {
@@ -530,9 +694,6 @@ static struct shrinker icache_shrinker = {
  static void __wait_on_freeing_inode(struct inode *inode);
  /*
   * Called with the inode lock held.
- * NOTE: we are not increasing the inode-refcount, you must call __iget()
- * by hand after calling find_inode now! This simplifies iunique and won't
- * add any additional branch in the common code.
   */
  static struct inode *find_inode(struct super_block *sb,
                                 struct hlist_head *head,
@@ -552,9 +713,10 @@ repeat:
                         __wait_on_freeing_inode(inode);
                         goto repeat;
                 }
-               break;
+               __iget(inode);
+               return inode;
         }
-       return node ? inode : NULL;
+       return NULL;
  }
  
  /*
@@ -577,53 +739,49 @@ repeat:
                         __wait_on_freeing_inode(inode);
                         goto repeat;
                 }
-               break;
+               __iget(inode);
+               return inode;
         }
-       return node ? inode : NULL;
-}
-
-static unsigned long hash(struct super_block *sb, unsigned long hashval)
-{
-       unsigned long tmp;
-
-       tmp = (hashval * (unsigned long)sb) ^ (GOLDEN_RATIO_PRIME + hashval) /
-                       L1_CACHE_BYTES;
-       tmp = tmp ^ ((tmp ^ GOLDEN_RATIO_PRIME) >> I_HASHBITS);
-       return tmp & I_HASHMASK;
-}
-
-static inline void
-__inode_add_to_lists(struct super_block *sb, struct hlist_head *head,
-                       struct inode *inode)
-{
-       inodes_stat.nr_inodes++;
-       list_add(&inode->i_list, &inode_in_use);
-       list_add(&inode->i_sb_list, &sb->s_inodes);
-       if (head)
-               hlist_add_head(&inode->i_hash, head);
+       return NULL;
  }
  
-/**
- * inode_add_to_lists - add a new inode to relevant lists
- * @sb: superblock inode belongs to
- * @inode: inode to mark in use
+/*
+ * Each cpu owns a range of LAST_INO_BATCH numbers.
+ * 'shared_last_ino' is dirtied only once out of LAST_INO_BATCH allocations,
+ * to renew the exhausted range.
   *
- * When an inode is allocated it needs to be accounted for, added to the in use
- * list, the owning superblock and the inode hash. This needs to be done under
- * the inode_lock, so export a function to do this rather than the inode lock
- * itself. We calculate the hash list to add to here so it is all internal
- * which requires the caller to have already set up the inode number in the
- * inode to add.
+ * This does not significantly increase overflow rate because every CPU can
+ * consume at most LAST_INO_BATCH-1 unused inode numbers. So there is
+ * NR_CPUS*(LAST_INO_BATCH-1) wastage. At 4096 and 1024, this is ~0.1% of the
+ * 2^32 range, and is a worst-case. Even a 50% wastage would only increase
+ * overflow rate by 2x, which does not seem too significant.
+ *
+ * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
+ * error if st_ino won't fit in target struct field. Use 32bit counter
+ * here to attempt to avoid that.
   */
-void inode_add_to_lists(struct super_block *sb, struct inode *inode)
+#define LAST_INO_BATCH 1024
+static DEFINE_PER_CPU(unsigned int, last_ino);
+
+unsigned int get_next_ino(void)
  {
-       struct hlist_head *head = inode_hashtable + hash(sb, inode->i_ino);
+       unsigned int *p = &get_cpu_var(last_ino);
+       unsigned int res = *p;
  
-       spin_lock(&inode_lock);
-       __inode_add_to_lists(sb, head, inode);
-       spin_unlock(&inode_lock);
+#ifdef CONFIG_SMP
+       if (unlikely((res & (LAST_INO_BATCH-1)) == 0)) {
+               static atomic_t shared_last_ino;
+               int next = atomic_add_return(LAST_INO_BATCH, &shared_last_ino);
+
+               res = next - LAST_INO_BATCH;
+       }
+#endif
+
+       *p = ++res;
+       put_cpu_var(last_ino);
+       return res;
  }
-EXPORT_SYMBOL_GPL(inode_add_to_lists);
+EXPORT_SYMBOL(get_next_ino);
  
  /**
   *     new_inode       - obtain an inode
@@ -639,12 +797,6 @@ EXPORT_SYMBOL_GPL(inode_add_to_lists);
   */
  struct inode *new_inode(struct super_block *sb)
  {
-       /*
-        * On a 32bit, non LFS stat() call, glibc will generate an EOVERFLOW
-        * error if st_ino won't fit in target struct field. Use 32bit counter
-        * here to attempt to avoid that.
-        */
-       static unsigned int last_ino;
         struct inode *inode;
  
         spin_lock_prefetch(&inode_lock);
@@ -652,8 +804,7 @@ struct inode *new_inode(struct super_block *sb)
         inode = alloc_inode(sb);
         if (inode) {
                 spin_lock(&inode_lock);
-               __inode_add_to_lists(sb, NULL, inode);
-               inode->i_ino = ++last_ino;
+               __inode_sb_list_add(inode);
                 inode->i_state = 0;
                 spin_unlock(&inode_lock);
         }
@@ -664,7 +815,7 @@ EXPORT_SYMBOL(new_inode);
  void unlock_new_inode(struct inode *inode)
  {
  #ifdef CONFIG_DEBUG_LOCK_ALLOC
-       if (inode->i_mode & S_IFDIR) {
+       if (S_ISDIR(inode->i_mode)) {
                 struct file_system_type *type = inode->i_sb->s_type;
  
                 /* Set new key only if filesystem hasn't already changed it */
@@ -721,7 +872,8 @@ static struct inode *get_new_inode(struct super_block *sb,
                         if (set(inode, data))
                                 goto set_failed;
  
-                       __inode_add_to_lists(sb, head, inode);
+                       hlist_add_head(&inode->i_hash, head);
+                       __inode_sb_list_add(inode);
                         inode->i_state = I_NEW;
                         spin_unlock(&inode_lock);
  
@@ -736,7 +888,6 @@ static struct inode *get_new_inode(struct super_block *sb,
                  * us. Use the old inode instead of the one we just
                  * allocated.
                  */
-               __iget(old);
                 spin_unlock(&inode_lock);
                 destroy_inode(inode);
                 inode = old;
@@ -768,7 +919,8 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
                 old = find_inode_fast(sb, head, ino);
                 if (!old) {
                         inode->i_ino = ino;
-                       __inode_add_to_lists(sb, head, inode);
+                       hlist_add_head(&inode->i_hash, head);
+                       __inode_sb_list_add(inode);
                         inode->i_state = I_NEW;
                         spin_unlock(&inode_lock);
  
@@ -783,7 +935,6 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
                  * us. Use the old inode instead of the one we just
                  * allocated.
                  */
-               __iget(old);
                 spin_unlock(&inode_lock);
                 destroy_inode(inode);
                 inode = old;
@@ -792,6 +943,27 @@ static struct inode *get_new_inode_fast(struct super_block *sb,
         return inode;
  }
  
+/*
+ * Search the inode hash for an inode on @sb with inode number @ino.
+ * If we find one, then the inode number we are trying to
+ * allocate is not unique and so we should not use it.
+ *
+ * Returns 1 if the inode number is unique, 0 if it is not.
+ */
+static int test_inode_iunique(struct super_block *sb, unsigned long ino)
+{
+       struct hlist_head *b = inode_hashtable + hash(sb, ino);
+       struct hlist_node *node;
+       struct inode *inode;
+
+       hlist_for_each_entry(inode, node, b, i_hash) {
+               if (inode->i_ino == ino && inode->i_sb == sb)
+                       return 0;
+       }
+
+       return 1;
+}
+
  /**
   *     iunique - get a unique inode number
   *     @sb: superblock
@@ -813,19 +985,18 @@ ino_t iunique(struct super_block *sb, ino_t max_reserved)
          * error if st_ino won't fit in target struct field. Use 32bit counter
          * here to attempt to avoid that.
          */
+       static DEFINE_SPINLOCK(iunique_lock);
         static unsigned int counter;
-       struct inode *inode;
-       struct hlist_head *head;
         ino_t res;
  
         spin_lock(&inode_lock);
+       spin_lock(&iunique_lock);
         do {
                 if (counter <= max_reserved)
                         counter = max_reserved + 1;
                 res = counter++;
-               head = inode_hashtable + hash(sb, res);
-               inode = find_inode_fast(sb, head, res);
-       } while (inode != NULL);
+       } while (!test_inode_iunique(sb, res));
+       spin_unlock(&iunique_lock);
         spin_unlock(&inode_lock);
  
         return res;
@@ -877,7 +1048,6 @@ static struct inode *ifind(struct super_block *sb,
         spin_lock(&inode_lock);
         inode = find_inode(sb, head, test, data);
         if (inode) {
-               __iget(inode);
                 spin_unlock(&inode_lock);
                 if (likely(wait))
                         wait_on_inode(inode);
@@ -910,7 +1080,6 @@ static struct inode *ifind_fast(struct super_block *sb,
         spin_lock(&inode_lock);
         inode = find_inode_fast(sb, head, ino);
         if (inode) {
-               __iget(inode);
                 spin_unlock(&inode_lock);
                 wait_on_inode(inode);
                 return inode;
@@ -1096,7 +1265,7 @@ int insert_inode_locked(struct inode *inode)
                 __iget(old);
                 spin_unlock(&inode_lock);
                 wait_on_inode(old);
-               if (unlikely(!hlist_unhashed(&old->i_hash))) {
+               if (unlikely(!inode_unhashed(old))) {
                         iput(old);
                         return -EBUSY;
                 }
@@ -1135,7 +1304,7 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
                 __iget(old);
                 spin_unlock(&inode_lock);
                 wait_on_inode(old);
-               if (unlikely(!hlist_unhashed(&old->i_hash))) {
+               if (unlikely(!inode_unhashed(old))) {
                         iput(old);
                         return -EBUSY;
                 }
@@ -1144,36 +1313,6 @@ int insert_inode_locked4(struct inode *inode, unsigned long hashval,
  }
  EXPORT_SYMBOL(insert_inode_locked4);
  
-/**
- *     __insert_inode_hash - hash an inode
- *     @inode: unhashed inode
- *     @hashval: unsigned long value used to locate this object in the
- *             inode_hashtable.
- *
- *     Add an inode to the inode hash for this superblock.
- */
-void __insert_inode_hash(struct inode *inode, unsigned long hashval)
-{
-       struct hlist_head *head = inode_hashtable + hash(inode->i_sb, hashval);
-       spin_lock(&inode_lock);
-       hlist_add_head(&inode->i_hash, head);
-       spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(__insert_inode_hash);
-
-/**
- *     remove_inode_hash - remove an inode from the hash
- *     @inode: inode to unhash
- *
- *     Remove an inode from the superblock.
- */
-void remove_inode_hash(struct inode *inode)
-{
-       spin_lock(&inode_lock);
-       hlist_del_init(&inode->i_hash);
-       spin_unlock(&inode_lock);
-}
-EXPORT_SYMBOL(remove_inode_hash);
  
  int generic_delete_inode(struct inode *inode)
  {
@@ -1188,7 +1327,7 @@ EXPORT_SYMBOL(generic_delete_inode);
   */
  int generic_drop_inode(struct inode *inode)
  {
-       return !inode->i_nlink || hlist_unhashed(&inode->i_hash);
+       return !inode->i_nlink || inode_unhashed(inode);
  }
  EXPORT_SYMBOL_GPL(generic_drop_inode);
  
@@ -1214,10 +1353,11 @@ static void iput_final(struct inode *inode)
                 drop = generic_drop_inode(inode);
  
         if (!drop) {
-               if (!(inode->i_state & (I_DIRTY|I_SYNC)))
-                       list_move(&inode->i_list, &inode_unused);
-               inodes_stat.nr_unused++;
                 if (sb->s_flags & MS_ACTIVE) {
+                       inode->i_state |= I_REFERENCED;
+                       if (!(inode->i_state & (I_DIRTY|I_SYNC))) {
+                               inode_lru_list_add(inode);
+                       }
                         spin_unlock(&inode_lock);
                         return;
                 }
@@ -1228,19 +1368,23 @@ static void iput_final(struct inode *inode)
                 spin_lock(&inode_lock);
                 WARN_ON(inode->i_state & I_NEW);
                 inode->i_state &= ~I_WILL_FREE;
-               inodes_stat.nr_unused--;
-               hlist_del_init(&inode->i_hash);
+               __remove_inode_hash(inode);
         }
-       list_del_init(&inode->i_list);
-       list_del_init(&inode->i_sb_list);
+
         WARN_ON(inode->i_state & I_NEW);
         inode->i_state |= I_FREEING;
-       inodes_stat.nr_inodes--;
+
+       /*
+        * Move the inode off the IO lists and LRU once I_FREEING is
+        * set so that it won't get moved back on there if it is dirty.
+        */
+       inode_lru_list_del(inode);
+       list_del_init(&inode->i_wb_list);
+
+       __inode_sb_list_del(inode);
         spin_unlock(&inode_lock);
         evict(inode);
-       spin_lock(&inode_lock);
-       hlist_del_init(&inode->i_hash);
-       spin_unlock(&inode_lock);
+       remove_inode_hash(inode);
         wake_up_inode(inode);
         BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
         destroy_inode(inode);
@@ -1504,6 +1648,8 @@ void __init inode_init(void)
                                          SLAB_MEM_SPREAD),
                                          init_once);
         register_shrinker(&icache_shrinker);
+       percpu_counter_init(&nr_inodes, 0);
+       percpu_counter_init(&nr_inodes_unused, 0);
  
         /* Hash may have been set up in inode_init_early */
         if (!hashdist)
diff --git a/fs/internal.h b/fs/internal.h

index a6910e91cee8799196e991c7cab98ad11206cf01..ebad3b90752dae0b37838734293ee36864af2c02 100644 (file)
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -101,3 +101,10 @@ extern void put_super(struct super_block *sb);
  struct nameidata;
  extern struct file *nameidata_to_filp(struct nameidata *);
  extern void release_open_intent(struct nameidata *);
+
+/*
+ * inode.c
+ */
+extern int get_nr_dirty_inodes(void);
+extern int evict_inodes(struct super_block *);
+extern int invalidate_inodes(struct super_block *);
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c

index 09ff41a752a092431068b991b9e323bbe5276fbd..60c2b944d76267bbd40358f7384dfe3fc9240bd3 100644 (file)
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -962,25 +962,23 @@ static int isofs_statfs (struct dentry *dentry, struct kstatfs *buf)
   * or getblk() if they are not.  Returns the number of blocks inserted
   * (-ve == error.)
   */
-int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
+int isofs_get_blocks(struct inode *inode, sector_t iblock,
                      struct buffer_head **bh, unsigned long nblocks)
  {
-       unsigned long b_off;
+       unsigned long b_off = iblock;
         unsigned offset, sect_size;
         unsigned int firstext;
         unsigned long nextblk, nextoff;
-       long iblock = (long)iblock_s;
         int section, rv, error;
         struct iso_inode_info *ei = ISOFS_I(inode);
  
         error = -EIO;
         rv = 0;
-       if (iblock < 0 || iblock != iblock_s) {
+       if (iblock != b_off) {
                 printk(KERN_DEBUG "%s: block number too large\n", __func__);
                 goto abort;
         }
  
-       b_off = iblock;
  
         offset = 0;
         firstext = ei->i_first_extent;
@@ -998,8 +996,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
                  * I/O errors.
                  */
                 if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
-                       printk(KERN_DEBUG "%s: block >= EOF (%ld, %ld)\n",
-                               __func__, iblock, (unsigned long) inode->i_size);
+                       printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
+                               __func__, b_off,
+                               (unsigned long long)inode->i_size);
                         goto abort;
                 }
  
@@ -1025,9 +1024,9 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock_s,
                         if (++section > 100) {
                                 printk(KERN_DEBUG "%s: More than 100 file sections ?!?"
                                         " aborting...\n", __func__);
-                               printk(KERN_DEBUG "%s: block=%ld firstext=%u sect_size=%u "
+                               printk(KERN_DEBUG "%s: block=%lu firstext=%u sect_size=%u "
                                         "nextblk=%lu nextoff=%lu\n", __func__,
-                                       iblock, firstext, (unsigned) sect_size,
+                                       b_off, firstext, (unsigned) sect_size,
                                         nextblk, nextoff);
                                 goto abort;
                         }
diff --git a/fs/jffs2/dir.c b/fs/jffs2/dir.c

index ed78a3cf3cb047046d8ea385af9554e63214aac8..79121aa5858b8a4aa02861ba57cf17d9a8dfe13b 100644 (file)
--- a/fs/jffs2/dir.c
+++ b/fs/jffs2/dir.c
@@ -289,7 +289,7 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
                 mutex_unlock(&f->sem);
                 d_instantiate(dentry, old_dentry->d_inode);
                 dir_i->i_mtime = dir_i->i_ctime = ITIME(now);
-               atomic_inc(&old_dentry->d_inode->i_count);
+               ihold(old_dentry->d_inode);
         }
         return ret;
  }
@@ -864,7 +864,7 @@ static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
                 printk(KERN_NOTICE "jffs2_rename(): Link succeeded, unlink failed (err %d). You now have a hard link\n", ret);
                 /* Might as well let the VFS know */
                 d_instantiate(new_dentry, old_dentry->d_inode);
-               atomic_inc(&old_dentry->d_inode->i_count);
+               ihold(old_dentry->d_inode);
                 new_dir_i->i_mtime = new_dir_i->i_ctime = ITIME(now);
                 return ret;
         }
diff --git a/fs/jfs/jfs_imap.c b/fs/jfs/jfs_imap.c

index f8332dc8eeb2111430d2a16ae716b6d1925fde99..3a09423b6c22c6bf0d1635ddd572ef4620c146b2 100644 (file)
--- a/fs/jfs/jfs_imap.c
+++ b/fs/jfs/jfs_imap.c
@@ -497,7 +497,7 @@ struct inode *diReadSpecial(struct super_block *sb, ino_t inum, int secondary)
          * appear hashed, but do not put on any lists.  hlist_del()
          * will work fine and require no locking.
          */
-       ip->i_hash.pprev = &ip->i_hash.next;
+       hlist_add_fake(&ip->i_hash);
  
         return (ip);
  }
diff --git a/fs/jfs/jfs_txnmgr.c b/fs/jfs/jfs_txnmgr.c

index d945ea76b445249de68fece3be7dea631cf2b6b1..9466957ec84173f56707bd591e97b007c7bba57c 100644 (file)
--- a/fs/jfs/jfs_txnmgr.c
+++ b/fs/jfs/jfs_txnmgr.c
@@ -1279,7 +1279,7 @@ int txCommit(tid_t tid,           /* transaction identifier */
          * lazy commit thread finishes processing
          */
         if (tblk->xflag & COMMIT_DELETE) {
-               atomic_inc(&tblk->u.ip->i_count);
+               ihold(tblk->u.ip);
                 /*
                  * Avoid a rare deadlock
                  *
diff --git a/fs/jfs/namei.c b/fs/jfs/namei.c

index a9cf8e8675be8713dc5f9311faa20fefcac85156..231ca4af9bce4f27d2c12d71e9c0a237616e512d 100644 (file)
--- a/fs/jfs/namei.c
+++ b/fs/jfs/namei.c
@@ -839,7 +839,7 @@ static int jfs_link(struct dentry *old_dentry,
         ip->i_ctime = CURRENT_TIME;
         dir->i_ctime = dir->i_mtime = CURRENT_TIME;
         mark_inode_dirty(dir);
-       atomic_inc(&ip->i_count);
+       ihold(ip);
  
         iplist[0] = ip;
         iplist[1] = dir;
diff --git a/fs/libfs.c b/fs/libfs.c

index 62baa0387d6e03869e726b89ffaddccce4fc090c..304a5132ca270d018799d3003170bd813c23dd9b 100644 (file)
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -255,7 +255,7 @@ int simple_link(struct dentry *old_dentry, struct inode *dir, struct dentry *den
  
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
         inc_nlink(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         dget(dentry);
         d_instantiate(dentry, inode);
         return 0;
@@ -892,10 +892,6 @@ EXPORT_SYMBOL_GPL(generic_fh_to_parent);
   */
  int generic_file_fsync(struct file *file, int datasync)
  {
-       struct writeback_control wbc = {
-               .sync_mode = WB_SYNC_ALL,
-               .nr_to_write = 0, /* metadata-only; caller takes care of data */
-       };
         struct inode *inode = file->f_mapping->host;
         int err;
         int ret;
@@ -906,7 +902,7 @@ int generic_file_fsync(struct file *file, int datasync)
         if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
                 return ret;
  
-       err = sync_inode(inode, &wbc);
+       err = sync_inode_metadata(inode, 1);
         if (ret == 0)
                 ret = err;
         return ret;
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c

index 1eb4e89e045b082d9f02d6d005edd3f61e5c48c8..409dfd65e9a1764029edf55071069e61f447d29d 100644 (file)
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -569,7 +569,7 @@ static int logfs_link(struct dentry *old_dentry, struct inode *dir,
                 return -EMLINK;
  
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         inode->i_nlink++;
         mark_inode_dirty_sync(inode);
  
diff --git a/fs/minix/namei.c b/fs/minix/namei.c

index f3f3578393a417085812ba1ad7e0b7c1e4e5c981..c0d35a3accef98dd5ed7619214146e0f0f5d3260 100644 (file)
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -101,7 +101,7 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
  
         inode->i_ctime = CURRENT_TIME_SEC;
         inode_inc_link_count(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         return add_nondir(dentry, inode);
  }
  
diff --git a/fs/namei.c b/fs/namei.c

index 24896e8335658c9c0ce2c81c117cb664b964da70..f7dbc06857abfe6e1959654f7fd66b393dd161d0 100644 (file)
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1121,11 +1121,13 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
  static struct dentry *__lookup_hash(struct qstr *name,
                 struct dentry *base, struct nameidata *nd)
  {
+       struct inode *inode = base->d_inode;
         struct dentry *dentry;
-       struct inode *inode;
         int err;
  
-       inode = base->d_inode;
+       err = exec_permission(inode);
+       if (err)
+               return ERR_PTR(err);
  
         /*
          * See if the low-level filesystem might want
@@ -1161,11 +1163,6 @@ out:
   */
  static struct dentry *lookup_hash(struct nameidata *nd)
  {
-       int err;
-
-       err = exec_permission(nd->path.dentry->d_inode);
-       if (err)
-               return ERR_PTR(err);
         return __lookup_hash(&nd->last, nd->path.dentry, nd);
  }
  
@@ -1213,9 +1210,6 @@ struct dentry *lookup_one_len(const char *name, struct dentry *base, int len)
         if (err)
                 return ERR_PTR(err);
  
-       err = exec_permission(base->d_inode);
-       if (err)
-               return ERR_PTR(err);
         return __lookup_hash(&this, base, NULL);
  }
  
@@ -2291,7 +2285,7 @@ static long do_unlinkat(int dfd, const char __user *pathname)
                         goto slashes;
                 inode = dentry->d_inode;
                 if (inode)
-                       atomic_inc(&inode->i_count);
+                       ihold(inode);
                 error = mnt_want_write(nd.path.mnt);
                 if (error)
                         goto exit2;
diff --git a/fs/namespace.c b/fs/namespace.c

index 7ca5182c0bedd547e21531e777175d2058ea2982..8a415c9c5e552efd945cdb85af6f74af4b2b6de6 100644 (file)
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -595,7 +595,7 @@ static struct vfsmount *clone_mnt(struct vfsmount *old, struct dentry *root,
                                 goto out_free;
                 }
  
-               mnt->mnt_flags = old->mnt_flags;
+               mnt->mnt_flags = old->mnt_flags & ~MNT_WRITE_HOLD;
                 atomic_inc(&sb->s_active);
                 mnt->mnt_sb = sb;
                 mnt->mnt_root = dget(root);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c

index 257e4052492e9eb174d2db47b4cdc94bd8f81ef6..07ac3847e562b54c26efd30c9a9eabab468c309d 100644 (file)
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1801,7 +1801,7 @@ nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
         d_drop(dentry);
         error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
         if (error == 0) {
-               atomic_inc(&inode->i_count);
+               ihold(inode);
                 d_add(dentry, inode);
         }
         return error;
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c

index a70e446e1605fa43fb1b23b96418d8296773109f..ac7b814ce1620f3e5cb0afa3334d31d1d540a52b 100644 (file)
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -54,8 +54,7 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
                         iput(inode);
                         return -ENOMEM;
                 }
-               /* Circumvent igrab(): we know the inode is not being freed */
-               atomic_inc(&inode->i_count);
+               ihold(inode);
                 /*
                  * Ensure that this dentry is invisible to d_find_alias().
                  * Otherwise, it may be spliced into the tree by
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c

index 661a6cf8e8265eaeca988e2aabc0ba1ede767717..184938fcff04d5dff712b8bee01f5d7e41bb20ad 100644 (file)
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -281,23 +281,13 @@ commit_metadata(struct svc_fh *fhp)
  {
         struct inode *inode = fhp->fh_dentry->d_inode;
         const struct export_operations *export_ops = inode->i_sb->s_export_op;
-       int error = 0;
  
         if (!EX_ISSYNC(fhp->fh_export))
                 return 0;
  
-       if (export_ops->commit_metadata) {
-               error = export_ops->commit_metadata(inode);
-       } else {
-               struct writeback_control wbc = {
-                       .sync_mode = WB_SYNC_ALL,
-                       .nr_to_write = 0, /* metadata only */
-               };
-
-               error = sync_inode(inode, &wbc);
-       }
-
-       return error;
+       if (export_ops->commit_metadata)
+               return export_ops->commit_metadata(inode);
+       return sync_inode_metadata(inode, 1);
  }
  
  /*
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c

index 185d1607cb00c2f6c0b1e224ea148af0fb17e224..6e9557ecf161fd664a0715d9a3b8e0f3e34501dd 100644 (file)
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -207,7 +207,7 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
  
         inode->i_ctime = CURRENT_TIME;
         inode_inc_link_count(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         err = nilfs_add_nondir(dentry, inode);
         if (!err)
diff --git a/fs/notify/fsnotify.c b/fs/notify/fsnotify.c

index 36802420d69a94024eefde05f1eab9f6a9932241..4498a208df940b92117323dcad6cb355969fd164 100644 (file)
--- a/fs/notify/fsnotify.c
+++ b/fs/notify/fsnotify.c
@@ -88,8 +88,6 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
  {
         struct dentry *parent;
         struct inode *p_inode;
-       bool send = false;
-       bool should_update_children = false;
  
         if (!dentry)
                 dentry = path->dentry;
@@ -97,29 +95,12 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
         if (!(dentry->d_flags & DCACHE_FSNOTIFY_PARENT_WATCHED))
                 return;
  
-       spin_lock(&dentry->d_lock);
-       parent = dentry->d_parent;
+       parent = dget_parent(dentry);
         p_inode = parent->d_inode;
  
-       if (fsnotify_inode_watches_children(p_inode)) {
-               if (p_inode->i_fsnotify_mask & mask) {
-                       dget(parent);
-                       send = true;
-               }
-       } else {
-               /*
-                * The parent doesn't care about events on it's children but
-                * at least one child thought it did.  We need to run all the
-                * children and update their d_flags to let them know p_inode
-                * doesn't care about them any more.
-                */
-               dget(parent);
-               should_update_children = true;
-       }
-
-       spin_unlock(&dentry->d_lock);
-
-       if (send) {
+       if (unlikely(!fsnotify_inode_watches_children(p_inode)))
+               __fsnotify_update_child_dentry_flags(p_inode);
+       else if (p_inode->i_fsnotify_mask & mask) {
                 /* we are notifying a parent so come up with the new mask which
                  * specifies these are events which came from a child. */
                 mask |= FS_EVENT_ON_CHILD;
@@ -130,13 +111,9 @@ void __fsnotify_parent(struct path *path, struct dentry *dentry, __u32 mask)
                 else
                         fsnotify(p_inode, mask, dentry->d_inode, FSNOTIFY_EVENT_INODE,
                                  dentry->d_name.name, 0);
-               dput(parent);
         }
  
-       if (unlikely(should_update_children)) {
-               __fsnotify_update_child_dentry_flags(p_inode);
-               dput(parent);
-       }
+       dput(parent);
  }
  EXPORT_SYMBOL_GPL(__fsnotify_parent);
  
diff --git a/fs/notify/inode_mark.c b/fs/notify/inode_mark.c

index 33297c00506050afb6bbe6227f8d47e4eb9cdd11..21ed10660b80770c34eb136a311254330568fac2 100644 (file)
--- a/fs/notify/inode_mark.c
+++ b/fs/notify/inode_mark.c
@@ -240,6 +240,7 @@ void fsnotify_unmount_inodes(struct list_head *list)
  {
         struct inode *inode, *next_i, *need_iput = NULL;
  
+       spin_lock(&inode_lock);
         list_for_each_entry_safe(inode, next_i, list, i_sb_list) {
                 struct inode *need_iput_tmp;
  
@@ -297,4 +298,5 @@ void fsnotify_unmount_inodes(struct list_head *list)
  
                 spin_lock(&inode_lock);
         }
+       spin_unlock(&inode_lock);
  }
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c

index 19c5180f8a28c063b9a8cf5fefc58a6c61512455..d3fbe5730bfc1bb1c1264e9e3923c9425321fea0 100644 (file)
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -2911,8 +2911,8 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
                 goto unl_upcase_iput_tmp_ino_err_out_now;
         }
         if ((sb->s_root = d_alloc_root(vol->root_ino))) {
-               /* We increment i_count simulating an ntfs_iget(). */
-               atomic_inc(&vol->root_ino->i_count);
+               /* We grab a reference, simulating an ntfs_iget(). */
+               ihold(vol->root_ino);
                 ntfs_debug("Exiting, status successful.");
                 /* Release the default upcase if it has no users. */
                 mutex_lock(&ntfs_lock);
@@ -3021,21 +3021,6 @@ iput_tmp_ino_err_out_now:
         if (vol->mft_ino && vol->mft_ino != tmp_ino)
                 iput(vol->mft_ino);
         vol->mft_ino = NULL;
-       /*
-        * This is needed to get ntfs_clear_extent_inode() called for each
-        * inode we have ever called ntfs_iget()/iput() on, otherwise we A)
-        * leak resources and B) a subsequent mount fails automatically due to
-        * ntfs_iget() never calling down into our ntfs_read_locked_inode()
-        * method again... FIXME: Do we need to do this twice now because of
-        * attribute inodes? I think not, so leave as is for now... (AIA)
-        */
-       if (invalidate_inodes(sb)) {
-               ntfs_error(sb, "Busy inodes left. This is most likely a NTFS "
-                               "driver bug.");
-               /* Copied from fs/super.c. I just love this message. (-; */
-               printk("NTFS: Busy inodes after umount. Self-destruct in 5 "
-                               "seconds.  Have a nice day...\n");
-       }
         /* Errors at this stage are irrelevant. */
  err_out_now:
         sb->s_fs_info = NULL;
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c

index 5cfeee11815881b04a3250d42a72838e40289200..f1e962cb3b73084699a182933b7760926c36880e 100644 (file)
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -165,7 +165,7 @@ int ocfs2_get_block(struct inode *inode, sector_t iblock,
          * ocfs2 never allocates in this function - the only time we
          * need to use BH_New is when we're extending i_size on a file
          * system which doesn't support holes, in which case BH_New
-        * allows block_prepare_write() to zero.
+        * allows __block_write_begin() to zero.
          *
          * If we see this on a sparse file system, then a truncate has
          * raced us and removed the cluster. In this case, we clear
@@ -407,21 +407,6 @@ static int ocfs2_writepage(struct page *page, struct writeback_control *wbc)
         return ret;
  }
  
-/*
- * This is called from ocfs2_write_zero_page() which has handled it's
- * own cluster locking and has ensured allocation exists for those
- * blocks to be written.
- */
-int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
-                              unsigned from, unsigned to)
-{
-       int ret;
-
-       ret = block_prepare_write(page, from, to, ocfs2_get_block);
-
-       return ret;
-}
-
  /* Taken from ext3. We don't necessarily need the full blown
   * functionality yet, but IMHO it's better to cut and paste the whole
   * thing so we can avoid introducing our own bugs (and easily pick up
@@ -732,7 +717,7 @@ static int ocfs2_should_read_blk(struct inode *inode, struct page *page,
  }
  
  /*
- * Some of this taken from block_prepare_write(). We already have our
+ * Some of this taken from __block_write_begin(). We already have our
   * mapping by now though, and the entire write will be allocating or
   * it won't, so not much need to use BH_New.
   *
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h

index 7606f663da6d1e1181dac172736ed9dfbbb3fd84..76bfdfda691a03b1790ac9670a8bb79d042e7584 100644 (file)
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -22,9 +22,6 @@
  #ifndef OCFS2_AOPS_H
  #define OCFS2_AOPS_H
  
-int ocfs2_prepare_write_nolock(struct inode *inode, struct page *page,
-                              unsigned from, unsigned to);
-
  handle_t *ocfs2_start_walk_page_trans(struct inode *inode,
                                                          struct page *page,
                                                          unsigned from,
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c

index a7ebd9d42dc8853e89dd381af65afeab3e5df8fe..75e115f1bd730d2b12d4e1d924d203a5864d5150 100644 (file)
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -400,6 +400,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
         if (inode) {
                 ip = DLMFS_I(inode);
  
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_uid = current_fsuid();
                 inode->i_gid = current_fsgid();
@@ -425,6 +426,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
         if (!inode)
                 return NULL;
  
+       inode->i_ino = get_next_ino();
         inode->i_mode = mode;
         inode->i_uid = current_fsuid();
         inode->i_gid = current_fsgid();
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c

index 1ca6867935bbc62e4f733f703951d9f39c7bab93..77b4c04a2809831e7209d1be3f8e860a0ba7edc8 100644 (file)
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -796,13 +796,12 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
                 block_end = block_start + (1 << inode->i_blkbits);
  
                 /*
-                * block_start is block-aligned.  Bump it by one to
-                * force ocfs2_{prepare,commit}_write() to zero the
+                * block_start is block-aligned.  Bump it by one to force
+                * __block_write_begin and block_commit_write to zero the
                  * whole block.
                  */
-               ret = ocfs2_prepare_write_nolock(inode, page,
-                                                block_start + 1,
-                                                block_start + 1);
+               ret = __block_write_begin(page, block_start + 1, 0,
+                                         ocfs2_get_block);
                 if (ret < 0) {
                         mlog_errno(ret);
                         goto out_unlock;
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c

index e7bde21149aee4f4e1dfed501069358927a064d7..ff5744e1e36fcb9200158eb419abc3ebdf33ec5b 100644 (file)
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -742,7 +742,7 @@ static int ocfs2_link(struct dentry *old_dentry,
                 goto out_commit;
         }
  
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         dentry->d_op = &ocfs2_dentry_ops;
         d_instantiate(dentry, inode);
  
diff --git a/fs/pipe.c b/fs/pipe.c

index 37eb1ebeaa906ea068f940e6298f8cd632feacb4..d2d7566ce68e5cb4fc9713e531670d8f05d52d02 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -954,6 +954,8 @@ static struct inode * get_pipe_inode(void)
         if (!inode)
                 goto fail_inode;
  
+       inode->i_ino = get_next_ino();
+
         pipe = alloc_pipe_info(inode);
         if (!pipe)
                 goto fail_iput;
diff --git a/fs/proc/base.c b/fs/proc/base.c

index 53dc8ad40ae682b7ad2781782538b05c45da53b1..9b094c1c846542160ff6c265171467d5ef0050ff 100644 (file)
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -771,6 +771,8 @@ static const struct file_operations proc_single_file_operations = {
  static int mem_open(struct inode* inode, struct file* file)
  {
         file->private_data = (void*)((long)current->self_exec_id);
+       /* OK to pass a negative loff_t; we can catch out-of-range offsets */
+       file->f_mode |= FMODE_UNSIGNED_OFFSET;
         return 0;
  }
  
@@ -1646,6 +1648,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st
  
         /* Common stuff */
         ei = PROC_I(inode);
+       inode->i_ino = get_next_ino();
         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
         inode->i_op = &proc_def_inode_operations;
  
@@ -2592,6 +2595,7 @@ static struct dentry *proc_base_instantiate(struct inode *dir,
  
         /* Initialize the inode */
         ei = PROC_I(inode);
+       inode->i_ino = get_next_ino();
         inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
  
         /*
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c

index 2fc52552271d91cf904213ee98576e20237ec1da..b652cb00906b02927c7f4afa048d6bd83615936d 100644 (file)
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -23,6 +23,8 @@ static struct inode *proc_sys_make_inode(struct super_block *sb,
         if (!inode)
                 goto out;
  
+       inode->i_ino = get_next_ino();
+
         sysctl_head_get(head);
         ei = PROC_I(inode);
         ei->sysctl = head;
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c

index a5ebae70dc6d4c90213770630f185d56ec6d6009..67fadb1ad2c11a6428480aad801ee616d90eebac 100644 (file)
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -58,6 +58,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
         struct inode * inode = new_inode(sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode_init_owner(inode, dir, mode);
                 inode->i_mapping->a_ops = &ramfs_aops;
                 inode->i_mapping->backing_dev_info = &ramfs_backing_dev_info;
diff --git a/fs/read_write.c b/fs/read_write.c

index e757ef26e4cecb6d47fd628b32f204e90c4b8497..9cd9d148105d287b55c9f60af4034f300cc6d1af 100644 (file)
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -31,6 +31,20 @@ const struct file_operations generic_ro_fops = {
  
  EXPORT_SYMBOL(generic_ro_fops);
  
+static int
+__negative_fpos_check(struct file *file, loff_t pos, size_t count)
+{
+       /*
+        * pos or pos+count is negative here, check overflow.
+        * too big "count" will be caught in rw_verify_area().
+        */
+       if ((pos < 0) && (pos + count < pos))
+               return -EOVERFLOW;
+       if (file->f_mode & FMODE_UNSIGNED_OFFSET)
+               return 0;
+       return -EINVAL;
+}
+
  /**
   * generic_file_llseek_unlocked - lockless generic llseek implementation
   * @file:      file structure to seek on
@@ -62,7 +76,9 @@ generic_file_llseek_unlocked(struct file *file, loff_t offset, int origin)
                 break;
         }
  
-       if (offset < 0 || offset > inode->i_sb->s_maxbytes)
+       if (offset < 0 && __negative_fpos_check(file, offset, 0))
+               return -EINVAL;
+       if (offset > inode->i_sb->s_maxbytes)
                 return -EINVAL;
  
         /* Special lock needed here? */
@@ -137,7 +153,7 @@ loff_t default_llseek(struct file *file, loff_t offset, int origin)
                         offset += file->f_pos;
         }
         retval = -EINVAL;
-       if (offset >= 0) {
+       if (offset >= 0 || !__negative_fpos_check(file, offset, 0)) {
                 if (offset != file->f_pos) {
                         file->f_pos = offset;
                         file->f_version = 0;
@@ -221,6 +237,7 @@ bad:
  }
  #endif
  
+
  /*
   * rw_verify_area doesn't like huge counts. We limit
   * them to something that fits in "int" so that others
@@ -238,8 +255,11 @@ int rw_verify_area(int read_write, struct file *file, loff_t *ppos, size_t count
         if (unlikely((ssize_t) count < 0))
                 return retval;
         pos = *ppos;
-       if (unlikely((pos < 0) || (loff_t) (pos + count) < 0))
-               return retval;
+       if (unlikely((pos < 0) || (loff_t) (pos + count) < 0)) {
+               retval = __negative_fpos_check(file, pos, count);
+               if (retval)
+                       return retval;
+       }
  
         if (unlikely(inode->i_flock && mandatory_lock(inode))) {
                 retval = locks_mandatory_area(
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c

index c1f93896cb538082ce0b4158bbdfb565edb523cd..41656d40dc5c87fc8bcfd9ec5aea4bc635ebc092 100644 (file)
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -22,8 +22,6 @@
  
  int reiserfs_commit_write(struct file *f, struct page *page,
                           unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
-                          unsigned from, unsigned to);
  
  void reiserfs_evict_inode(struct inode *inode)
  {
@@ -165,7 +163,7 @@ inline void make_le_item_head(struct item_head *ih, const struct cpu_key *key,
  ** but tail is still sitting in a direct item, and we can't write to
  ** it.  So, look through this page, and check all the mapped buffers
  ** to make sure they have valid block numbers.  Any that don't need
-** to be unmapped, so that block_prepare_write will correctly call
+** to be unmapped, so that __block_write_begin will correctly call
  ** reiserfs_get_block to convert the tail into an unformatted node
  */
  static inline void fix_tail_page_for_writing(struct page *page)
@@ -439,13 +437,13 @@ static int reiserfs_bmap(struct inode *inode, sector_t block,
  }
  
  /* special version of get_block that is only used by grab_tail_page right
-** now.  It is sent to block_prepare_write, and when you try to get a
+** now.  It is sent to __block_write_begin, and when you try to get a
  ** block past the end of the file (or a block from a hole) it returns
-** -ENOENT instead of a valid buffer.  block_prepare_write expects to
+** -ENOENT instead of a valid buffer.  __block_write_begin expects to
  ** be able to do i/o on the buffers returned, unless an error value
  ** is also returned.
  **
-** So, this allows block_prepare_write to be used for reading a single block
+** So, this allows __block_write_begin to be used for reading a single block
  ** in a page.  Where it does not produce a valid page for holes, or past the
  ** end of the file.  This turns out to be exactly what we need for reading
  ** tails for conversion.
@@ -558,11 +556,12 @@ static int convert_tail_for_hole(struct inode *inode,
          **
          ** We must fix the tail page for writing because it might have buffers
          ** that are mapped, but have a block number of 0.  This indicates tail
-        ** data that has been read directly into the page, and block_prepare_write
-        ** won't trigger a get_block in this case.
+        ** data that has been read directly into the page, and
+        ** __block_write_begin won't trigger a get_block in this case.
          */
         fix_tail_page_for_writing(tail_page);
-       retval = reiserfs_prepare_write(NULL, tail_page, tail_start, tail_end);
+       retval = __reiserfs_write_begin(tail_page, tail_start,
+                                     tail_end - tail_start);
         if (retval)
                 goto unlock;
  
@@ -2033,7 +2032,7 @@ static int grab_tail_page(struct inode *inode,
         /* start within the page of the last block in the file */
         start = (offset / blocksize) * blocksize;
  
-       error = block_prepare_write(page, start, offset,
+       error = __block_write_begin(page, start, offset - start,
                                     reiserfs_get_block_create_0);
         if (error)
                 goto unlock;
@@ -2628,8 +2627,7 @@ static int reiserfs_write_begin(struct file *file,
         return ret;
  }
  
-int reiserfs_prepare_write(struct file *f, struct page *page,
-                          unsigned from, unsigned to)
+int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len)
  {
         struct inode *inode = page->mapping->host;
         int ret;
@@ -2650,7 +2648,7 @@ int reiserfs_prepare_write(struct file *f, struct page *page,
                 th->t_refcount++;
         }
  
-       ret = block_prepare_write(page, from, to, reiserfs_get_block);
+       ret = __block_write_begin(page, from, len, reiserfs_get_block);
         if (ret && reiserfs_transaction_running(inode->i_sb)) {
                 struct reiserfs_transaction_handle *th = current->journal_info;
                 /* this gets a little ugly.  If reiserfs_get_block returned an
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c

index 5cbb81e134aca031b21e3c88661400ff1c1b174f..adf22b485ceac01e0f5ed7b0a8d1593956a7b81f 100644 (file)
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -160,8 +160,6 @@ long reiserfs_compat_ioctl(struct file *file, unsigned int cmd,
  
  int reiserfs_commit_write(struct file *f, struct page *page,
                           unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
-                          unsigned from, unsigned to);
  /*
  ** reiserfs_unpack
  ** Function try to convert tail from direct item into indirect.
@@ -200,7 +198,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
         }
  
         /* we unpack by finding the page with the tail, and calling
-        ** reiserfs_prepare_write on that page.  This will force a
+        ** __reiserfs_write_begin on that page.  This will force a
          ** reiserfs_get_block to unpack the tail for us.
          */
         index = inode->i_size >> PAGE_CACHE_SHIFT;
@@ -210,7 +208,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
         if (!page) {
                 goto out;
         }
-       retval = reiserfs_prepare_write(NULL, page, write_from, write_from);
+       retval = __reiserfs_write_begin(page, write_from, 0);
         if (retval)
                 goto out_unlock;
  
diff --git a/fs/reiserfs/namei.c b/fs/reiserfs/namei.c

index ee78d4a0086a983609749a72ec0fb840615255fd..ba5f51ec345829499982f17196fc61ac444429a0 100644 (file)
--- a/fs/reiserfs/namei.c
+++ b/fs/reiserfs/namei.c
@@ -1156,7 +1156,7 @@ static int reiserfs_link(struct dentry *old_dentry, struct inode *dir,
         inode->i_ctime = CURRENT_TIME_SEC;
         reiserfs_update_sd(&th, inode);
  
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         d_instantiate(dentry, inode);
         retval = journal_end(&th, dir->i_sb, jbegin_count);
         reiserfs_write_unlock(dir->i_sb);
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c

index 8c4cf273c672185fe7cb4c5d0a6e30ddc85e5bf3..5d04a7828e7a4ee05d27446cbc8ee7fdfb433ad9 100644 (file)
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -418,13 +418,11 @@ static inline __u32 xattr_hash(const char *msg, int len)
  
  int reiserfs_commit_write(struct file *f, struct page *page,
                           unsigned from, unsigned to);
-int reiserfs_prepare_write(struct file *f, struct page *page,
-                          unsigned from, unsigned to);
  
  static void update_ctime(struct inode *inode)
  {
         struct timespec now = current_fs_time(inode->i_sb);
-       if (hlist_unhashed(&inode->i_hash) || !inode->i_nlink ||
+       if (inode_unhashed(inode) || !inode->i_nlink ||
             timespec_equal(&inode->i_ctime, &now))
                 return;
  
@@ -532,8 +530,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                         rxh->h_hash = cpu_to_le32(xahash);
                 }
  
-               err = reiserfs_prepare_write(NULL, page, page_offset,
-                                           page_offset + chunk + skip);
+               err = __reiserfs_write_begin(page, page_offset, chunk + skip);
                 if (!err) {
                         if (buffer)
                                 memcpy(data + skip, buffer + buffer_pos, chunk);
diff --git a/fs/seq_file.c b/fs/seq_file.c

index 0e7cb1395a94cfb2c98aad791672576315b88526..05d6b0e78c959a341137c97fbb2ea2fa89b25197 100644 (file)
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -462,9 +462,7 @@ int seq_path_root(struct seq_file *m, struct path *path, struct path *root,
         if (size) {
                 char *p;
  
-               spin_lock(&dcache_lock);
                 p = __d_path(path, root, buf, size);
-               spin_unlock(&dcache_lock);
                 res = PTR_ERR(p);
                 if (!IS_ERR(p)) {
                         char *end = mangle_path(buf, p, esc);
diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c

index 00a70cab1f36ea7fc7bd96317d4bccdd8486a736..f678d421e54166917f7c8ca7cf3a51632cbd1d52 100644 (file)
--- a/fs/smbfs/dir.c
+++ b/fs/smbfs/dir.c
@@ -406,21 +406,15 @@ void
  smb_renew_times(struct dentry * dentry)
  {
         dget(dentry);
-       spin_lock(&dentry->d_lock);
-       for (;;) {
-               struct dentry *parent;
+       dentry->d_time = jiffies;
  
-               dentry->d_time = jiffies;
-               if (IS_ROOT(dentry))
-                       break;
-               parent = dentry->d_parent;
-               dget(parent);
-               spin_unlock(&dentry->d_lock);
+       while (!IS_ROOT(dentry)) {
+               struct dentry *parent = dget_parent(dentry);
                 dput(dentry);
                 dentry = parent;
-               spin_lock(&dentry->d_lock);
+
+               dentry->d_time = jiffies;
         }
-       spin_unlock(&dentry->d_lock);
         dput(dentry);
  }
  
diff --git a/fs/smbfs/inode.c b/fs/smbfs/inode.c

index 8fc5e50e142fad71ad05aa38f726ae0d6d4ca566..f6e9ee59757ec29c11be43e6fcab2713bab2562e 100644 (file)
--- a/fs/smbfs/inode.c
+++ b/fs/smbfs/inode.c
@@ -229,7 +229,6 @@ smb_invalidate_inodes(struct smb_sb_info *server)
  {
         VERBOSE("\n");
         shrink_dcache_sb(SB_of(server));
-       invalidate_inodes(SB_of(server));
  }
  
  /*
diff --git a/fs/smbfs/proc.c b/fs/smbfs/proc.c

index 71c29b6670b402719f8475ed34b39a306bda807d..3dcf638d4d3a0ff836fd9aba946361edd894b630 100644 (file)
--- a/fs/smbfs/proc.c
+++ b/fs/smbfs/proc.c
@@ -332,16 +332,15 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
          * and store it in reversed order [see reverse_string()]
          */
         dget(entry);
-       spin_lock(&entry->d_lock);
         while (!IS_ROOT(entry)) {
                 struct dentry *parent;
  
                 if (maxlen < (3<<unicode)) {
-                       spin_unlock(&entry->d_lock);
                         dput(entry);
                         return -ENAMETOOLONG;
                 }
  
+               spin_lock(&entry->d_lock);
                 len = server->ops->convert(path, maxlen-2, 
                                       entry->d_name.name, entry->d_name.len,
                                       server->local_nls, server->remote_nls);
@@ -359,15 +358,12 @@ static int smb_build_path(struct smb_sb_info *server, unsigned char *buf,
                 }
                 *path++ = '\\';
                 maxlen -= len+1;
-
-               parent = entry->d_parent;
-               dget(parent);
                 spin_unlock(&entry->d_lock);
+
+               parent = dget_parent(entry);
                 dput(entry);
                 entry = parent;
-               spin_lock(&entry->d_lock);
         }
-       spin_unlock(&entry->d_lock);
         dput(entry);
         reverse_string(buf, path-buf);
  
diff --git a/fs/super.c b/fs/super.c

index 8819e3a7ff203fb521b537672d16d5e2d4cea80d..b9c9869165db359d018134e99e0a39e2738e6dba 100644 (file)
--- a/fs/super.c
+++ b/fs/super.c
@@ -273,14 +273,14 @@ void generic_shutdown_super(struct super_block *sb)
                 get_fs_excl();
                 sb->s_flags &= ~MS_ACTIVE;
  
-               /* bad name - it should be evict_inodes() */
-               invalidate_inodes(sb);
+               fsnotify_unmount_inodes(&sb->s_inodes);
+
+               evict_inodes(sb);
  
                 if (sop->put_super)
                         sop->put_super(sb);
  
-               /* Forget any remaining inodes */
-               if (invalidate_inodes(sb)) {
+               if (!list_empty(&sb->s_inodes)) {
                         printk("VFS: Busy inodes after unmount of %s. "
                            "Self-destruct in 5 seconds.  Have a nice day...\n",
                            sb->s_id);
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c

index 33e047b59b8d659975d6c325835f0d11a2df27d4..11e7f7d11cd06fe85bf694397d3608b805db1fe3 100644 (file)
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -126,7 +126,7 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
  
         inode->i_ctime = CURRENT_TIME_SEC;
         inode_inc_link_count(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         return add_nondir(dentry, inode);
  }
diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c

index 87ebcce72213e2889f66127125908190a4ef213d..14f64b689d7f58edef4be03d58a8712c442f62ef 100644 (file)
--- a/fs/ubifs/dir.c
+++ b/fs/ubifs/dir.c
@@ -550,7 +550,7 @@ static int ubifs_link(struct dentry *old_dentry, struct inode *dir,
  
         lock_2_inodes(dir, inode);
         inc_nlink(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         inode->i_ctime = ubifs_current_time(inode);
         dir->i_size += sz_change;
         dir_ui->ui_size = dir->i_size;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c

index bf5fc674193c8bba9e5e31589ba883d49f106270..6d8dc02baebb57e05e5839464b24e8f443263546 100644 (file)
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -1101,7 +1101,7 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
         inc_nlink(inode);
         inode->i_ctime = current_fs_time(inode->i_sb);
         mark_inode_dirty(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         d_instantiate(dentry, inode);
         unlock_kernel();
  
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c

index b056f02b1fb306c810f4b429ae99417a8f6238e5..12f39b9e4437db73784cd77b8e7f898457d157db 100644 (file)
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -180,7 +180,7 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
  
         inode->i_ctime = CURRENT_TIME_SEC;
         inode_inc_link_count(inode);
-       atomic_inc(&inode->i_count);
+       ihold(inode);
  
         error = ufs_add_nondir(dentry, inode);
         unlock_kernel();
diff --git a/fs/xfs/linux-2.6/xfs_buf.c b/fs/xfs/linux-2.6/xfs_buf.c

index ba5312802aa99dfe266148b90d9dfa8e3f54e19d..63fd2c07cb57d861837bf463d9ad7b3aa5392088 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_buf.c
+++ b/fs/xfs/linux-2.6/xfs_buf.c
@@ -1580,6 +1580,7 @@ xfs_mapping_buftarg(
                         XFS_BUFTARG_NAME(btp));
                 return ENOMEM;
         }
+       inode->i_ino = get_next_ino();
         inode->i_mode = S_IFBLK;
         inode->i_bdev = bdev;
         inode->i_rdev = bdev->bd_dev;
diff --git a/fs/xfs/linux-2.6/xfs_iops.c b/fs/xfs/linux-2.6/xfs_iops.c

index ec858e09d546bf3bf34ca1dcbe0f7b74cb52ffa9..96107efc0c61908c3f1a94a646604836fc2131e1 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_iops.c
+++ b/fs/xfs/linux-2.6/xfs_iops.c
@@ -317,7 +317,7 @@ xfs_vn_link(
         if (unlikely(error))
                 return -error;
  
-       atomic_inc(&inode->i_count);
+       ihold(inode);
         d_instantiate(dentry, inode);
         return 0;
  }
@@ -760,7 +760,9 @@ xfs_setup_inode(
  
         inode->i_ino = ip->i_ino;
         inode->i_state = I_NEW;
-       inode_add_to_lists(ip->i_mount->m_super, inode);
+
+       inode_sb_list_add(inode);
+       insert_inode_hash(inode);
  
         inode->i_mode   = ip->i_d.di_mode;
         inode->i_nlink  = ip->i_d.di_nlink;
diff --git a/fs/xfs/linux-2.6/xfs_super.c b/fs/xfs/linux-2.6/xfs_super.c

index ab31ce5aeaf9cde4c5c008e1492c7a42f5399863..cf808782c0650d86e967e149a8fac2baf80dd093 100644 (file)
--- a/fs/xfs/linux-2.6/xfs_super.c
+++ b/fs/xfs/linux-2.6/xfs_super.c
@@ -576,7 +576,7 @@ xfs_max_file_offset(
  
         /* Figure out maximum filesize, on Linux this can depend on
          * the filesystem blocksize (on 32 bit platforms).
-        * __block_prepare_write does this in an [unsigned] long...
+        * __block_write_begin does this in an [unsigned] long...
          *      page->index << (PAGE_CACHE_SHIFT - bbits)
          * So, for page sized blocks (4K on 32 bit platforms),
          * this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h

index fac52290de90c050b6ab0c9ea5d208541cd4c839..fb2ca2e4cdc935c112d1504a85aca3130f511179 100644 (file)
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -500,7 +500,7 @@ void                xfs_mark_inode_dirty_sync(xfs_inode_t *);
  #define IHOLD(ip) \
  do { \
         ASSERT(atomic_read(&VFS_I(ip)->i_count) > 0) ; \
-       atomic_inc(&(VFS_I(ip)->i_count)); \
+       ihold(VFS_I(ip)); \
         trace_xfs_ihold(ip, _THIS_IP_); \
  } while (0)
  
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h

index dd1b25b2641c6474925537ced10bd45831912754..68d1fe7b877c82e2d302fb31f3702836bc6d6843 100644 (file)
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -212,7 +212,6 @@ int generic_write_end(struct file *, struct address_space *,
                                 loff_t, unsigned, unsigned,
                                 struct page *, void *);
  void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
-int block_prepare_write(struct page*, unsigned, unsigned, get_block_t*);
  int cont_write_begin(struct file *, struct address_space *, loff_t,
                         unsigned, unsigned, struct page **, void **,
                         get_block_t *, loff_t *);
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 4658777b41cc24d7a6cca8465cdbf72399e1fed5..240eb1d4f87645672217429ed0699cf21ce84d72 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -92,6 +92,9 @@ struct inodes_stat_t {
  /* Expect random access pattern */
  #define FMODE_RANDOM           ((__force fmode_t)0x1000)
  
+/* File is huge (eg. /dev/kmem): treat loff_t as unsigned */
+#define FMODE_UNSIGNED_OFFSET  ((__force fmode_t)0x2000)
+
  /* File was opened by fanotify and shouldn't generate fanotify events */
  #define FMODE_NONOTIFY         ((__force fmode_t)0x1000000)
  
@@ -722,7 +725,8 @@ struct posix_acl;
  
  struct inode {
         struct hlist_node       i_hash;
-       struct list_head        i_list;         /* backing dev IO list */
+       struct list_head        i_wb_list;      /* backing dev IO list */
+       struct list_head        i_lru;          /* inode LRU list */
         struct list_head        i_sb_list;
         struct list_head        i_dentry;
         unsigned long           i_ino;
@@ -789,6 +793,11 @@ struct inode {
         void                    *i_private; /* fs or device private pointer */
  };
  
+static inline int inode_unhashed(struct inode *inode)
+{
+       return hlist_unhashed(&inode->i_hash);
+}
+
  /*
   * inode->i_mutex nesting subclasses for the lock validator:
   *
@@ -1639,16 +1648,17 @@ struct super_operations {
   *
   * Q: What is the difference between I_WILL_FREE and I_FREEING?
   */
-#define I_DIRTY_SYNC           1
-#define I_DIRTY_DATASYNC       2
-#define I_DIRTY_PAGES          4
+#define I_DIRTY_SYNC           (1 << 0)
+#define I_DIRTY_DATASYNC       (1 << 1)
+#define I_DIRTY_PAGES          (1 << 2)
  #define __I_NEW                        3
  #define I_NEW                  (1 << __I_NEW)
-#define I_WILL_FREE            16
-#define I_FREEING              32
-#define I_CLEAR                        64
+#define I_WILL_FREE            (1 << 4)
+#define I_FREEING              (1 << 5)
+#define I_CLEAR                        (1 << 6)
  #define __I_SYNC               7
  #define I_SYNC                 (1 << __I_SYNC)
+#define I_REFERENCED           (1 << 8)
  
  #define I_DIRTY (I_DIRTY_SYNC | I_DIRTY_DATASYNC | I_DIRTY_PAGES)
  
@@ -1740,6 +1750,7 @@ static inline void file_accessed(struct file *file)
  }
  
  int sync_inode(struct inode *inode, struct writeback_control *wbc);
+int sync_inode_metadata(struct inode *inode, int wait);
  
  struct file_system_type {
         const char *name;
@@ -2084,7 +2095,6 @@ extern int check_disk_change(struct block_device *);
  extern int __invalidate_device(struct block_device *);
  extern int invalidate_partition(struct gendisk *, int);
  #endif
-extern int invalidate_inodes(struct super_block *);
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
                                         pgoff_t start, pgoff_t end);
  
@@ -2168,7 +2178,7 @@ extern loff_t vfs_llseek(struct file *file, loff_t offset, int origin);
  
  extern int inode_init_always(struct super_block *, struct inode *);
  extern void inode_init_once(struct inode *);
-extern void inode_add_to_lists(struct super_block *, struct inode *);
+extern void ihold(struct inode * inode);
  extern void iput(struct inode *);
  extern struct inode * igrab(struct inode *);
  extern ino_t iunique(struct super_block *, ino_t);
@@ -2188,11 +2198,11 @@ extern struct inode * iget_locked(struct super_block *, unsigned long);
  extern int insert_inode_locked4(struct inode *, unsigned long, int (*test)(struct inode *, void *), void *);
  extern int insert_inode_locked(struct inode *);
  extern void unlock_new_inode(struct inode *);
+extern unsigned int get_next_ino(void);
  
  extern void __iget(struct inode * inode);
  extern void iget_failed(struct inode *);
  extern void end_writeback(struct inode *);
-extern void destroy_inode(struct inode *);
  extern void __destroy_inode(struct inode *);
  extern struct inode *new_inode(struct super_block *);
  extern int should_remove_suid(struct dentry *);
@@ -2200,9 +2210,11 @@ extern int file_remove_suid(struct file *);
  
  extern void __insert_inode_hash(struct inode *, unsigned long hashval);
  extern void remove_inode_hash(struct inode *);
-static inline void insert_inode_hash(struct inode *inode) {
+static inline void insert_inode_hash(struct inode *inode)
+{
         __insert_inode_hash(inode, inode->i_ino);
  }
+extern void inode_sb_list_add(struct inode *inode);
  
  #ifdef CONFIG_BLOCK
  extern void submit_bio(int, struct bio *);
@@ -2485,7 +2497,10 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
  struct ctl_table;
  int proc_nr_files(struct ctl_table *table, int write,
                   void __user *buffer, size_t *lenp, loff_t *ppos);
-
+int proc_nr_dentry(struct ctl_table *table, int write,
+                 void __user *buffer, size_t *lenp, loff_t *ppos);
+int proc_nr_inodes(struct ctl_table *table, int write,
+                  void __user *buffer, size_t *lenp, loff_t *ppos);
  int __init get_filesystem_list(char *buf);
  
  #define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE])
diff --git a/include/linux/list.h b/include/linux/list.h

index 88a000617d775fb74333a5f54aceac8460cb5008..9a5f8a71810c55f231565422f8912bea5bbe2d7b 100644 (file)
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -636,6 +636,12 @@ static inline void hlist_add_after(struct hlist_node *n,
                 next->next->pprev  = &next->next;
  }
  
+/* after that we'll appear to be on some hlist and hlist_del will work */
+static inline void hlist_add_fake(struct hlist_node *n)
+{
+       n->pprev = &n->next;
+}
+
  /*
   * Move a list from one list head to another. Fixup the pprev
   * reference of the first entry if it exists.
diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h

index 91a4177e60ce3779e21c7a3fd8bfbc08858d43f3..5ca47e59b7278ffbb0dd4fc624d9e4cf1697d0ae 100644 (file)
--- a/include/linux/reiserfs_fs.h
+++ b/include/linux/reiserfs_fs.h
@@ -2072,6 +2072,8 @@ void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
  void i_attrs_to_sd_attrs(struct inode *inode, __u16 * sd_attrs);
  int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
  
+int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
+
  /* namei.c */
  void set_de_name_and_namelen(struct reiserfs_dir_entry *de);
  int search_by_entry_key(struct super_block *sb, const struct cpu_key *key,
diff --git a/include/linux/writeback.h b/include/linux/writeback.h

index c7299d2ace6b373d802a01aee3216acfa8a64f41..d5c7aaadda59a926032794a4d3a27a46f01bab3c 100644 (file)
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -10,8 +10,6 @@
  struct backing_dev_info;
  
  extern spinlock_t inode_lock;
-extern struct list_head inode_in_use;
-extern struct list_head inode_unused;
  
  /*
   * fs/fs-writeback.c
diff --git a/ipc/mqueue.c b/ipc/mqueue.c

index e1e7b9635f5da7c986dfd18426bce94c177d6630..3a61ffefe88472af269e0bc218d6f8eb2959a1c9 100644 (file)
--- a/ipc/mqueue.c
+++ b/ipc/mqueue.c
@@ -116,6 +116,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb,
  
         inode = new_inode(sb);
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_uid = current_fsuid();
                 inode->i_gid = current_fsgid();
@@ -769,7 +770,7 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
  
         inode = dentry->d_inode;
         if (inode)
-               atomic_inc(&inode->i_count);
+               ihold(inode);
         err = mnt_want_write(ipc_ns->mq_mnt);
         if (err)
                 goto out_err;
diff --git a/kernel/cgroup.c b/kernel/cgroup.c

index 7b69b8d0313d63a92bd4efaf4d07676456fe71fc..9270d532ec3c7c6a5be2cf3d1eca21788036b81f 100644 (file)
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -777,6 +777,7 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb)
         struct inode *inode = new_inode(sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_uid = current_fsuid();
                 inode->i_gid = current_fsgid();
diff --git a/kernel/futex.c b/kernel/futex.c

index a118bf160e0b05a4b24404beda19d07adead5161..6c683b37f2ce25291a5a0c8f490a16c08c84d674 100644 (file)
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -169,7 +169,7 @@ static void get_futex_key_refs(union futex_key *key)
  
         switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) {
         case FUT_OFF_INODE:
-               atomic_inc(&key->shared.inode->i_count);
+               ihold(key->shared.inode);
                 break;
         case FUT_OFF_MMSHARED:
                 atomic_inc(&key->private.mm->mm_count);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c

index 48d9d689498fbaa394e4d6ec8f4403c90767d7f1..c33a1edb799fda6db2e16fdf9d1d0401a9f78278 100644 (file)
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1338,14 +1338,14 @@ static struct ctl_table fs_table[] = {
                 .data           = &inodes_stat,
                 .maxlen         = 2*sizeof(int),
                 .mode           = 0444,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_nr_inodes,
         },
         {
                 .procname       = "inode-state",
                 .data           = &inodes_stat,
                 .maxlen         = 7*sizeof(int),
                 .mode           = 0444,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_nr_inodes,
         },
         {
                 .procname       = "file-nr",
@@ -1375,7 +1375,7 @@ static struct ctl_table fs_table[] = {
                 .data           = &dentry_stat,
                 .maxlen         = 6*sizeof(int),
                 .mode           = 0444,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_nr_dentry,
         },
         {
                 .procname       = "overflowuid",
diff --git a/mm/backing-dev.c b/mm/backing-dev.c

index f2eb27884ffa88c99d4662fa2cdb3035f216289c..027100d30227fead0a4010d1feb0e2791a98fcaa 100644 (file)
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -74,11 +74,11 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
  
         nr_wb = nr_dirty = nr_io = nr_more_io = 0;
         spin_lock(&inode_lock);
-       list_for_each_entry(inode, &wb->b_dirty, i_list)
+       list_for_each_entry(inode, &wb->b_dirty, i_wb_list)
                 nr_dirty++;
-       list_for_each_entry(inode, &wb->b_io, i_list)
+       list_for_each_entry(inode, &wb->b_io, i_wb_list)
                 nr_io++;
-       list_for_each_entry(inode, &wb->b_more_io, i_list)
+       list_for_each_entry(inode, &wb->b_more_io, i_wb_list)
                 nr_more_io++;
         spin_unlock(&inode_lock);
  
diff --git a/mm/shmem.c b/mm/shmem.c

index 080b09a57a8fab1a4565757d13bb7a4fdb74b74a..f6d350e8adc56da67313a782569e81c34af27286 100644 (file)
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1586,6 +1586,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
  
         inode = new_inode(sb);
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode_init_owner(inode, dir, mode);
                 inode->i_blocks = 0;
                 inode->i_mapping->backing_dev_info = &shmem_backing_dev_info;
@@ -1903,7 +1904,7 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr
         dir->i_size += BOGO_DIRENT_SIZE;
         inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME;
         inc_nlink(inode);
-       atomic_inc(&inode->i_count);    /* New dentry reference */
+       ihold(inode);   /* New dentry reference */
         dget(dentry);           /* Extra pinning count for the created dentry */
         d_instantiate(dentry, inode);
  out:
@@ -2146,7 +2147,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
         if (*len < 3)
                 return 255;
  
-       if (hlist_unhashed(&inode->i_hash)) {
+       if (inode_unhashed(inode)) {
                 /* Unfortunately insert_inode_hash is not idempotent,
                  * so as we hash inodes here rather than at creation
                  * time, we need a lock to ensure we only try
@@ -2154,7 +2155,7 @@ static int shmem_encode_fh(struct dentry *dentry, __u32 *fh, int *len,
                  */
                 static DEFINE_SPINLOCK(lock);
                 spin_lock(&lock);
-               if (hlist_unhashed(&inode->i_hash))
+               if (inode_unhashed(inode))
                         __insert_inode_hash(inode,
                                             inode->i_ino + inode->i_generation);
                 spin_unlock(&lock);
diff --git a/net/socket.c b/net/socket.c

index 7f67c072d4969670a0355c7aaad033dbdc651a5f..ee3cd280c76e9cfb58024fa22f3017352f0467bc 100644 (file)
--- a/net/socket.c
+++ b/net/socket.c
@@ -377,7 +377,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags)
                   &socket_file_ops);
         if (unlikely(!file)) {
                 /* drop dentry, keep inode */
-               atomic_inc(&path.dentry->d_inode->i_count);
+               ihold(path.dentry->d_inode);
                 path_put(&path);
                 put_unused_fd(fd);
                 return -ENFILE;
@@ -480,6 +480,7 @@ static struct socket *sock_alloc(void)
         sock = SOCKET_I(inode);
  
         kmemcheck_annotate_bitfield(sock, type);
+       inode->i_ino = get_next_ino();
         inode->i_mode = S_IFSOCK | S_IRWXUGO;
         inode->i_uid = current_fsuid();
         inode->i_gid = current_fsgid();
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c

index 52f252432144ab439c9083acf20e8d487fbbd12d..7df92d237cb8103171a892f49a98aaca1cd05e49 100644 (file)
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -445,6 +445,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
         struct inode *inode = new_inode(sb);
         if (!inode)
                 return NULL;
+       inode->i_ino = get_next_ino();
         inode->i_mode = mode;
         inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
         switch(mode & S_IFMT) {
diff --git a/security/apparmor/path.c b/security/apparmor/path.c

index 82396050f18646ac0519352321637e930c05e367..36cc0cc39e78e135630384e6500f4a789fe7aa2a 100644 (file)
--- a/security/apparmor/path.c
+++ b/security/apparmor/path.c
@@ -72,10 +72,8 @@ static int d_namespace_path(struct path *path, char *buf, int buflen,
                 path_get(&root);
         }
  
-       spin_lock(&dcache_lock);
         tmp = root;
         res = __d_path(path, &tmp, buf, buflen);
-       spin_unlock(&dcache_lock);
  
         *name = res;
         /* handle error conditions - and still allow a partial path to
diff --git a/security/inode.c b/security/inode.c

index 88839866cbcd029a52422b51cf163ec8167a4034..cb8f47c66a58bb7eb11c0a75319f378261393f14 100644 (file)
--- a/security/inode.c
+++ b/security/inode.c
@@ -61,6 +61,7 @@ static struct inode *get_inode(struct super_block *sb, int mode, dev_t dev)
         struct inode *inode = new_inode(sb);
  
         if (inode) {
+               inode->i_ino = get_next_ino();
                 inode->i_mode = mode;
                 inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
                 switch (mode & S_IFMT) {
diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c

index 87e0556bae70ff977ea290b3cdfcc2c308d8edf5..55a755c1a1bd9546a4655e83351cc93bc92cf655 100644 (file)
--- a/security/selinux/selinuxfs.c
+++ b/security/selinux/selinuxfs.c
@@ -978,6 +978,7 @@ static struct inode *sel_make_inode(struct super_block *sb, int mode)
         struct inode *ret = new_inode(sb);
  
         if (ret) {
+               ret->i_ino = get_next_ino();
                 ret->i_mode = mode;
                 ret->i_atime = ret->i_mtime = ret->i_ctime = CURRENT_TIME;
         }
diff --git a/security/tomoyo/realpath.c b/security/tomoyo/realpath.c

index ed8ccd680102b53d98f9b98cb9a3aa05c8143090..1d0bf8fa1922fdedec280ba5085f8cafe43d1d68 100644 (file)
--- a/security/tomoyo/realpath.c
+++ b/security/tomoyo/realpath.c
@@ -127,10 +127,8 @@ char *tomoyo_realpath_from_path(struct path *path)
                 /* If we don't have a vfsmount, we can't calculate. */
                 if (!path->mnt)
                         break;
-               spin_lock(&dcache_lock);
                 /* go to whatever namespace root we are under */
                 pos = __d_path(path, &ns_root, buf, buf_len);
-               spin_unlock(&dcache_lock);
                 /* Prepend "/proc" prefix if using internal proc vfs mount. */
                 if (!IS_ERR(pos) && (path->mnt->mnt_flags & MNT_INTERNAL) &&
                     (path->mnt->mnt_sb->s_magic == PROC_SUPER_MAGIC)) {
author	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 27 Oct 2010 00:58:44 +0000 (17:58 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Wed, 27 Oct 2010 00:58:44 +0000 (17:58 -0700)
Documentation/filesystems/Locking		patch \| blob \| blame \| history
Documentation/filesystems/sharedsubtree.txt		patch \| blob \| blame \| history
drivers/char/mem.c		patch \| blob \| blame \| history
drivers/infiniband/hw/ipath/ipath_fs.c		patch \| blob \| blame \| history
drivers/infiniband/hw/qib/qib_fs.c		patch \| blob \| blame \| history
drivers/misc/ibmasm/ibmasmfs.c		patch \| blob \| blame \| history
drivers/oprofile/oprofilefs.c		patch \| blob \| blame \| history
drivers/staging/pohmelfs/inode.c		patch \| blob \| blame \| history
drivers/usb/core/inode.c		patch \| blob \| blame \| history
drivers/usb/gadget/f_fs.c		patch \| blob \| blame \| history
drivers/usb/gadget/inode.c		patch \| blob \| blame \| history
fs/9p/vfs_inode.c		patch \| blob \| blame \| history
fs/affs/file.c		patch \| blob \| blame \| history
fs/affs/inode.c		patch \| blob \| blame \| history
fs/afs/dir.c		patch \| blob \| blame \| history
fs/aio.c		patch \| blob \| blame \| history
fs/anon_inodes.c		patch \| blob \| blame \| history
fs/autofs4/inode.c		patch \| blob \| blame \| history
fs/bfs/dir.c		patch \| blob \| blame \| history
fs/binfmt_misc.c		patch \| blob \| blame \| history
fs/block_dev.c		patch \| blob \| blame \| history
fs/btrfs/inode.c		patch \| blob \| blame \| history
fs/buffer.c		patch \| blob \| blame \| history
fs/coda/dir.c		patch \| blob \| blame \| history
fs/configfs/inode.c		patch \| blob \| blame \| history
fs/dcache.c		patch \| blob \| blame \| history
fs/debugfs/inode.c		patch \| blob \| blame \| history
fs/exofs/file.c		patch \| blob \| blame \| history
fs/exofs/namei.c		patch \| blob \| blame \| history
fs/exportfs/expfs.c		patch \| blob \| blame \| history
fs/ext2/dir.c		patch \| blob \| blame \| history
fs/ext2/ext2.h		patch \| blob \| blame \| history
fs/ext2/inode.c		patch \| blob \| blame \| history
fs/ext2/namei.c		patch \| blob \| blame \| history
fs/ext2/super.c		patch \| blob \| blame \| history
fs/ext2/xattr.c		patch \| blob \| blame \| history
fs/ext3/inode.c		patch \| blob \| blame \| history
fs/ext3/namei.c		patch \| blob \| blame \| history
fs/ext4/inode.c		patch \| blob \| blame \| history
fs/ext4/mballoc.c		patch \| blob \| blame \| history
fs/ext4/namei.c		patch \| blob \| blame \| history
fs/freevxfs/vxfs_inode.c		patch \| blob \| blame \| history
fs/fs-writeback.c		patch \| blob \| blame \| history
fs/fuse/control.c		patch \| blob \| blame \| history
fs/gfs2/aops.c		patch \| blob \| blame \| history
fs/gfs2/ops_fstype.c		patch \| blob \| blame \| history
fs/gfs2/ops_inode.c		patch \| blob \| blame \| history
fs/gfs2/super.c		patch \| blob \| blame \| history
fs/hfs/hfs_fs.h		patch \| blob \| blame \| history
fs/hfs/inode.c		patch \| blob \| blame \| history
fs/hfs/mdb.c		patch \| blob \| blame \| history
fs/hfs/super.c		patch \| blob \| blame \| history
fs/hfsplus/dir.c		patch \| blob \| blame \| history
fs/hfsplus/inode.c		patch \| blob \| blame \| history
fs/hugetlbfs/inode.c		patch \| blob \| blame \| history
fs/inode.c		patch \| blob \| blame \| history
fs/internal.h		patch \| blob \| blame \| history
fs/isofs/inode.c		patch \| blob \| blame \| history
fs/jffs2/dir.c		patch \| blob \| blame \| history
fs/jfs/jfs_imap.c		patch \| blob \| blame \| history
fs/jfs/jfs_txnmgr.c		patch \| blob \| blame \| history
fs/jfs/namei.c		patch \| blob \| blame \| history
fs/libfs.c		patch \| blob \| blame \| history
fs/logfs/dir.c		patch \| blob \| blame \| history
fs/minix/namei.c		patch \| blob \| blame \| history
fs/namei.c		patch \| blob \| blame \| history
fs/namespace.c		patch \| blob \| blame \| history
fs/nfs/dir.c		patch \| blob \| blame \| history
fs/nfs/getroot.c		patch \| blob \| blame \| history
fs/nfsd/vfs.c		patch \| blob \| blame \| history
fs/nilfs2/namei.c		patch \| blob \| blame \| history
fs/notify/fsnotify.c		patch \| blob \| blame \| history
fs/notify/inode_mark.c		patch \| blob \| blame \| history
fs/ntfs/super.c		patch \| blob \| blame \| history
fs/ocfs2/aops.c		patch \| blob \| blame \| history
fs/ocfs2/aops.h		patch \| blob \| blame \| history
fs/ocfs2/dlmfs/dlmfs.c		patch \| blob \| blame \| history
fs/ocfs2/file.c		patch \| blob \| blame \| history
fs/ocfs2/namei.c		patch \| blob \| blame \| history
fs/pipe.c		patch \| blob \| blame \| history
fs/proc/base.c		patch \| blob \| blame \| history
fs/proc/proc_sysctl.c		patch \| blob \| blame \| history
fs/ramfs/inode.c		patch \| blob \| blame \| history
fs/read_write.c		patch \| blob \| blame \| history
fs/reiserfs/inode.c		patch \| blob \| blame \| history
fs/reiserfs/ioctl.c		patch \| blob \| blame \| history
fs/reiserfs/namei.c		patch \| blob \| blame \| history
fs/reiserfs/xattr.c		patch \| blob \| blame \| history
fs/seq_file.c		patch \| blob \| blame \| history
fs/smbfs/dir.c		patch \| blob \| blame \| history
fs/smbfs/inode.c		patch \| blob \| blame \| history
fs/smbfs/proc.c		patch \| blob \| blame \| history
fs/super.c		patch \| blob \| blame \| history
fs/sysv/namei.c		patch \| blob \| blame \| history
fs/ubifs/dir.c		patch \| blob \| blame \| history
fs/udf/namei.c		patch \| blob \| blame \| history
fs/ufs/namei.c		patch \| blob \| blame \| history
fs/xfs/linux-2.6/xfs_buf.c		patch \| blob \| blame \| history
fs/xfs/linux-2.6/xfs_iops.c		patch \| blob \| blame \| history
fs/xfs/linux-2.6/xfs_super.c		patch \| blob \| blame \| history
fs/xfs/xfs_inode.h		patch \| blob \| blame \| history
include/linux/buffer_head.h		patch \| blob \| blame \| history
include/linux/fs.h		patch \| blob \| blame \| history
include/linux/list.h		patch \| blob \| blame \| history
include/linux/reiserfs_fs.h		patch \| blob \| blame \| history
include/linux/writeback.h		patch \| blob \| blame \| history
ipc/mqueue.c		patch \| blob \| blame \| history
kernel/cgroup.c		patch \| blob \| blame \| history
kernel/futex.c		patch \| blob \| blame \| history
kernel/sysctl.c		patch \| blob \| blame \| history
mm/backing-dev.c		patch \| blob \| blame \| history
mm/shmem.c		patch \| blob \| blame \| history
net/socket.c		patch \| blob \| blame \| history
net/sunrpc/rpc_pipe.c		patch \| blob \| blame \| history
security/apparmor/path.c		patch \| blob \| blame \| history
security/inode.c		patch \| blob \| blame \| history
security/selinux/selinuxfs.c		patch \| blob \| blame \| history
security/tomoyo/realpath.c		patch \| blob \| blame \| history