fs/notify/fanotify/fanotify_user.c

   1 #include <linux/fcntl.h>
   2 #include <linux/file.h>
   3 #include <linux/fs.h>
   4 #include <linux/anon_inodes.h>
   5 #include <linux/fsnotify_backend.h>
   6 #include <linux/init.h>
   7 #include <linux/mount.h>
   8 #include <linux/namei.h>
   9 #include <linux/poll.h>
  10 #include <linux/security.h>
  11 #include <linux/syscalls.h>
  12 #include <linux/types.h>
  13 #include <linux/uaccess.h>
  14
  15 #include <asm/ioctls.h>
  16
  17 #include "fanotify.h"
  18
  19 static struct kmem_cache *fanotify_mark_cache __read_mostly;
  20
  21 /*
  22  * Get an fsnotify notification event if one exists and is small
  23  * enough to fit in "count". Return an error pointer if the count
  24  * is not large enough.
  25  *
  26  * Called with the group->notification_mutex held.
  27  */
  28 static struct fsnotify_event *get_one_event(struct fsnotify_group *group,
  29                                             size_t count)
  30 {
  31         BUG_ON(!mutex_is_locked(&group->notification_mutex));
  32
  33         pr_debug("%s: group=%p count=%zd\n", __func__, group, count);
  34
  35         if (fsnotify_notify_queue_is_empty(group))
  36                 return NULL;
  37
  38         if (FAN_EVENT_METADATA_LEN > count)
  39                 return ERR_PTR(-EINVAL);
  40
  41         /* held the notification_mutex the whole time, so this is the
  42          * same event we peeked above */
  43         return fsnotify_remove_notify_event(group);
  44 }
  45
  46 static int create_and_fill_fd(struct fsnotify_group *group,
  47                               struct fanotify_event_metadata *metadata,
  48                               struct fsnotify_event *event)
  49 {
  50         int client_fd;
  51         struct dentry *dentry;
  52         struct vfsmount *mnt;
  53         struct file *new_file;
  54
  55         pr_debug("%s: group=%p metadata=%p event=%p\n", __func__, group,
  56                  metadata, event);
  57
  58         client_fd = get_unused_fd();
  59         if (client_fd < 0)
  60                 return client_fd;
  61
  62         if (event->data_type != FSNOTIFY_EVENT_PATH) {
  63                 WARN_ON(1);
  64                 put_unused_fd(client_fd);
  65                 return -EINVAL;
  66         }
  67
  68         /*
  69          * we need a new file handle for the userspace program so it can read even if it was
  70          * originally opened O_WRONLY.
  71          */
  72         dentry = dget(event->path.dentry);
  73         mnt = mntget(event->path.mnt);
  74         /* it's possible this event was an overflow event.  in that case dentry and mnt
  75          * are NULL;  That's fine, just don't call dentry open */
  76         if (dentry && mnt)
  77                 new_file = dentry_open(dentry, mnt,
  78                                        O_RDONLY | O_LARGEFILE | FMODE_NONOTIFY,
  79                                        current_cred());
  80         else
  81                 new_file = ERR_PTR(-EOVERFLOW);
  82         if (IS_ERR(new_file)) {
  83                 /*
  84                  * we still send an event even if we can't open the file.  this
  85                  * can happen when say tasks are gone and we try to open their
  86                  * /proc files or we try to open a WRONLY file like in sysfs
  87                  * we just send the errno to userspace since there isn't much
  88                  * else we can do.
  89                  */
  90                 put_unused_fd(client_fd);
  91                 client_fd = PTR_ERR(new_file);
  92         } else {
  93                 fd_install(client_fd, new_file);
  94         }
  95
  96         metadata->fd = client_fd;
  97
  98         return 0;
  99 }
 100
 101 static ssize_t fill_event_metadata(struct fsnotify_group *group,
 102                                    struct fanotify_event_metadata *metadata,
 103                                    struct fsnotify_event *event)
 104 {
 105         pr_debug("%s: group=%p metadata=%p event=%p\n", __func__,
 106                  group, metadata, event);
 107
 108         metadata->event_len = FAN_EVENT_METADATA_LEN;
 109         metadata->vers = FANOTIFY_METADATA_VERSION;
 110         metadata->mask = fanotify_outgoing_mask(event->mask);
 111
 112         return create_and_fill_fd(group, metadata, event);
 113
 114 }
 115
 116 static ssize_t copy_event_to_user(struct fsnotify_group *group,
 117                                   struct fsnotify_event *event,
 118                                   char __user *buf)
 119 {
 120         struct fanotify_event_metadata fanotify_event_metadata;
 121         int ret;
 122
 123         pr_debug("%s: group=%p event=%p\n", __func__, group, event);
 124
 125         ret = fill_event_metadata(group, &fanotify_event_metadata, event);
 126         if (ret)
 127                 return ret;
 128
 129         if (copy_to_user(buf, &fanotify_event_metadata, FAN_EVENT_METADATA_LEN))
 130                 return -EFAULT;
 131
 132         return FAN_EVENT_METADATA_LEN;
 133 }
 134
 135 /* intofiy userspace file descriptor functions */
 136 static unsigned int fanotify_poll(struct file *file, poll_table *wait)
 137 {
 138         struct fsnotify_group *group = file->private_data;
 139         int ret = 0;
 140
 141         poll_wait(file, &group->notification_waitq, wait);
 142         mutex_lock(&group->notification_mutex);
 143         if (!fsnotify_notify_queue_is_empty(group))
 144                 ret = POLLIN | POLLRDNORM;
 145         mutex_unlock(&group->notification_mutex);
 146
 147         return ret;
 148 }
 149
 150 static ssize_t fanotify_read(struct file *file, char __user *buf,
 151                              size_t count, loff_t *pos)
 152 {
 153         struct fsnotify_group *group;
 154         struct fsnotify_event *kevent;
 155         char __user *start;
 156         int ret;
 157         DEFINE_WAIT(wait);
 158
 159         start = buf;
 160         group = file->private_data;
 161
 162         pr_debug("%s: group=%p\n", __func__, group);
 163
 164         while (1) {
 165                 prepare_to_wait(&group->notification_waitq, &wait, TASK_INTERRUPTIBLE);
 166
 167                 mutex_lock(&group->notification_mutex);
 168                 kevent = get_one_event(group, count);
 169                 mutex_unlock(&group->notification_mutex);
 170
 171                 if (kevent) {
 172                         ret = PTR_ERR(kevent);
 173                         if (IS_ERR(kevent))
 174                                 break;
 175                         ret = copy_event_to_user(group, kevent, buf);
 176                         fsnotify_put_event(kevent);
 177                         if (ret < 0)
 178                                 break;
 179                         buf += ret;
 180                         count -= ret;
 181                         continue;
 182                 }
 183
 184                 ret = -EAGAIN;
 185                 if (file->f_flags & O_NONBLOCK)
 186                         break;
 187                 ret = -EINTR;
 188                 if (signal_pending(current))
 189                         break;
 190
 191                 if (start != buf)
 192                         break;
 193
 194                 schedule();
 195         }
 196
 197         finish_wait(&group->notification_waitq, &wait);
 198         if (start != buf && ret != -EFAULT)
 199                 ret = buf - start;
 200         return ret;
 201 }
 202
 203 static int fanotify_release(struct inode *ignored, struct file *file)
 204 {
 205         struct fsnotify_group *group = file->private_data;
 206
 207         pr_debug("%s: file=%p group=%p\n", __func__, file, group);
 208
 209         /* matches the fanotify_init->fsnotify_alloc_group */
 210         fsnotify_put_group(group);
 211
 212         return 0;
 213 }
 214
 215 static long fanotify_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 216 {
 217         struct fsnotify_group *group;
 218         struct fsnotify_event_holder *holder;
 219         void __user *p;
 220         int ret = -ENOTTY;
 221         size_t send_len = 0;
 222
 223         group = file->private_data;
 224
 225         p = (void __user *) arg;
 226
 227         switch (cmd) {
 228         case FIONREAD:
 229                 mutex_lock(&group->notification_mutex);
 230                 list_for_each_entry(holder, &group->notification_list, event_list)
 231                         send_len += FAN_EVENT_METADATA_LEN;
 232                 mutex_unlock(&group->notification_mutex);
 233                 ret = put_user(send_len, (int __user *) p);
 234                 break;
 235         }
 236
 237         return ret;
 238 }
 239
 240 static const struct file_operations fanotify_fops = {
 241         .poll           = fanotify_poll,
 242         .read           = fanotify_read,
 243         .fasync         = NULL,
 244         .release        = fanotify_release,
 245         .unlocked_ioctl = fanotify_ioctl,
 246         .compat_ioctl   = fanotify_ioctl,
 247 };
 248
 249 static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
 250 {
 251         kmem_cache_free(fanotify_mark_cache, fsn_mark);
 252 }
 253
 254 static int fanotify_find_path(int dfd, const char __user *filename,
 255                               struct path *path, unsigned int flags)
 256 {
 257         int ret;
 258
 259         pr_debug("%s: dfd=%d filename=%p flags=%x\n", __func__,
 260                  dfd, filename, flags);
 261
 262         if (filename == NULL) {
 263                 struct file *file;
 264                 int fput_needed;
 265
 266                 ret = -EBADF;
 267                 file = fget_light(dfd, &fput_needed);
 268                 if (!file)
 269                         goto out;
 270
 271                 ret = -ENOTDIR;
 272                 if ((flags & FAN_MARK_ONLYDIR) &&
 273                     !(S_ISDIR(file->f_path.dentry->d_inode->i_mode))) {
 274                         fput_light(file, fput_needed);
 275                         goto out;
 276                 }
 277
 278                 *path = file->f_path;
 279                 path_get(path);
 280                 fput_light(file, fput_needed);
 281         } else {
 282                 unsigned int lookup_flags = 0;
 283
 284                 if (!(flags & FAN_MARK_DONT_FOLLOW))
 285                         lookup_flags |= LOOKUP_FOLLOW;
 286                 if (flags & FAN_MARK_ONLYDIR)
 287                         lookup_flags |= LOOKUP_DIRECTORY;
 288
 289                 ret = user_path_at(dfd, filename, lookup_flags, path);
 290                 if (ret)
 291                         goto out;
 292         }
 293
 294         /* you can only watch an inode if you have read permissions on it */
 295         ret = inode_permission(path->dentry->d_inode, MAY_READ);
 296         if (ret)
 297                 path_put(path);
 298 out:
 299         return ret;
 300 }
 301
 302 static int fanotify_remove_mark(struct fsnotify_group *group,
 303                                 struct inode *inode,
 304                                 __u32 mask)
 305 {
 306         struct fsnotify_mark *fsn_mark;
 307         __u32 new_mask;
 308
 309         pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
 310                  group, inode, mask);
 311
 312         fsn_mark = fsnotify_find_mark(group, inode);
 313         if (!fsn_mark)
 314                 return -ENOENT;
 315
 316         spin_lock(&fsn_mark->lock);
 317         fsn_mark->mask &= ~mask;
 318         new_mask = fsn_mark->mask;
 319         spin_unlock(&fsn_mark->lock);
 320
 321         if (!new_mask)
 322                 fsnotify_destroy_mark(fsn_mark);
 323         else
 324                 fsnotify_recalc_inode_mask(inode);
 325
 326         fsnotify_recalc_group_mask(group);
 327
 328         /* matches the fsnotify_find_mark() */
 329         fsnotify_put_mark(fsn_mark);
 330
 331         return 0;
 332 }
 333
 334 static int fanotify_add_mark(struct fsnotify_group *group,
 335                              struct inode *inode,
 336                              __u32 mask)
 337 {
 338         struct fsnotify_mark *fsn_mark;
 339         __u32 old_mask, new_mask;
 340         int ret;
 341
 342         pr_debug("%s: group=%p inode=%p mask=%x\n", __func__,
 343                  group, inode, mask);
 344
 345         fsn_mark = fsnotify_find_mark(group, inode);
 346         if (!fsn_mark) {
 347                 struct fsnotify_mark *new_fsn_mark;
 348
 349                 ret = -ENOMEM;
 350                 new_fsn_mark = kmem_cache_alloc(fanotify_mark_cache, GFP_KERNEL);
 351                 if (!new_fsn_mark)
 352                         goto out;
 353
 354                 fsnotify_init_mark(new_fsn_mark, fanotify_free_mark);
 355                 ret = fsnotify_add_mark(new_fsn_mark, group, inode, 0);
 356                 if (ret) {
 357                         fanotify_free_mark(new_fsn_mark);
 358                         goto out;
 359                 }
 360
 361                 fsn_mark = new_fsn_mark;
 362         }
 363
 364         ret = 0;
 365
 366         spin_lock(&fsn_mark->lock);
 367         old_mask = fsn_mark->mask;
 368         fsn_mark->mask |= mask;
 369         new_mask = fsn_mark->mask;
 370         spin_unlock(&fsn_mark->lock);
 371
 372         /* we made changes to a mask, update the group mask and the inode mask
 373          * so things happen quickly. */
 374         if (old_mask != new_mask) {
 375                 /* more bits in old than in new? */
 376                 int dropped = (old_mask & ~new_mask);
 377                 /* more bits in this mark than the inode's mask? */
 378                 int do_inode = (new_mask & ~inode->i_fsnotify_mask);
 379                 /* more bits in this mark than the group? */
 380                 int do_group = (new_mask & ~group->mask);
 381
 382                 /* update the inode with this new mark */
 383                 if (dropped || do_inode)
 384                         fsnotify_recalc_inode_mask(inode);
 385
 386                 /* update the group mask with the new mask */
 387                 if (dropped || do_group)
 388                         fsnotify_recalc_group_mask(group);
 389         }
 390
 391         /* match the init or the find.... */
 392         fsnotify_put_mark(fsn_mark);
 393 out:
 394         return ret;
 395 }
 396
 397 static int fanotify_update_mark(struct fsnotify_group *group,
 398                                 struct inode *inode, int flags,
 399                                 __u32 mask)
 400 {
 401         pr_debug("%s: group=%p inode=%p flags=%x mask=%x\n", __func__,
 402                  group, inode, flags, mask);
 403
 404         if (flags & FAN_MARK_ADD)
 405                 fanotify_add_mark(group, inode, mask);
 406         else if (flags & FAN_MARK_REMOVE)
 407                 fanotify_remove_mark(group, inode, mask);
 408         else
 409                 BUG();
 410
 411         return 0;
 412 }
 413
 414 static bool fanotify_mark_validate_input(int flags,
 415                                          __u32 mask)
 416 {
 417         pr_debug("%s: flags=%x mask=%x\n", __func__, flags, mask);
 418
 419         /* are flags valid of this operation? */
 420         if (!fanotify_mark_flags_valid(flags))
 421                 return false;
 422         /* is the mask valid? */
 423         if (!fanotify_mask_valid(mask))
 424                 return false;
 425         return true;
 426 }
 427
 428 /* fanotify syscalls */
 429 SYSCALL_DEFINE3(fanotify_init, unsigned int, flags, unsigned int, event_f_flags,
 430                 unsigned int, priority)
 431 {
 432         struct fsnotify_group *group;
 433         int f_flags, fd;
 434
 435         pr_debug("%s: flags=%d event_f_flags=%d priority=%d\n",
 436                 __func__, flags, event_f_flags, priority);
 437
 438         if (event_f_flags)
 439                 return -EINVAL;
 440         if (priority)
 441                 return -EINVAL;
 442
 443         if (!capable(CAP_SYS_ADMIN))
 444                 return -EACCES;
 445
 446         if (flags & ~FAN_ALL_INIT_FLAGS)
 447                 return -EINVAL;
 448
 449         f_flags = (O_RDONLY | FMODE_NONOTIFY);
 450         if (flags & FAN_CLOEXEC)
 451                 f_flags |= O_CLOEXEC;
 452         if (flags & FAN_NONBLOCK)
 453                 f_flags |= O_NONBLOCK;
 454
 455         /* fsnotify_alloc_group takes a ref.  Dropped in fanotify_release */
 456         group = fsnotify_alloc_group(&fanotify_fsnotify_ops);
 457         if (IS_ERR(group))
 458                 return PTR_ERR(group);
 459
 460         fd = anon_inode_getfd("[fanotify]", &fanotify_fops, group, f_flags);
 461         if (fd < 0)
 462                 goto out_put_group;
 463
 464         return fd;
 465
 466 out_put_group:
 467         fsnotify_put_group(group);
 468         return fd;
 469 }
 470
 471 SYSCALL_DEFINE5(fanotify_mark, int, fanotify_fd, unsigned int, flags,
 472                 __u64, mask, int, dfd, const char  __user *, pathname)
 473 {
 474         struct inode *inode;
 475         struct fsnotify_group *group;
 476         struct file *filp;
 477         struct path path;
 478         int ret, fput_needed;
 479
 480         pr_debug("%s: fanotify_fd=%d flags=%x dfd=%d pathname=%p mask=%llx\n",
 481                  __func__, fanotify_fd, flags, dfd, pathname, mask);
 482
 483         /* we only use the lower 32 bits as of right now. */
 484         if (mask & ((__u64)0xffffffff << 32))
 485                 return -EINVAL;
 486
 487         if (!fanotify_mark_validate_input(flags, mask))
 488                 return -EINVAL;
 489
 490         filp = fget_light(fanotify_fd, &fput_needed);
 491         if (unlikely(!filp))
 492                 return -EBADF;
 493
 494         /* verify that this is indeed an fanotify instance */
 495         ret = -EINVAL;
 496         if (unlikely(filp->f_op != &fanotify_fops))
 497                 goto fput_and_out;
 498
 499         ret = fanotify_find_path(dfd, pathname, &path, flags);
 500         if (ret)
 501                 goto fput_and_out;
 502
 503         /* inode held in place by reference to path; group by fget on fd */
 504         inode = path.dentry->d_inode;
 505         group = filp->private_data;
 506
 507         /* create/update an inode mark */
 508         ret = fanotify_update_mark(group, inode, flags, mask);
 509
 510         path_put(&path);
 511 fput_and_out:
 512         fput_light(filp, fput_needed);
 513         return ret;
 514 }
 515
 516 /*
 517  * fanotify_user_setup - Our initialization function.  Note that we cannnot return
 518  * error because we have compiled-in VFS hooks.  So an (unlikely) failure here
 519  * must result in panic().
 520  */
 521 static int __init fanotify_user_setup(void)
 522 {
 523         fanotify_mark_cache = KMEM_CACHE(fsnotify_mark, SLAB_PANIC);
 524
 525         return 0;
 526 }
 527 device_initcall(fanotify_user_setup);