/* fs/fuse/dev.c */
/*
  FUSE: Filesystem in Userspace
  Copyright (C) 2001-2008  Miklos Szeredi <miklos@szeredi.hu>

  This program can be distributed under the terms of the GNU GPL.
  See the file COPYING.
*/

#include "fuse_i.h"

#include <linux/init.h>
#include <linux/module.h>
#include <linux/poll.h>
#include <linux/uio.h>
#include <linux/miscdevice.h>
#include <linux/pagemap.h>
#include <linux/file.h>
#include <linux/slab.h>

MODULE_ALIAS_MISCDEV(FUSE_MINOR);
MODULE_ALIAS("devname:fuse");

static struct kmem_cache *fuse_req_cachep;

static struct fuse_conn *fuse_get_conn(struct file *file)
{
        /*
         * Lockless access is OK, because file->private_data is set
         * once during mount and is valid until the file is released.
         */
        return file->private_data;
}

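/*
 * Initialize a request: zero it, set up its list heads and wait queue,
 * and give it a single reference.
 */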
static void fuse_request_init(struct fuse_req *req)
{
        memset(req, 0, sizeof(*req));
        INIT_LIST_HEAD(&req->list);
        INIT_LIST_HEAD(&req->intr_entry);
        init_waitqueue_head(&req->waitq);
        atomic_set(&req->count, 1);
}

struct fuse_req *fuse_request_alloc(void)
{
        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_KERNEL);
        if (req)
                fuse_request_init(req);
        return req;
}
EXPORT_SYMBOL_GPL(fuse_request_alloc);

struct fuse_req *fuse_request_alloc_nofs(void)
{
        struct fuse_req *req = kmem_cache_alloc(fuse_req_cachep, GFP_NOFS);
        if (req)
                fuse_request_init(req);
        return req;
}

void fuse_request_free(struct fuse_req *req)
{
        kmem_cache_free(fuse_req_cachep, req);
}

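/* Block all signals except SIGKILL, saving the old mask in *oldset */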
static void block_sigs(sigset_t *oldset)
{
        sigset_t mask;

        siginitsetinv(&mask, sigmask(SIGKILL));
        sigprocmask(SIG_BLOCK, &mask, oldset);
}

static void restore_sigs(sigset_t *oldset)
{
        sigprocmask(SIG_SETMASK, oldset, NULL);
}

static void __fuse_get_request(struct fuse_req *req)
{
        atomic_inc(&req->count);
}

/* Must be called with > 1 refcount */
static void __fuse_put_request(struct fuse_req *req)
{
        BUG_ON(atomic_read(&req->count) < 2);
        atomic_dec(&req->count);
}

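/* Fill in the request header with the caller's uid, gid and pid */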
static void fuse_req_init_context(struct fuse_req *req)
{
        req->in.h.uid = current_fsuid();
        req->in.h.gid = current_fsgid();
        req->in.h.pid = current->pid;
}

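/*
 * Reserve a request for the calling task.
 *
 * Waits until the connection is no longer blocked (only SIGKILL can
 * interrupt the wait), then allocates and initializes a request.
 * Returns an ERR_PTR on interruption, disconnection or allocation
 * failure.
 */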
struct fuse_req *fuse_get_req(struct fuse_conn *fc)
{
        struct fuse_req *req;
        sigset_t oldset;
        int intr;
        int err;

        atomic_inc(&fc->num_waiting);
        block_sigs(&oldset);
        intr = wait_event_interruptible(fc->blocked_waitq, !fc->blocked);
        restore_sigs(&oldset);
        err = -EINTR;
        if (intr)
                goto out;

        err = -ENOTCONN;
        if (!fc->connected)
                goto out;

        req = fuse_request_alloc();
        err = -ENOMEM;
        if (!req)
                goto out;

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;

 out:
        atomic_dec(&fc->num_waiting);
        return ERR_PTR(err);
}
EXPORT_SYMBOL_GPL(fuse_get_req);

/*
 * Return request in fuse_file->reserved_req.  However that may
 * currently be in use.  If that is the case, wait for it to become
 * available.
 */
static struct fuse_req *get_reserved_req(struct fuse_conn *fc,
                                         struct file *file)
{
        struct fuse_req *req = NULL;
        struct fuse_file *ff = file->private_data;

        do {
                wait_event(fc->reserved_req_waitq, ff->reserved_req);
                spin_lock(&fc->lock);
                if (ff->reserved_req) {
                        req = ff->reserved_req;
                        ff->reserved_req = NULL;
                        get_file(file);
                        req->stolen_file = file;
                }
                spin_unlock(&fc->lock);
        } while (!req);

        return req;
}

/*
 * Put stolen request back into fuse_file->reserved_req
 */
static void put_reserved_req(struct fuse_conn *fc, struct fuse_req *req)
{
        struct file *file = req->stolen_file;
        struct fuse_file *ff = file->private_data;

        spin_lock(&fc->lock);
        fuse_request_init(req);
        BUG_ON(ff->reserved_req);
        ff->reserved_req = req;
        wake_up_all(&fc->reserved_req_waitq);
        spin_unlock(&fc->lock);
        fput(file);
}

/*
 * Gets a request for a file operation, always succeeds
 *
 * This is used for sending the FLUSH request, which must get to
 * userspace, due to POSIX locks which may need to be unlocked.
 *
 * If allocation fails due to OOM, use the reserved request in
 * fuse_file.
 *
 * This is very unlikely to deadlock accidentally, since the
 * filesystem should not have its own file open.  If deadlock is
 * intentional, it can still be broken by "aborting" the filesystem.
 */
struct fuse_req *fuse_get_req_nofail(struct fuse_conn *fc, struct file *file)
{
        struct fuse_req *req;

        atomic_inc(&fc->num_waiting);
        wait_event(fc->blocked_waitq, !fc->blocked);
        req = fuse_request_alloc();
        if (!req)
                req = get_reserved_req(fc, file);

        fuse_req_init_context(req);
        req->waiting = 1;
        return req;
}

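/*
 * Drop a reference to the request.  On the final put the request is
 * either returned to the owning file's reserved slot or freed.
 */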
void fuse_put_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (atomic_dec_and_test(&req->count)) {
                if (req->waiting)
                        atomic_dec(&fc->num_waiting);

                if (req->stolen_file)
                        put_reserved_req(fc, req);
                else
                        fuse_request_free(req);
        }
}
EXPORT_SYMBOL_GPL(fuse_put_request);

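/* Total size in bytes of an argument array */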
static unsigned len_args(unsigned numargs, struct fuse_arg *args)
{
        unsigned nbytes = 0;
        unsigned i;

        for (i = 0; i < numargs; i++)
                nbytes += args[i].size;

        return nbytes;
}

static u64 fuse_get_unique(struct fuse_conn *fc)
{
        fc->reqctr++;
        /* zero is special */
        if (fc->reqctr == 0)
                fc->reqctr = 1;

        return fc->reqctr;
}

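/*
 * Add a request to the pending list and wake up the device readers.
 * Called with fc->lock held.
 */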
static void queue_request(struct fuse_conn *fc, struct fuse_req *req)
{
        req->in.h.unique = fuse_get_unique(fc);
        req->in.h.len = sizeof(struct fuse_in_header) +
                len_args(req->in.numargs, (struct fuse_arg *) req->in.args);
        list_add_tail(&req->list, &fc->pending);
        req->state = FUSE_REQ_PENDING;
        if (!req->waiting) {
                req->waiting = 1;
                atomic_inc(&fc->num_waiting);
        }
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

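/* Start queued background requests while fewer than max_background are active */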
static void flush_bg_queue(struct fuse_conn *fc)
{
        while (fc->active_background < fc->max_background &&
               !list_empty(&fc->bg_queue)) {
                struct fuse_req *req;

                req = list_entry(fc->bg_queue.next, struct fuse_req, list);
                list_del(&req->list);
                fc->active_background++;
                queue_request(fc, req);
        }
}

/*
 * This function is called when a request is finished.  Either a reply
 * has arrived or it was aborted (and not yet sent) or some error
 * occurred during communication with userspace, or the device file
 * was closed.  The requester thread is woken up (if still waiting),
 * the 'end' callback is called if given, else the reference to the
 * request is released
 *
 * Called with fc->lock, unlocks it
 */
static void request_end(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
{
        void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;
        req->end = NULL;
        list_del(&req->list);
        list_del(&req->intr_entry);
        req->state = FUSE_REQ_FINISHED;
        if (req->background) {
                if (fc->num_background == fc->max_background) {
                        fc->blocked = 0;
                        wake_up_all(&fc->blocked_waitq);
                }
                if (fc->num_background == fc->congestion_threshold &&
                    fc->connected && fc->bdi_initialized) {
                        clear_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                        clear_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
                }
                fc->num_background--;
                fc->active_background--;
                flush_bg_queue(fc);
        }
        spin_unlock(&fc->lock);
        wake_up(&req->waitq);
        if (end)
                end(fc, req);
        fuse_put_request(fc, req);
}

static void wait_answer_interruptible(struct fuse_conn *fc,
                                      struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        if (signal_pending(current))
                return;

        spin_unlock(&fc->lock);
        wait_event_interruptible(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);
}

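/* Queue an INTERRUPT for this request and notify the device readers */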
static void queue_interrupt(struct fuse_conn *fc, struct fuse_req *req)
{
        list_add_tail(&req->intr_entry, &fc->interrupts);
        wake_up(&fc->waitq);
        kill_fasync(&fc->fasync, SIGIO, POLL_IN);
}

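/*
 * Wait for the answer to a request.
 *
 * First wait interruptibly, queueing an INTERRUPT if a signal arrives
 * after the request has been read by userspace.  Then wait blocking
 * all but fatal signals, giving up with -EINTR if the request is still
 * on the pending list.  Finally wait uninterruptibly: a forced request,
 * or one already in userspace, can only finish normally or by aborting
 * the connection.
 */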
static void request_wait_answer(struct fuse_conn *fc, struct fuse_req *req)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        if (!fc->no_interrupt) {
                /* Any signal may interrupt this */
                wait_answer_interruptible(fc, req);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                req->interrupted = 1;
                if (req->state == FUSE_REQ_SENT)
                        queue_interrupt(fc, req);
        }

        if (!req->force) {
                sigset_t oldset;

                /* Only fatal signals may interrupt this */
                block_sigs(&oldset);
                wait_answer_interruptible(fc, req);
                restore_sigs(&oldset);

                if (req->aborted)
                        goto aborted;
                if (req->state == FUSE_REQ_FINISHED)
                        return;

                /* Request is not yet in userspace, bail out */
                if (req->state == FUSE_REQ_PENDING) {
                        list_del(&req->list);
                        __fuse_put_request(req);
                        req->out.h.error = -EINTR;
                        return;
                }
        }

        /*
         * Either request is already in userspace, or it was forced.
         * Wait it out.
         */
        spin_unlock(&fc->lock);
        wait_event(req->waitq, req->state == FUSE_REQ_FINISHED);
        spin_lock(&fc->lock);

        if (!req->aborted)
                return;

 aborted:
        BUG_ON(req->state != FUSE_REQ_FINISHED);
        if (req->locked) {
                /* This is uninterruptible sleep, because data is
                   being copied to/from the buffers of req.  During
                   locked state, there mustn't be any filesystem
                   operation (e.g. page fault), since that could lead
                   to deadlock */
                spin_unlock(&fc->lock);
                wait_event(req->waitq, !req->locked);
                spin_lock(&fc->lock);
        }
}

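/* Send a request and wait for the answer (or for the connection to go away) */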
void fuse_request_send(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        spin_lock(&fc->lock);
        if (!fc->connected)
                req->out.h.error = -ENOTCONN;
        else if (fc->conn_error)
                req->out.h.error = -ECONNREFUSED;
        else {
                queue_request(fc, req);
                /* acquire extra reference, since request is still needed
                   after request_end() */
                __fuse_get_request(req);

                request_wait_answer(fc, req);
        }
        spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_request_send);

static void fuse_request_send_nowait_locked(struct fuse_conn *fc,
                                            struct fuse_req *req)
{
        req->background = 1;
        fc->num_background++;
        if (fc->num_background == fc->max_background)
                fc->blocked = 1;
        if (fc->num_background == fc->congestion_threshold &&
            fc->bdi_initialized) {
                set_bdi_congested(&fc->bdi, BLK_RW_SYNC);
                set_bdi_congested(&fc->bdi, BLK_RW_ASYNC);
        }
        list_add_tail(&req->list, &fc->bg_queue);
        flush_bg_queue(fc);
}

static void fuse_request_send_nowait(struct fuse_conn *fc, struct fuse_req *req)
{
        spin_lock(&fc->lock);
        if (fc->connected) {
                fuse_request_send_nowait_locked(fc, req);
                spin_unlock(&fc->lock);
        } else {
                req->out.h.error = -ENOTCONN;
                request_end(fc, req);
        }
}

void fuse_request_send_noreply(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 0;
        fuse_request_send_nowait(fc, req);
}

void fuse_request_send_background(struct fuse_conn *fc, struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait(fc, req);
}
EXPORT_SYMBOL_GPL(fuse_request_send_background);

/*
 * Called under fc->lock
 *
 * fc->connected must have been checked previously
 */
void fuse_request_send_background_locked(struct fuse_conn *fc,
                                         struct fuse_req *req)
{
        req->isreply = 1;
        fuse_request_send_nowait_locked(fc, req);
}

/*
 * Lock the request.  Up to the next unlock_request() there mustn't be
 * anything that could cause a page-fault.  If the request was already
 * aborted, bail out.
 */
static int lock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        int err = 0;
        if (req) {
                spin_lock(&fc->lock);
                if (req->aborted)
                        err = -ENOENT;
                else
                        req->locked = 1;
                spin_unlock(&fc->lock);
        }
        return err;
}

/*
 * Unlock request.  If it was aborted while locked, the requester
 * thread is currently waiting for it to be unlocked, so wake it up.
 */
static void unlock_request(struct fuse_conn *fc, struct fuse_req *req)
{
        if (req) {
                spin_lock(&fc->lock);
                req->locked = 0;
                if (req->aborted)
                        wake_up(&req->waitq);
                spin_unlock(&fc->lock);
        }
}

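/*
 * State for copying request arguments to or from a userspace iovec,
 * one page of the user buffer at a time.
 */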
struct fuse_copy_state {
        struct fuse_conn *fc;
        int write;
        struct fuse_req *req;
        const struct iovec *iov;
        unsigned long nr_segs;
        unsigned long seglen;
        unsigned long addr;
        struct page *pg;
        void *mapaddr;
        void *buf;
        unsigned len;
};

static void fuse_copy_init(struct fuse_copy_state *cs, struct fuse_conn *fc,
                           int write, struct fuse_req *req,
                           const struct iovec *iov, unsigned long nr_segs)
{
        memset(cs, 0, sizeof(*cs));
        cs->fc = fc;
        cs->write = write;
        cs->req = req;
        cs->iov = iov;
        cs->nr_segs = nr_segs;
}

/* Unmap and put previous page of userspace buffer */
static void fuse_copy_finish(struct fuse_copy_state *cs)
{
        if (cs->mapaddr) {
                kunmap_atomic(cs->mapaddr, KM_USER0);
                if (cs->write) {
                        flush_dcache_page(cs->pg);
                        set_page_dirty_lock(cs->pg);
                }
                put_page(cs->pg);
                cs->mapaddr = NULL;
        }
}

/*
 * Get another pageful of the userspace buffer, map it into kernel
 * address space, and lock the request
 */
static int fuse_copy_fill(struct fuse_copy_state *cs)
{
        unsigned long offset;
        int err;

        unlock_request(cs->fc, cs->req);
        fuse_copy_finish(cs);
        if (!cs->seglen) {
                BUG_ON(!cs->nr_segs);
                cs->seglen = cs->iov[0].iov_len;
                cs->addr = (unsigned long) cs->iov[0].iov_base;
                cs->iov++;
                cs->nr_segs--;
        }
        down_read(&current->mm->mmap_sem);
        err = get_user_pages(current, current->mm, cs->addr, 1, cs->write, 0,
                             &cs->pg, NULL);
        up_read(&current->mm->mmap_sem);
        if (err < 0)
                return err;
        BUG_ON(err != 1);
        offset = cs->addr % PAGE_SIZE;
        cs->mapaddr = kmap_atomic(cs->pg, KM_USER0);
        cs->buf = cs->mapaddr + offset;
        cs->len = min(PAGE_SIZE - offset, cs->seglen);
        cs->seglen -= cs->len;
        cs->addr += cs->len;

        return lock_request(cs->fc, cs->req);
}

/* Do as much copy to/from userspace buffer as we can */
static int fuse_copy_do(struct fuse_copy_state *cs, void **val, unsigned *size)
{
        unsigned ncpy = min(*size, cs->len);
        if (val) {
                if (cs->write)
                        memcpy(cs->buf, *val, ncpy);
                else
                        memcpy(*val, cs->buf, ncpy);
                *val += ncpy;
        }
        *size -= ncpy;
        cs->len -= ncpy;
        cs->buf += ncpy;
        return ncpy;
}

/*
 * Copy a page in the request to/from the userspace buffer.  Must be
 * done atomically
 */
static int fuse_copy_page(struct fuse_copy_state *cs, struct page *page,
                          unsigned offset, unsigned count, int zeroing)
{
        if (page && zeroing && count < PAGE_SIZE) {
                void *mapaddr = kmap_atomic(page, KM_USER1);
                memset(mapaddr, 0, PAGE_SIZE);
                kunmap_atomic(mapaddr, KM_USER1);
        }
        while (count) {
                if (!cs->len) {
                        int err = fuse_copy_fill(cs);
                        if (err)
                                return err;
                }
                if (page) {
                        void *mapaddr = kmap_atomic(page, KM_USER1);
                        void *buf = mapaddr + offset;
                        offset += fuse_copy_do(cs, &buf, &count);
                        kunmap_atomic(mapaddr, KM_USER1);
                } else
                        offset += fuse_copy_do(cs, NULL, &count);
        }
        if (page && !cs->write)
                flush_dcache_page(page);
        return 0;
}

/* Copy pages in the request to/from userspace buffer */
static int fuse_copy_pages(struct fuse_copy_state *cs, unsigned nbytes,
                           int zeroing)
{
        unsigned i;
        struct fuse_req *req = cs->req;
        unsigned offset = req->page_offset;
        unsigned count = min(nbytes, (unsigned) PAGE_SIZE - offset);

        for (i = 0; i < req->num_pages && (nbytes || zeroing); i++) {
                struct page *page = req->pages[i];
                int err = fuse_copy_page(cs, page, offset, count, zeroing);
                if (err)
                        return err;

                nbytes -= count;
                count = min(nbytes, (unsigned) PAGE_SIZE);
                offset = 0;
        }
        return 0;
}

/* Copy a single argument in the request to/from userspace buffer */
static int fuse_copy_one(struct fuse_copy_state *cs, void *val, unsigned size)
{
        while (size) {
                if (!cs->len) {
                        int err = fuse_copy_fill(cs);
                        if (err)
                                return err;
                }
                fuse_copy_do(cs, &val, &size);
        }
        return 0;
}

/* Copy request arguments to/from userspace buffer */
static int fuse_copy_args(struct fuse_copy_state *cs, unsigned numargs,
                          unsigned argpages, struct fuse_arg *args,
                          int zeroing)
{
        int err = 0;
        unsigned i;

        for (i = 0; !err && i < numargs; i++)  {
                struct fuse_arg *arg = &args[i];
                if (i == numargs - 1 && argpages)
                        err = fuse_copy_pages(cs, arg->size, zeroing);
                else
                        err = fuse_copy_one(cs, arg->value, arg->size);
        }
        return err;
}

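/*
 * A request or interrupt is waiting to be read by the userspace daemon.
 * Called with fc->lock held.
 */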
static int request_pending(struct fuse_conn *fc)
{
        return !list_empty(&fc->pending) || !list_empty(&fc->interrupts);
}

/* Wait until a request is available on the pending list */
static void request_wait(struct fuse_conn *fc)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        DECLARE_WAITQUEUE(wait, current);

        add_wait_queue_exclusive(&fc->waitq, &wait);
        while (fc->connected && !request_pending(fc)) {
                set_current_state(TASK_INTERRUPTIBLE);
                if (signal_pending(current))
                        break;

                spin_unlock(&fc->lock);
                schedule();
                spin_lock(&fc->lock);
        }
        set_current_state(TASK_RUNNING);
        remove_wait_queue(&fc->waitq, &wait);
}

/*
 * Transfer an interrupt request to userspace
 *
 * Unlike other requests this is assembled on demand, without a need
 * to allocate a separate fuse_req structure.
 *
 * Called with fc->lock held, releases it
 */
static int fuse_read_interrupt(struct fuse_conn *fc, struct fuse_req *req,
                               const struct iovec *iov, unsigned long nr_segs)
__releases(&fc->lock)
{
        struct fuse_copy_state cs;
        struct fuse_in_header ih;
        struct fuse_interrupt_in arg;
        unsigned reqsize = sizeof(ih) + sizeof(arg);
        int err;

        list_del_init(&req->intr_entry);
        req->intr_unique = fuse_get_unique(fc);
        memset(&ih, 0, sizeof(ih));
        memset(&arg, 0, sizeof(arg));
        ih.len = reqsize;
        ih.opcode = FUSE_INTERRUPT;
        ih.unique = req->intr_unique;
        arg.unique = req->in.h.unique;

        spin_unlock(&fc->lock);
        if (iov_length(iov, nr_segs) < reqsize)
                return -EINVAL;

        fuse_copy_init(&cs, fc, 1, NULL, iov, nr_segs);
        err = fuse_copy_one(&cs, &ih, sizeof(ih));
        if (!err)
                err = fuse_copy_one(&cs, &arg, sizeof(arg));
        fuse_copy_finish(&cs);

        return err ? err : reqsize;
}

/*
 * Read a single request into the userspace filesystem's buffer.  This
 * function waits until a request is available, then removes it from
 * the pending list and copies the request data to the userspace buffer.
 * If no reply is needed (FORGET), or the request has been aborted, or
 * an error occurred during the copying, then it is finished by calling
 * request_end().  Otherwise add it to the processing list and set the
 * 'sent' flag.
 */
static ssize_t fuse_dev_read(struct kiocb *iocb, const struct iovec *iov,
                              unsigned long nr_segs, loff_t pos)
{
        int err;
        struct fuse_req *req;
        struct fuse_in *in;
        struct fuse_copy_state cs;
        unsigned reqsize;
        struct file *file = iocb->ki_filp;
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return -EPERM;

 restart:
        spin_lock(&fc->lock);
        err = -EAGAIN;
        if ((file->f_flags & O_NONBLOCK) && fc->connected &&
            !request_pending(fc))
                goto err_unlock;

        request_wait(fc);
        err = -ENODEV;
        if (!fc->connected)
                goto err_unlock;
        err = -ERESTARTSYS;
        if (!request_pending(fc))
                goto err_unlock;

        if (!list_empty(&fc->interrupts)) {
                req = list_entry(fc->interrupts.next, struct fuse_req,
                                 intr_entry);
                return fuse_read_interrupt(fc, req, iov, nr_segs);
        }

        req = list_entry(fc->pending.next, struct fuse_req, list);
        req->state = FUSE_REQ_READING;
        list_move(&req->list, &fc->io);

        in = &req->in;
        reqsize = in->h.len;
        /* If request is too large, reply with an error and restart the read */
        if (iov_length(iov, nr_segs) < reqsize) {
                req->out.h.error = -EIO;
                /* SETXATTR is special, since it may contain too large data */
                if (in->h.opcode == FUSE_SETXATTR)
                        req->out.h.error = -E2BIG;
                request_end(fc, req);
                goto restart;
        }
        spin_unlock(&fc->lock);
        fuse_copy_init(&cs, fc, 1, req, iov, nr_segs);
        err = fuse_copy_one(&cs, &in->h, sizeof(in->h));
        if (!err)
                err = fuse_copy_args(&cs, in->numargs, in->argpages,
                                     (struct fuse_arg *) in->args, 0);
        fuse_copy_finish(&cs);
        spin_lock(&fc->lock);
        req->locked = 0;
        if (req->aborted) {
                request_end(fc, req);
                return -ENODEV;
        }
        if (err) {
                req->out.h.error = -EIO;
                request_end(fc, req);
                return err;
        }
        if (!req->isreply)
                request_end(fc, req);
        else {
                req->state = FUSE_REQ_SENT;
                list_move_tail(&req->list, &fc->processing);
                if (req->interrupted)
                        queue_interrupt(fc, req);
                spin_unlock(&fc->lock);
        }
        return reqsize;

 err_unlock:
        spin_unlock(&fc->lock);
        return err;
}

static int fuse_notify_poll(struct fuse_conn *fc, unsigned int size,
                            struct fuse_copy_state *cs)
{
        struct fuse_notify_poll_wakeup_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        fuse_copy_finish(cs);
        return fuse_notify_poll_wakeup(fc, &outarg);

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_inode(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_inode_out outarg;
        int err = -EINVAL;

        if (size != sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;
        fuse_copy_finish(cs);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb) {
                err = fuse_reverse_inval_inode(fc->sb, outarg.ino,
                                               outarg.off, outarg.len);
        }
        up_read(&fc->killsb);
        return err;

err:
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify_inval_entry(struct fuse_conn *fc, unsigned int size,
                                   struct fuse_copy_state *cs)
{
        struct fuse_notify_inval_entry_out outarg;
        int err = -ENOMEM;
        char *buf;
        struct qstr name;

        buf = kzalloc(FUSE_NAME_MAX + 1, GFP_KERNEL);
        if (!buf)
                goto err;

        err = -EINVAL;
        if (size < sizeof(outarg))
                goto err;

        err = fuse_copy_one(cs, &outarg, sizeof(outarg));
        if (err)
                goto err;

        err = -ENAMETOOLONG;
        if (outarg.namelen > FUSE_NAME_MAX)
                goto err;

        name.name = buf;
        name.len = outarg.namelen;
        err = fuse_copy_one(cs, buf, outarg.namelen + 1);
        if (err)
                goto err;
        fuse_copy_finish(cs);
        buf[outarg.namelen] = 0;
        name.hash = full_name_hash(name.name, name.len);

        down_read(&fc->killsb);
        err = -ENOENT;
        if (fc->sb)
                err = fuse_reverse_inval_entry(fc->sb, outarg.parent, &name);
        up_read(&fc->killsb);
        kfree(buf);
        return err;

err:
        kfree(buf);
        fuse_copy_finish(cs);
        return err;
}

static int fuse_notify(struct fuse_conn *fc, enum fuse_notify_code code,
                       unsigned int size, struct fuse_copy_state *cs)
{
        switch (code) {
        case FUSE_NOTIFY_POLL:
                return fuse_notify_poll(fc, size, cs);

        case FUSE_NOTIFY_INVAL_INODE:
                return fuse_notify_inval_inode(fc, size, cs);

        case FUSE_NOTIFY_INVAL_ENTRY:
                return fuse_notify_inval_entry(fc, size, cs);

        default:
                fuse_copy_finish(cs);
                return -EINVAL;
        }
}

/* Look up request on processing list by unique ID */
static struct fuse_req *request_find(struct fuse_conn *fc, u64 unique)
{
        struct list_head *entry;

        list_for_each(entry, &fc->processing) {
                struct fuse_req *req;
                req = list_entry(entry, struct fuse_req, list);
                if (req->in.h.unique == unique || req->intr_unique == unique)
                        return req;
        }
        return NULL;
}

static int copy_out_args(struct fuse_copy_state *cs, struct fuse_out *out,
                         unsigned nbytes)
{
        unsigned reqsize = sizeof(struct fuse_out_header);

        if (out->h.error)
                return nbytes != reqsize ? -EINVAL : 0;

        reqsize += len_args(out->numargs, out->args);

        if (reqsize < nbytes || (reqsize > nbytes && !out->argvar))
                return -EINVAL;
        else if (reqsize > nbytes) {
                struct fuse_arg *lastarg = &out->args[out->numargs-1];
                unsigned diffsize = reqsize - nbytes;
                if (diffsize > lastarg->size)
                        return -EINVAL;
                lastarg->size -= diffsize;
        }
        return fuse_copy_args(cs, out->numargs, out->argpages, out->args,
                              out->page_zeroing);
}

/*
 * Write a single reply to a request.  First the header is copied from
 * the write buffer.  The request is then searched on the processing
 * list by the unique ID found in the header.  If found, then remove
 * it from the list and copy the rest of the buffer to the request.
 * The request is finished by calling request_end()
 */
static ssize_t fuse_dev_write(struct kiocb *iocb, const struct iovec *iov,
                               unsigned long nr_segs, loff_t pos)
{
        int err;
        size_t nbytes = iov_length(iov, nr_segs);
        struct fuse_req *req;
        struct fuse_out_header oh;
        struct fuse_copy_state cs;
        struct fuse_conn *fc = fuse_get_conn(iocb->ki_filp);
        if (!fc)
                return -EPERM;

        fuse_copy_init(&cs, fc, 0, NULL, iov, nr_segs);
        if (nbytes < sizeof(struct fuse_out_header))
                return -EINVAL;

        err = fuse_copy_one(&cs, &oh, sizeof(oh));
        if (err)
                goto err_finish;

        err = -EINVAL;
        if (oh.len != nbytes)
                goto err_finish;

        /*
         * Zero oh.unique indicates an unsolicited notification message,
         * and oh.error contains the notification code.
         */
        if (!oh.unique) {
                err = fuse_notify(fc, oh.error, nbytes - sizeof(oh), &cs);
                return err ? err : nbytes;
        }

        err = -EINVAL;
        if (oh.error <= -1000 || oh.error > 0)
                goto err_finish;

        spin_lock(&fc->lock);
        err = -ENOENT;
        if (!fc->connected)
                goto err_unlock;

        req = request_find(fc, oh.unique);
        if (!req)
                goto err_unlock;

        if (req->aborted) {
                spin_unlock(&fc->lock);
                fuse_copy_finish(&cs);
                spin_lock(&fc->lock);
                request_end(fc, req);
                return -ENOENT;
        }
        /* Is it an interrupt reply? */
        if (req->intr_unique == oh.unique) {
                err = -EINVAL;
                if (nbytes != sizeof(struct fuse_out_header))
                        goto err_unlock;

                if (oh.error == -ENOSYS)
                        fc->no_interrupt = 1;
                else if (oh.error == -EAGAIN)
                        queue_interrupt(fc, req);

                spin_unlock(&fc->lock);
                fuse_copy_finish(&cs);
                return nbytes;
        }

        req->state = FUSE_REQ_WRITING;
        list_move(&req->list, &fc->io);
        req->out.h = oh;
        req->locked = 1;
        cs.req = req;
        spin_unlock(&fc->lock);

        err = copy_out_args(&cs, &req->out, nbytes);
        fuse_copy_finish(&cs);

        spin_lock(&fc->lock);
        req->locked = 0;
        if (!err) {
                if (req->aborted)
                        err = -ENOENT;
        } else if (!req->aborted)
                req->out.h.error = -EIO;
        request_end(fc, req);

        return err ? err : nbytes;

 err_unlock:
        spin_unlock(&fc->lock);
 err_finish:
        fuse_copy_finish(&cs);
        return err;
}

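/*
 * The device is always writable; it becomes readable when a request or
 * interrupt is waiting to be transferred to userspace.
 */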
static unsigned fuse_dev_poll(struct file *file, poll_table *wait)
{
        unsigned mask = POLLOUT | POLLWRNORM;
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return POLLERR;

        poll_wait(file, &fc->waitq, wait);

        spin_lock(&fc->lock);
        if (!fc->connected)
                mask = POLLERR;
        else if (request_pending(fc))
                mask |= POLLIN | POLLRDNORM;
        spin_unlock(&fc->lock);

        return mask;
}

/*
 * Abort all requests on the given list (pending or processing)
 *
 * This function releases and reacquires fc->lock
 */
static void end_requests(struct fuse_conn *fc, struct list_head *head)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        while (!list_empty(head)) {
                struct fuse_req *req;
                req = list_entry(head->next, struct fuse_req, list);
                req->out.h.error = -ECONNABORTED;
                request_end(fc, req);
                spin_lock(&fc->lock);
        }
}

/*
 * Abort requests under I/O
 *
 * The requests are set to aborted and finished, and the request
 * waiter is woken up.  This will make request_wait_answer() wait
 * until the request is unlocked and then return.
 *
 * If the request is asynchronous, then the end function needs to be
 * called after waiting for the request to be unlocked (if it was
 * locked).
 */
static void end_io_requests(struct fuse_conn *fc)
__releases(&fc->lock)
__acquires(&fc->lock)
{
        while (!list_empty(&fc->io)) {
                struct fuse_req *req =
                        list_entry(fc->io.next, struct fuse_req, list);
                void (*end) (struct fuse_conn *, struct fuse_req *) = req->end;

                req->aborted = 1;
                req->out.h.error = -ECONNABORTED;
                req->state = FUSE_REQ_FINISHED;
                list_del_init(&req->list);
                wake_up(&req->waitq);
                if (end) {
                        req->end = NULL;
                        __fuse_get_request(req);
                        spin_unlock(&fc->lock);
                        wait_event(req->waitq, !req->locked);
                        end(fc, req);
                        fuse_put_request(fc, req);
                        spin_lock(&fc->lock);
                }
        }
}

/*
 * Abort all requests.
 *
 * Emergency exit in case of a malicious or accidental deadlock, or
 * just a hung filesystem.
 *
 * The same effect is usually achievable through killing the
 * filesystem daemon and all users of the filesystem.  The exception
 * is the combination of an asynchronous request and the tricky
 * deadlock (see Documentation/filesystems/fuse.txt).
 *
 * During the aborting, progression of requests from the pending and
 * processing lists onto the io list, and progression of new requests
 * onto the pending list is prevented by fc->connected being false.
 *
 * Progression of requests under I/O to the processing list is
 * prevented by the req->aborted flag being true for these requests.
 * For this reason requests on the io list must be aborted first.
 */
void fuse_abort_conn(struct fuse_conn *fc)
{
        spin_lock(&fc->lock);
        if (fc->connected) {
                fc->connected = 0;
                fc->blocked = 0;
                end_io_requests(fc);
                end_requests(fc, &fc->pending);
                end_requests(fc, &fc->processing);
                wake_up_all(&fc->waitq);
                wake_up_all(&fc->blocked_waitq);
                kill_fasync(&fc->fasync, SIGIO, POLL_IN);
        }
        spin_unlock(&fc->lock);
}
EXPORT_SYMBOL_GPL(fuse_abort_conn);

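/*
 * The device file was closed: mark the connection as dead and fail all
 * pending and processing requests with -ECONNABORTED.
 */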
int fuse_dev_release(struct inode *inode, struct file *file)
{
        struct fuse_conn *fc = fuse_get_conn(file);
        if (fc) {
                spin_lock(&fc->lock);
                fc->connected = 0;
                end_requests(fc, &fc->pending);
                end_requests(fc, &fc->processing);
                spin_unlock(&fc->lock);
                fuse_conn_put(fc);
        }

        return 0;
}
EXPORT_SYMBOL_GPL(fuse_dev_release);

static int fuse_dev_fasync(int fd, struct file *file, int on)
{
        struct fuse_conn *fc = fuse_get_conn(file);
        if (!fc)
                return -EPERM;

        /* No locking - fasync_helper does its own locking */
        return fasync_helper(fd, file, on, &fc->fasync);
}

const struct file_operations fuse_dev_operations = {
        .owner          = THIS_MODULE,
        .llseek         = no_llseek,
        .read           = do_sync_read,
        .aio_read       = fuse_dev_read,
        .write          = do_sync_write,
        .aio_write      = fuse_dev_write,
        .poll           = fuse_dev_poll,
        .release        = fuse_dev_release,
        .fasync         = fuse_dev_fasync,
};
EXPORT_SYMBOL_GPL(fuse_dev_operations);

static struct miscdevice fuse_miscdevice = {
        .minor = FUSE_MINOR,
        .name  = "fuse",
        .fops = &fuse_dev_operations,
};

int __init fuse_dev_init(void)
{
        int err = -ENOMEM;
        fuse_req_cachep = kmem_cache_create("fuse_request",
                                            sizeof(struct fuse_req),
                                            0, 0, NULL);
        if (!fuse_req_cachep)
                goto out;

        err = misc_register(&fuse_miscdevice);
        if (err)
                goto out_cache_clean;

        return 0;

 out_cache_clean:
        kmem_cache_destroy(fuse_req_cachep);
 out:
        return err;
}

void fuse_dev_cleanup(void)
{
        misc_deregister(&fuse_miscdevice);
        kmem_cache_destroy(fuse_req_cachep);
}