]>
Commit | Line | Data |
---|---|---|
1 | /* | |
2 | * Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved. | |
3 | * | |
4 | * This copyrighted material is made available to anyone wishing to use, | |
5 | * modify, copy, or redistribute it subject to the terms and conditions | |
6 | * of the GNU General Public License version 2. | |
7 | */ | |
8 | ||
9 | #include <linux/fs.h> | |
10 | #include <linux/miscdevice.h> | |
11 | #include <linux/poll.h> | |
12 | #include <linux/dlm.h> | |
13 | #include <linux/dlm_plock.h> | |
14 | #include <linux/slab.h> | |
15 | ||
16 | #include "dlm_internal.h" | |
17 | #include "lockspace.h" | |
18 | ||
19 | static spinlock_t ops_lock; | |
20 | static struct list_head send_list; | |
21 | static struct list_head recv_list; | |
22 | static wait_queue_head_t send_wq; | |
23 | static wait_queue_head_t recv_wq; | |
24 | ||
25 | struct plock_op { | |
26 | struct list_head list; | |
27 | int done; | |
28 | struct dlm_plock_info info; | |
29 | }; | |
30 | ||
31 | struct plock_xop { | |
32 | struct plock_op xop; | |
33 | void *callback; | |
34 | void *fl; | |
35 | void *file; | |
36 | struct file_lock flc; | |
37 | }; | |
38 | ||
39 | ||
40 | static inline void set_version(struct dlm_plock_info *info) | |
41 | { | |
42 | info->version[0] = DLM_PLOCK_VERSION_MAJOR; | |
43 | info->version[1] = DLM_PLOCK_VERSION_MINOR; | |
44 | info->version[2] = DLM_PLOCK_VERSION_PATCH; | |
45 | } | |
46 | ||
47 | static int check_version(struct dlm_plock_info *info) | |
48 | { | |
49 | if ((DLM_PLOCK_VERSION_MAJOR != info->version[0]) || | |
50 | (DLM_PLOCK_VERSION_MINOR < info->version[1])) { | |
51 | log_print("plock device version mismatch: " | |
52 | "kernel (%u.%u.%u), user (%u.%u.%u)", | |
53 | DLM_PLOCK_VERSION_MAJOR, | |
54 | DLM_PLOCK_VERSION_MINOR, | |
55 | DLM_PLOCK_VERSION_PATCH, | |
56 | info->version[0], | |
57 | info->version[1], | |
58 | info->version[2]); | |
59 | return -EINVAL; | |
60 | } | |
61 | return 0; | |
62 | } | |
63 | ||
64 | static void send_op(struct plock_op *op) | |
65 | { | |
66 | set_version(&op->info); | |
67 | INIT_LIST_HEAD(&op->list); | |
68 | spin_lock(&ops_lock); | |
69 | list_add_tail(&op->list, &send_list); | |
70 | spin_unlock(&ops_lock); | |
71 | wake_up(&send_wq); | |
72 | } | |
73 | ||
74 | int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | |
75 | int cmd, struct file_lock *fl) | |
76 | { | |
77 | struct dlm_ls *ls; | |
78 | struct plock_op *op; | |
79 | struct plock_xop *xop; | |
80 | int rv; | |
81 | ||
82 | ls = dlm_find_lockspace_local(lockspace); | |
83 | if (!ls) | |
84 | return -EINVAL; | |
85 | ||
86 | xop = kzalloc(sizeof(*xop), GFP_NOFS); | |
87 | if (!xop) { | |
88 | rv = -ENOMEM; | |
89 | goto out; | |
90 | } | |
91 | ||
92 | op = &xop->xop; | |
93 | op->info.optype = DLM_PLOCK_OP_LOCK; | |
94 | op->info.pid = fl->fl_pid; | |
95 | op->info.ex = (fl->fl_type == F_WRLCK); | |
96 | op->info.wait = IS_SETLKW(cmd); | |
97 | op->info.fsid = ls->ls_global_id; | |
98 | op->info.number = number; | |
99 | op->info.start = fl->fl_start; | |
100 | op->info.end = fl->fl_end; | |
101 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) { | |
102 | /* fl_owner is lockd which doesn't distinguish | |
103 | processes on the nfs client */ | |
104 | op->info.owner = (__u64) fl->fl_pid; | |
105 | xop->callback = fl->fl_lmops->fl_grant; | |
106 | locks_init_lock(&xop->flc); | |
107 | locks_copy_lock(&xop->flc, fl); | |
108 | xop->fl = fl; | |
109 | xop->file = file; | |
110 | } else { | |
111 | op->info.owner = (__u64)(long) fl->fl_owner; | |
112 | xop->callback = NULL; | |
113 | } | |
114 | ||
115 | send_op(op); | |
116 | ||
117 | if (xop->callback == NULL) | |
118 | wait_event(recv_wq, (op->done != 0)); | |
119 | else { | |
120 | rv = FILE_LOCK_DEFERRED; | |
121 | goto out; | |
122 | } | |
123 | ||
124 | spin_lock(&ops_lock); | |
125 | if (!list_empty(&op->list)) { | |
126 | log_error(ls, "dlm_posix_lock: op on list %llx", | |
127 | (unsigned long long)number); | |
128 | list_del(&op->list); | |
129 | } | |
130 | spin_unlock(&ops_lock); | |
131 | ||
132 | rv = op->info.rv; | |
133 | ||
134 | if (!rv) { | |
135 | if (posix_lock_file_wait(file, fl) < 0) | |
136 | log_error(ls, "dlm_posix_lock: vfs lock error %llx", | |
137 | (unsigned long long)number); | |
138 | } | |
139 | ||
140 | kfree(xop); | |
141 | out: | |
142 | dlm_put_lockspace(ls); | |
143 | return rv; | |
144 | } | |
145 | EXPORT_SYMBOL_GPL(dlm_posix_lock); | |
146 | ||
147 | /* Returns failure iff a successful lock operation should be canceled */ | |
148 | static int dlm_plock_callback(struct plock_op *op) | |
149 | { | |
150 | struct file *file; | |
151 | struct file_lock *fl; | |
152 | struct file_lock *flc; | |
153 | int (*notify)(void *, void *, int) = NULL; | |
154 | struct plock_xop *xop = (struct plock_xop *)op; | |
155 | int rv = 0; | |
156 | ||
157 | spin_lock(&ops_lock); | |
158 | if (!list_empty(&op->list)) { | |
159 | log_print("dlm_plock_callback: op on list %llx", | |
160 | (unsigned long long)op->info.number); | |
161 | list_del(&op->list); | |
162 | } | |
163 | spin_unlock(&ops_lock); | |
164 | ||
165 | /* check if the following 2 are still valid or make a copy */ | |
166 | file = xop->file; | |
167 | flc = &xop->flc; | |
168 | fl = xop->fl; | |
169 | notify = xop->callback; | |
170 | ||
171 | if (op->info.rv) { | |
172 | notify(fl, NULL, op->info.rv); | |
173 | goto out; | |
174 | } | |
175 | ||
176 | /* got fs lock; bookkeep locally as well: */ | |
177 | flc->fl_flags &= ~FL_SLEEP; | |
178 | if (posix_lock_file(file, flc, NULL)) { | |
179 | /* | |
180 | * This can only happen in the case of kmalloc() failure. | |
181 | * The filesystem's own lock is the authoritative lock, | |
182 | * so a failure to get the lock locally is not a disaster. | |
183 | * As long as the fs cannot reliably cancel locks (especially | |
184 | * in a low-memory situation), we're better off ignoring | |
185 | * this failure than trying to recover. | |
186 | */ | |
187 | log_print("dlm_plock_callback: vfs lock error %llx file %p fl %p", | |
188 | (unsigned long long)op->info.number, file, fl); | |
189 | } | |
190 | ||
191 | rv = notify(fl, NULL, 0); | |
192 | if (rv) { | |
193 | /* XXX: We need to cancel the fs lock here: */ | |
194 | log_print("dlm_plock_callback: lock granted after lock request " | |
195 | "failed; dangling lock!\n"); | |
196 | goto out; | |
197 | } | |
198 | ||
199 | out: | |
200 | kfree(xop); | |
201 | return rv; | |
202 | } | |
203 | ||
204 | int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file, | |
205 | struct file_lock *fl) | |
206 | { | |
207 | struct dlm_ls *ls; | |
208 | struct plock_op *op; | |
209 | int rv; | |
210 | ||
211 | ls = dlm_find_lockspace_local(lockspace); | |
212 | if (!ls) | |
213 | return -EINVAL; | |
214 | ||
215 | op = kzalloc(sizeof(*op), GFP_NOFS); | |
216 | if (!op) { | |
217 | rv = -ENOMEM; | |
218 | goto out; | |
219 | } | |
220 | ||
221 | if (posix_lock_file_wait(file, fl) < 0) | |
222 | log_error(ls, "dlm_posix_unlock: vfs unlock error %llx", | |
223 | (unsigned long long)number); | |
224 | ||
225 | op->info.optype = DLM_PLOCK_OP_UNLOCK; | |
226 | op->info.pid = fl->fl_pid; | |
227 | op->info.fsid = ls->ls_global_id; | |
228 | op->info.number = number; | |
229 | op->info.start = fl->fl_start; | |
230 | op->info.end = fl->fl_end; | |
231 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) | |
232 | op->info.owner = (__u64) fl->fl_pid; | |
233 | else | |
234 | op->info.owner = (__u64)(long) fl->fl_owner; | |
235 | ||
236 | send_op(op); | |
237 | wait_event(recv_wq, (op->done != 0)); | |
238 | ||
239 | spin_lock(&ops_lock); | |
240 | if (!list_empty(&op->list)) { | |
241 | log_error(ls, "dlm_posix_unlock: op on list %llx", | |
242 | (unsigned long long)number); | |
243 | list_del(&op->list); | |
244 | } | |
245 | spin_unlock(&ops_lock); | |
246 | ||
247 | rv = op->info.rv; | |
248 | ||
249 | if (rv == -ENOENT) | |
250 | rv = 0; | |
251 | ||
252 | kfree(op); | |
253 | out: | |
254 | dlm_put_lockspace(ls); | |
255 | return rv; | |
256 | } | |
257 | EXPORT_SYMBOL_GPL(dlm_posix_unlock); | |
258 | ||
259 | int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file, | |
260 | struct file_lock *fl) | |
261 | { | |
262 | struct dlm_ls *ls; | |
263 | struct plock_op *op; | |
264 | int rv; | |
265 | ||
266 | ls = dlm_find_lockspace_local(lockspace); | |
267 | if (!ls) | |
268 | return -EINVAL; | |
269 | ||
270 | op = kzalloc(sizeof(*op), GFP_NOFS); | |
271 | if (!op) { | |
272 | rv = -ENOMEM; | |
273 | goto out; | |
274 | } | |
275 | ||
276 | op->info.optype = DLM_PLOCK_OP_GET; | |
277 | op->info.pid = fl->fl_pid; | |
278 | op->info.ex = (fl->fl_type == F_WRLCK); | |
279 | op->info.fsid = ls->ls_global_id; | |
280 | op->info.number = number; | |
281 | op->info.start = fl->fl_start; | |
282 | op->info.end = fl->fl_end; | |
283 | if (fl->fl_lmops && fl->fl_lmops->fl_grant) | |
284 | op->info.owner = (__u64) fl->fl_pid; | |
285 | else | |
286 | op->info.owner = (__u64)(long) fl->fl_owner; | |
287 | ||
288 | send_op(op); | |
289 | wait_event(recv_wq, (op->done != 0)); | |
290 | ||
291 | spin_lock(&ops_lock); | |
292 | if (!list_empty(&op->list)) { | |
293 | log_error(ls, "dlm_posix_get: op on list %llx", | |
294 | (unsigned long long)number); | |
295 | list_del(&op->list); | |
296 | } | |
297 | spin_unlock(&ops_lock); | |
298 | ||
299 | /* info.rv from userspace is 1 for conflict, 0 for no-conflict, | |
300 | -ENOENT if there are no locks on the file */ | |
301 | ||
302 | rv = op->info.rv; | |
303 | ||
304 | fl->fl_type = F_UNLCK; | |
305 | if (rv == -ENOENT) | |
306 | rv = 0; | |
307 | else if (rv > 0) { | |
308 | locks_init_lock(fl); | |
309 | fl->fl_type = (op->info.ex) ? F_WRLCK : F_RDLCK; | |
310 | fl->fl_flags = FL_POSIX; | |
311 | fl->fl_pid = op->info.pid; | |
312 | fl->fl_start = op->info.start; | |
313 | fl->fl_end = op->info.end; | |
314 | rv = 0; | |
315 | } | |
316 | ||
317 | kfree(op); | |
318 | out: | |
319 | dlm_put_lockspace(ls); | |
320 | return rv; | |
321 | } | |
322 | EXPORT_SYMBOL_GPL(dlm_posix_get); | |
323 | ||
324 | /* a read copies out one plock request from the send list */ | |
325 | static ssize_t dev_read(struct file *file, char __user *u, size_t count, | |
326 | loff_t *ppos) | |
327 | { | |
328 | struct dlm_plock_info info; | |
329 | struct plock_op *op = NULL; | |
330 | ||
331 | if (count < sizeof(info)) | |
332 | return -EINVAL; | |
333 | ||
334 | spin_lock(&ops_lock); | |
335 | if (!list_empty(&send_list)) { | |
336 | op = list_entry(send_list.next, struct plock_op, list); | |
337 | list_move(&op->list, &recv_list); | |
338 | memcpy(&info, &op->info, sizeof(info)); | |
339 | } | |
340 | spin_unlock(&ops_lock); | |
341 | ||
342 | if (!op) | |
343 | return -EAGAIN; | |
344 | ||
345 | if (copy_to_user(u, &info, sizeof(info))) | |
346 | return -EFAULT; | |
347 | return sizeof(info); | |
348 | } | |
349 | ||
350 | /* a write copies in one plock result that should match a plock_op | |
351 | on the recv list */ | |
352 | static ssize_t dev_write(struct file *file, const char __user *u, size_t count, | |
353 | loff_t *ppos) | |
354 | { | |
355 | struct dlm_plock_info info; | |
356 | struct plock_op *op; | |
357 | int found = 0, do_callback = 0; | |
358 | ||
359 | if (count != sizeof(info)) | |
360 | return -EINVAL; | |
361 | ||
362 | if (copy_from_user(&info, u, sizeof(info))) | |
363 | return -EFAULT; | |
364 | ||
365 | if (check_version(&info)) | |
366 | return -EINVAL; | |
367 | ||
368 | spin_lock(&ops_lock); | |
369 | list_for_each_entry(op, &recv_list, list) { | |
370 | if (op->info.fsid == info.fsid && | |
371 | op->info.number == info.number && | |
372 | op->info.owner == info.owner) { | |
373 | struct plock_xop *xop = (struct plock_xop *)op; | |
374 | list_del_init(&op->list); | |
375 | memcpy(&op->info, &info, sizeof(info)); | |
376 | if (xop->callback) | |
377 | do_callback = 1; | |
378 | else | |
379 | op->done = 1; | |
380 | found = 1; | |
381 | break; | |
382 | } | |
383 | } | |
384 | spin_unlock(&ops_lock); | |
385 | ||
386 | if (found) { | |
387 | if (do_callback) | |
388 | dlm_plock_callback(op); | |
389 | else | |
390 | wake_up(&recv_wq); | |
391 | } else | |
392 | log_print("dev_write no op %x %llx", info.fsid, | |
393 | (unsigned long long)info.number); | |
394 | return count; | |
395 | } | |
396 | ||
397 | static unsigned int dev_poll(struct file *file, poll_table *wait) | |
398 | { | |
399 | unsigned int mask = 0; | |
400 | ||
401 | poll_wait(file, &send_wq, wait); | |
402 | ||
403 | spin_lock(&ops_lock); | |
404 | if (!list_empty(&send_list)) | |
405 | mask = POLLIN | POLLRDNORM; | |
406 | spin_unlock(&ops_lock); | |
407 | ||
408 | return mask; | |
409 | } | |
410 | ||
411 | static const struct file_operations dev_fops = { | |
412 | .read = dev_read, | |
413 | .write = dev_write, | |
414 | .poll = dev_poll, | |
415 | .owner = THIS_MODULE, | |
416 | .llseek = noop_llseek, | |
417 | }; | |
418 | ||
419 | static struct miscdevice plock_dev_misc = { | |
420 | .minor = MISC_DYNAMIC_MINOR, | |
421 | .name = DLM_PLOCK_MISC_NAME, | |
422 | .fops = &dev_fops | |
423 | }; | |
424 | ||
425 | int dlm_plock_init(void) | |
426 | { | |
427 | int rv; | |
428 | ||
429 | spin_lock_init(&ops_lock); | |
430 | INIT_LIST_HEAD(&send_list); | |
431 | INIT_LIST_HEAD(&recv_list); | |
432 | init_waitqueue_head(&send_wq); | |
433 | init_waitqueue_head(&recv_wq); | |
434 | ||
435 | rv = misc_register(&plock_dev_misc); | |
436 | if (rv) | |
437 | log_print("dlm_plock_init: misc_register failed %d", rv); | |
438 | return rv; | |
439 | } | |
440 | ||
441 | void dlm_plock_exit(void) | |
442 | { | |
443 | if (misc_deregister(&plock_dev_misc) < 0) | |
444 | log_print("dlm_plock_exit: misc_deregister failed"); | |
445 | } | |
446 |