]>
Commit | Line | Data |
---|---|---|
1da177e4 LT |
1 | /* |
2 | * linux/fs/file.c | |
3 | * | |
4 | * Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes | |
5 | * | |
6 | * Manage the dynamic fd arrays in the process files_struct. | |
7 | */ | |
8 | ||
9 | #include <linux/fs.h> | |
10 | #include <linux/mm.h> | |
11 | #include <linux/time.h> | |
12 | #include <linux/slab.h> | |
13 | #include <linux/vmalloc.h> | |
14 | #include <linux/file.h> | |
15 | #include <linux/bitops.h> | |
ab2af1f5 DS |
16 | #include <linux/interrupt.h> |
17 | #include <linux/spinlock.h> | |
18 | #include <linux/rcupdate.h> | |
19 | #include <linux/workqueue.h> | |
20 | ||
21 | struct fdtable_defer { | |
22 | spinlock_t lock; | |
23 | struct work_struct wq; | |
ab2af1f5 DS |
24 | struct fdtable *next; |
25 | }; | |
26 | ||
27 | /* | |
28 | * We use this list to defer free fdtables that have vmalloced | |
29 | * sets/arrays. By keeping a per-cpu list, we avoid having to embed | |
30 | * the work_struct in fdtable itself which avoids a 64 byte (i386) increase in | |
31 | * this per-task structure. | |
32 | */ | |
33 | static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list); | |
1da177e4 LT |
34 | |
35 | ||
36 | /* | |
37 | * Allocate an fd array, using kmalloc or vmalloc. | |
38 | * Note: the array isn't cleared at allocation time. | |
39 | */ | |
40 | struct file ** alloc_fd_array(int num) | |
41 | { | |
42 | struct file **new_fds; | |
43 | int size = num * sizeof(struct file *); | |
44 | ||
45 | if (size <= PAGE_SIZE) | |
46 | new_fds = (struct file **) kmalloc(size, GFP_KERNEL); | |
47 | else | |
48 | new_fds = (struct file **) vmalloc(size); | |
49 | return new_fds; | |
50 | } | |
51 | ||
52 | void free_fd_array(struct file **array, int num) | |
53 | { | |
54 | int size = num * sizeof(struct file *); | |
55 | ||
56 | if (!array) { | |
57 | printk (KERN_ERR "free_fd_array: array = 0 (num = %d)\n", num); | |
58 | return; | |
59 | } | |
60 | ||
61 | if (num <= NR_OPEN_DEFAULT) /* Don't free the embedded fd array! */ | |
62 | return; | |
63 | else if (size <= PAGE_SIZE) | |
64 | kfree(array); | |
65 | else | |
66 | vfree(array); | |
67 | } | |
68 | ||
ab2af1f5 | 69 | static void __free_fdtable(struct fdtable *fdt) |
1da177e4 | 70 | { |
bbea9f69 VL |
71 | free_fdset(fdt->open_fds, fdt->max_fds); |
72 | free_fdset(fdt->close_on_exec, fdt->max_fds); | |
0b175a7e | 73 | free_fd_array(fdt->fd, fdt->max_fds); |
ab2af1f5 DS |
74 | kfree(fdt); |
75 | } | |
1da177e4 | 76 | |
65f27f38 | 77 | static void free_fdtable_work(struct work_struct *work) |
ab2af1f5 | 78 | { |
65f27f38 DH |
79 | struct fdtable_defer *f = |
80 | container_of(work, struct fdtable_defer, wq); | |
ab2af1f5 | 81 | struct fdtable *fdt; |
1da177e4 | 82 | |
ab2af1f5 DS |
83 | spin_lock_bh(&f->lock); |
84 | fdt = f->next; | |
85 | f->next = NULL; | |
86 | spin_unlock_bh(&f->lock); | |
87 | while(fdt) { | |
88 | struct fdtable *next = fdt->next; | |
89 | __free_fdtable(fdt); | |
90 | fdt = next; | |
91 | } | |
92 | } | |
1da177e4 | 93 | |
ab2af1f5 DS |
94 | static void free_fdtable_rcu(struct rcu_head *rcu) |
95 | { | |
96 | struct fdtable *fdt = container_of(rcu, struct fdtable, rcu); | |
97 | int fdset_size, fdarray_size; | |
98 | struct fdtable_defer *fddef; | |
1da177e4 | 99 | |
ab2af1f5 | 100 | BUG_ON(!fdt); |
bbea9f69 | 101 | fdset_size = fdt->max_fds / 8; |
ab2af1f5 DS |
102 | fdarray_size = fdt->max_fds * sizeof(struct file *); |
103 | ||
104 | if (fdt->free_files) { | |
105 | /* | |
106 | * The this fdtable was embedded in the files structure | |
107 | * and the files structure itself was getting destroyed. | |
108 | * It is now safe to free the files structure. | |
109 | */ | |
110 | kmem_cache_free(files_cachep, fdt->free_files); | |
111 | return; | |
112 | } | |
bbea9f69 | 113 | if (fdt->max_fds <= NR_OPEN_DEFAULT) |
ab2af1f5 DS |
114 | /* |
115 | * The fdtable was embedded | |
116 | */ | |
117 | return; | |
ab2af1f5 DS |
118 | if (fdset_size <= PAGE_SIZE && fdarray_size <= PAGE_SIZE) { |
119 | kfree(fdt->open_fds); | |
120 | kfree(fdt->close_on_exec); | |
121 | kfree(fdt->fd); | |
122 | kfree(fdt); | |
1da177e4 | 123 | } else { |
ab2af1f5 DS |
124 | fddef = &get_cpu_var(fdtable_defer_list); |
125 | spin_lock(&fddef->lock); | |
126 | fdt->next = fddef->next; | |
127 | fddef->next = fdt; | |
593be07a TH |
128 | /* vmallocs are handled from the workqueue context */ |
129 | schedule_work(&fddef->wq); | |
ab2af1f5 DS |
130 | spin_unlock(&fddef->lock); |
131 | put_cpu_var(fdtable_defer_list); | |
1da177e4 | 132 | } |
ab2af1f5 DS |
133 | } |
134 | ||
135 | void free_fdtable(struct fdtable *fdt) | |
136 | { | |
bbea9f69 | 137 | if (fdt->free_files || fdt->max_fds > NR_OPEN_DEFAULT) |
ab2af1f5 DS |
138 | call_rcu(&fdt->rcu, free_fdtable_rcu); |
139 | } | |
140 | ||
141 | /* | |
142 | * Expand the fdset in the files_struct. Called with the files spinlock | |
143 | * held for write. | |
144 | */ | |
145 | static void copy_fdtable(struct fdtable *nfdt, struct fdtable *fdt) | |
146 | { | |
147 | int i; | |
148 | int count; | |
149 | ||
ab2af1f5 DS |
150 | BUG_ON(nfdt->max_fds < fdt->max_fds); |
151 | /* Copy the existing tables and install the new pointers */ | |
152 | ||
bbea9f69 VL |
153 | i = fdt->max_fds / (sizeof(unsigned long) * 8); |
154 | count = (nfdt->max_fds - fdt->max_fds) / 8; | |
ab2af1f5 DS |
155 | |
156 | /* | |
157 | * Don't copy the entire array if the current fdset is | |
158 | * not yet initialised. | |
159 | */ | |
160 | if (i) { | |
161 | memcpy (nfdt->open_fds, fdt->open_fds, | |
bbea9f69 | 162 | fdt->max_fds/8); |
ab2af1f5 | 163 | memcpy (nfdt->close_on_exec, fdt->close_on_exec, |
bbea9f69 | 164 | fdt->max_fds/8); |
ab2af1f5 DS |
165 | memset (&nfdt->open_fds->fds_bits[i], 0, count); |
166 | memset (&nfdt->close_on_exec->fds_bits[i], 0, count); | |
167 | } | |
168 | ||
169 | /* Don't copy/clear the array if we are creating a new | |
170 | fd array for fork() */ | |
171 | if (fdt->max_fds) { | |
172 | memcpy(nfdt->fd, fdt->fd, | |
173 | fdt->max_fds * sizeof(struct file *)); | |
174 | /* clear the remainder of the array */ | |
175 | memset(&nfdt->fd[fdt->max_fds], 0, | |
176 | (nfdt->max_fds - fdt->max_fds) * | |
177 | sizeof(struct file *)); | |
178 | } | |
1da177e4 LT |
179 | } |
180 | ||
181 | /* | |
182 | * Allocate an fdset array, using kmalloc or vmalloc. | |
183 | * Note: the array isn't cleared at allocation time. | |
184 | */ | |
185 | fd_set * alloc_fdset(int num) | |
186 | { | |
187 | fd_set *new_fdset; | |
188 | int size = num / 8; | |
189 | ||
190 | if (size <= PAGE_SIZE) | |
191 | new_fdset = (fd_set *) kmalloc(size, GFP_KERNEL); | |
192 | else | |
193 | new_fdset = (fd_set *) vmalloc(size); | |
194 | return new_fdset; | |
195 | } | |
196 | ||
197 | void free_fdset(fd_set *array, int num) | |
198 | { | |
bbea9f69 | 199 | if (num <= NR_OPEN_DEFAULT) /* Don't free an embedded fdset */ |
1da177e4 | 200 | return; |
0c9e63fd | 201 | else if (num <= 8 * PAGE_SIZE) |
1da177e4 LT |
202 | kfree(array); |
203 | else | |
204 | vfree(array); | |
205 | } | |
206 | ||
ab2af1f5 | 207 | static struct fdtable *alloc_fdtable(int nr) |
1da177e4 | 208 | { |
ab2af1f5 DS |
209 | struct fdtable *fdt = NULL; |
210 | int nfds = 0; | |
211 | fd_set *new_openset = NULL, *new_execset = NULL; | |
212 | struct file **new_fds; | |
1da177e4 | 213 | |
0c9e63fd | 214 | fdt = kzalloc(sizeof(*fdt), GFP_KERNEL); |
ab2af1f5 DS |
215 | if (!fdt) |
216 | goto out; | |
1da177e4 | 217 | |
ab2af1f5 DS |
218 | nfds = NR_OPEN_DEFAULT; |
219 | /* | |
220 | * Expand to the max in easy steps, and keep expanding it until | |
221 | * we have enough for the requested fd array size. | |
222 | */ | |
223 | do { | |
224 | #if NR_OPEN_DEFAULT < 256 | |
225 | if (nfds < 256) | |
226 | nfds = 256; | |
227 | else | |
228 | #endif | |
229 | if (nfds < (PAGE_SIZE / sizeof(struct file *))) | |
230 | nfds = PAGE_SIZE / sizeof(struct file *); | |
231 | else { | |
232 | nfds = nfds * 2; | |
233 | if (nfds > NR_OPEN) | |
234 | nfds = NR_OPEN; | |
235 | } | |
236 | } while (nfds <= nr); | |
bbea9f69 VL |
237 | |
238 | new_openset = alloc_fdset(nfds); | |
239 | new_execset = alloc_fdset(nfds); | |
240 | if (!new_openset || !new_execset) | |
241 | goto out; | |
242 | fdt->open_fds = new_openset; | |
243 | fdt->close_on_exec = new_execset; | |
244 | ||
ab2af1f5 DS |
245 | new_fds = alloc_fd_array(nfds); |
246 | if (!new_fds) | |
bbea9f69 | 247 | goto out; |
ab2af1f5 DS |
248 | fdt->fd = new_fds; |
249 | fdt->max_fds = nfds; | |
250 | fdt->free_files = NULL; | |
251 | return fdt; | |
252 | out: | |
8b0e330b AM |
253 | free_fdset(new_openset, nfds); |
254 | free_fdset(new_execset, nfds); | |
ab2af1f5 DS |
255 | kfree(fdt); |
256 | return NULL; | |
257 | } | |
1da177e4 | 258 | |
ab2af1f5 | 259 | /* |
74d392aa VL |
260 | * Expand the file descriptor table. |
261 | * This function will allocate a new fdtable and both fd array and fdset, of | |
262 | * the given size. | |
263 | * Return <0 error code on error; 1 on successful completion. | |
264 | * The files->file_lock should be held on entry, and will be held on exit. | |
ab2af1f5 DS |
265 | */ |
266 | static int expand_fdtable(struct files_struct *files, int nr) | |
267 | __releases(files->file_lock) | |
268 | __acquires(files->file_lock) | |
269 | { | |
74d392aa | 270 | struct fdtable *new_fdt, *cur_fdt; |
ab2af1f5 DS |
271 | |
272 | spin_unlock(&files->file_lock); | |
74d392aa | 273 | new_fdt = alloc_fdtable(nr); |
ab2af1f5 | 274 | spin_lock(&files->file_lock); |
74d392aa VL |
275 | if (!new_fdt) |
276 | return -ENOMEM; | |
ab2af1f5 | 277 | /* |
74d392aa VL |
278 | * Check again since another task may have expanded the fd table while |
279 | * we dropped the lock | |
ab2af1f5 | 280 | */ |
74d392aa | 281 | cur_fdt = files_fdtable(files); |
bbea9f69 | 282 | if (nr >= cur_fdt->max_fds) { |
74d392aa VL |
283 | /* Continue as planned */ |
284 | copy_fdtable(new_fdt, cur_fdt); | |
285 | rcu_assign_pointer(files->fdt, new_fdt); | |
286 | free_fdtable(cur_fdt); | |
ab2af1f5 | 287 | } else { |
74d392aa | 288 | /* Somebody else expanded, so undo our attempt */ |
74d392aa | 289 | __free_fdtable(new_fdt); |
ab2af1f5 | 290 | } |
74d392aa | 291 | return 1; |
1da177e4 LT |
292 | } |
293 | ||
294 | /* | |
295 | * Expand files. | |
74d392aa VL |
296 | * This function will expand the file structures, if the requested size exceeds |
297 | * the current capacity and there is room for expansion. | |
298 | * Return <0 error code on error; 0 when nothing done; 1 when files were | |
299 | * expanded and execution may have blocked. | |
300 | * The files->file_lock should be held on entry, and will be held on exit. | |
1da177e4 LT |
301 | */ |
302 | int expand_files(struct files_struct *files, int nr) | |
303 | { | |
badf1662 | 304 | struct fdtable *fdt; |
1da177e4 | 305 | |
badf1662 | 306 | fdt = files_fdtable(files); |
74d392aa | 307 | /* Do we need to expand? */ |
bbea9f69 | 308 | if (nr < fdt->max_fds) |
74d392aa VL |
309 | return 0; |
310 | /* Can we expand? */ | |
bbea9f69 | 311 | if (nr >= NR_OPEN) |
74d392aa VL |
312 | return -EMFILE; |
313 | ||
314 | /* All good, so we try */ | |
315 | return expand_fdtable(files, nr); | |
1da177e4 | 316 | } |
ab2af1f5 DS |
317 | |
318 | static void __devinit fdtable_defer_list_init(int cpu) | |
319 | { | |
320 | struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu); | |
321 | spin_lock_init(&fddef->lock); | |
65f27f38 | 322 | INIT_WORK(&fddef->wq, free_fdtable_work); |
ab2af1f5 DS |
323 | fddef->next = NULL; |
324 | } | |
325 | ||
326 | void __init files_defer_init(void) | |
327 | { | |
328 | int i; | |
0a945022 | 329 | for_each_possible_cpu(i) |
ab2af1f5 DS |
330 | fdtable_defer_list_init(i); |
331 | } |