]> bbs.cooldavid.org Git - net-next-2.6.git/blame - fs/jffs2/gc.c
[JFFS2][XATTR] Remove 'struct list_head ilist' from jffs2_inode_cache.
[net-next-2.6.git] / fs / jffs2 / gc.c
CommitLineData
1da177e4
LT
1/*
2 * JFFS2 -- Journalling Flash File System, Version 2.
3 *
4 * Copyright (C) 2001-2003 Red Hat, Inc.
5 *
6 * Created by David Woodhouse <dwmw2@infradead.org>
7 *
8 * For licensing information, see the file 'LICENCE' in this directory.
9 *
182ec4ee 10 * $Id: gc.c,v 1.155 2005/11/07 11:14:39 gleixner Exp $
1da177e4
LT
11 *
12 */
13
14#include <linux/kernel.h>
15#include <linux/mtd/mtd.h>
16#include <linux/slab.h>
17#include <linux/pagemap.h>
18#include <linux/crc32.h>
19#include <linux/compiler.h>
20#include <linux/stat.h>
21#include "nodelist.h"
22#include "compr.h"
23
182ec4ee 24static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
1da177e4
LT
25 struct jffs2_inode_cache *ic,
26 struct jffs2_raw_node_ref *raw);
182ec4ee 27static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1da177e4 28 struct jffs2_inode_info *f, struct jffs2_full_dnode *fd);
182ec4ee 29static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1da177e4 30 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
182ec4ee 31static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1da177e4
LT
32 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd);
33static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
34 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
35 uint32_t start, uint32_t end);
36static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
37 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
38 uint32_t start, uint32_t end);
39static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
40 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f);
41
42/* Called with erase_completion_lock held */
43static struct jffs2_eraseblock *jffs2_find_gc_block(struct jffs2_sb_info *c)
44{
45 struct jffs2_eraseblock *ret;
46 struct list_head *nextlist = NULL;
47 int n = jiffies % 128;
48
49 /* Pick an eraseblock to garbage collect next. This is where we'll
50 put the clever wear-levelling algorithms. Eventually. */
51 /* We possibly want to favour the dirtier blocks more when the
52 number of free blocks is low. */
a42163d7 53again:
1da177e4
LT
54 if (!list_empty(&c->bad_used_list) && c->nr_free_blocks > c->resv_blocks_gcbad) {
55 D1(printk(KERN_DEBUG "Picking block from bad_used_list to GC next\n"));
56 nextlist = &c->bad_used_list;
57 } else if (n < 50 && !list_empty(&c->erasable_list)) {
182ec4ee 58 /* Note that most of them will have gone directly to be erased.
1da177e4
LT
59 So don't favour the erasable_list _too_ much. */
60 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next\n"));
61 nextlist = &c->erasable_list;
62 } else if (n < 110 && !list_empty(&c->very_dirty_list)) {
63 /* Most of the time, pick one off the very_dirty list */
64 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next\n"));
65 nextlist = &c->very_dirty_list;
66 } else if (n < 126 && !list_empty(&c->dirty_list)) {
67 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next\n"));
68 nextlist = &c->dirty_list;
69 } else if (!list_empty(&c->clean_list)) {
70 D1(printk(KERN_DEBUG "Picking block from clean_list to GC next\n"));
71 nextlist = &c->clean_list;
72 } else if (!list_empty(&c->dirty_list)) {
73 D1(printk(KERN_DEBUG "Picking block from dirty_list to GC next (clean_list was empty)\n"));
74
75 nextlist = &c->dirty_list;
76 } else if (!list_empty(&c->very_dirty_list)) {
77 D1(printk(KERN_DEBUG "Picking block from very_dirty_list to GC next (clean_list and dirty_list were empty)\n"));
78 nextlist = &c->very_dirty_list;
79 } else if (!list_empty(&c->erasable_list)) {
80 D1(printk(KERN_DEBUG "Picking block from erasable_list to GC next (clean_list and {very_,}dirty_list were empty)\n"));
81
82 nextlist = &c->erasable_list;
a42163d7
AB
83 } else if (!list_empty(&c->erasable_pending_wbuf_list)) {
84 /* There are blocks are wating for the wbuf sync */
85 D1(printk(KERN_DEBUG "Synching wbuf in order to reuse erasable_pending_wbuf_list blocks\n"));
3cceb9f6 86 spin_unlock(&c->erase_completion_lock);
a42163d7 87 jffs2_flush_wbuf_pad(c);
3cceb9f6 88 spin_lock(&c->erase_completion_lock);
a42163d7 89 goto again;
1da177e4
LT
90 } else {
91 /* Eep. All were empty */
92 D1(printk(KERN_NOTICE "jffs2: No clean, dirty _or_ erasable blocks to GC from! Where are they all?\n"));
93 return NULL;
94 }
95
96 ret = list_entry(nextlist->next, struct jffs2_eraseblock, list);
97 list_del(&ret->list);
98 c->gcblock = ret;
99 ret->gc_node = ret->first_node;
100 if (!ret->gc_node) {
101 printk(KERN_WARNING "Eep. ret->gc_node for block at 0x%08x is NULL\n", ret->offset);
102 BUG();
103 }
182ec4ee 104
1da177e4
LT
105 /* Have we accidentally picked a clean block with wasted space ? */
106 if (ret->wasted_size) {
107 D1(printk(KERN_DEBUG "Converting wasted_size %08x to dirty_size\n", ret->wasted_size));
108 ret->dirty_size += ret->wasted_size;
109 c->wasted_size -= ret->wasted_size;
110 c->dirty_size += ret->wasted_size;
111 ret->wasted_size = 0;
112 }
113
1da177e4
LT
114 return ret;
115}
116
117/* jffs2_garbage_collect_pass
118 * Make a single attempt to progress GC. Move one node, and possibly
119 * start erasing one eraseblock.
120 */
121int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
122{
123 struct jffs2_inode_info *f;
124 struct jffs2_inode_cache *ic;
125 struct jffs2_eraseblock *jeb;
126 struct jffs2_raw_node_ref *raw;
127 int ret = 0, inum, nlink;
aa98d7cf 128 int xattr = 0;
1da177e4
LT
129
130 if (down_interruptible(&c->alloc_sem))
131 return -EINTR;
132
133 for (;;) {
134 spin_lock(&c->erase_completion_lock);
135 if (!c->unchecked_size)
136 break;
137
138 /* We can't start doing GC yet. We haven't finished checking
139 the node CRCs etc. Do it now. */
182ec4ee 140
1da177e4 141 /* checked_ino is protected by the alloc_sem */
aa98d7cf 142 if (c->checked_ino > c->highest_ino && xattr) {
1da177e4
LT
143 printk(KERN_CRIT "Checked all inodes but still 0x%x bytes of unchecked space?\n",
144 c->unchecked_size);
e0c8e42f 145 jffs2_dbg_dump_block_lists_nolock(c);
1da177e4
LT
146 spin_unlock(&c->erase_completion_lock);
147 BUG();
148 }
149
150 spin_unlock(&c->erase_completion_lock);
151
aa98d7cf
KK
152 if (!xattr)
153 xattr = jffs2_verify_xattr(c);
154
1da177e4
LT
155 spin_lock(&c->inocache_lock);
156
157 ic = jffs2_get_ino_cache(c, c->checked_ino++);
158
159 if (!ic) {
160 spin_unlock(&c->inocache_lock);
161 continue;
162 }
163
164 if (!ic->nlink) {
165 D1(printk(KERN_DEBUG "Skipping check of ino #%d with nlink zero\n",
166 ic->ino));
167 spin_unlock(&c->inocache_lock);
168 continue;
169 }
170 switch(ic->state) {
171 case INO_STATE_CHECKEDABSENT:
172 case INO_STATE_PRESENT:
173 D1(printk(KERN_DEBUG "Skipping ino #%u already checked\n", ic->ino));
174 spin_unlock(&c->inocache_lock);
175 continue;
176
177 case INO_STATE_GC:
178 case INO_STATE_CHECKING:
179 printk(KERN_WARNING "Inode #%u is in state %d during CRC check phase!\n", ic->ino, ic->state);
180 spin_unlock(&c->inocache_lock);
181 BUG();
182
183 case INO_STATE_READING:
184 /* We need to wait for it to finish, lest we move on
182ec4ee 185 and trigger the BUG() above while we haven't yet
1da177e4
LT
186 finished checking all its nodes */
187 D1(printk(KERN_DEBUG "Waiting for ino #%u to finish reading\n", ic->ino));
d96fb997
DW
188 /* We need to come back again for the _same_ inode. We've
189 made no progress in this case, but that should be OK */
190 c->checked_ino--;
191
1da177e4
LT
192 up(&c->alloc_sem);
193 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
194 return 0;
195
196 default:
197 BUG();
198
199 case INO_STATE_UNCHECKED:
200 ;
201 }
202 ic->state = INO_STATE_CHECKING;
203 spin_unlock(&c->inocache_lock);
204
205 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() triggering inode scan of ino#%u\n", ic->ino));
206
207 ret = jffs2_do_crccheck_inode(c, ic);
208 if (ret)
209 printk(KERN_WARNING "Returned error for crccheck of ino #%u. Expect badness...\n", ic->ino);
210
211 jffs2_set_inocache_state(c, ic, INO_STATE_CHECKEDABSENT);
212 up(&c->alloc_sem);
213 return ret;
214 }
215
216 /* First, work out which block we're garbage-collecting */
217 jeb = c->gcblock;
218
219 if (!jeb)
220 jeb = jffs2_find_gc_block(c);
221
222 if (!jeb) {
223 D1 (printk(KERN_NOTICE "jffs2: Couldn't find erase block to garbage collect!\n"));
224 spin_unlock(&c->erase_completion_lock);
225 up(&c->alloc_sem);
226 return -EIO;
227 }
228
229 D1(printk(KERN_DEBUG "GC from block %08x, used_size %08x, dirty_size %08x, free_size %08x\n", jeb->offset, jeb->used_size, jeb->dirty_size, jeb->free_size));
230 D1(if (c->nextblock)
231 printk(KERN_DEBUG "Nextblock at %08x, used_size %08x, dirty_size %08x, wasted_size %08x, free_size %08x\n", c->nextblock->offset, c->nextblock->used_size, c->nextblock->dirty_size, c->nextblock->wasted_size, c->nextblock->free_size));
232
233 if (!jeb->used_size) {
234 up(&c->alloc_sem);
235 goto eraseit;
236 }
237
238 raw = jeb->gc_node;
182ec4ee 239
1da177e4
LT
240 while(ref_obsolete(raw)) {
241 D1(printk(KERN_DEBUG "Node at 0x%08x is obsolete... skipping\n", ref_offset(raw)));
242 raw = raw->next_phys;
243 if (unlikely(!raw)) {
244 printk(KERN_WARNING "eep. End of raw list while still supposedly nodes to GC\n");
182ec4ee 245 printk(KERN_WARNING "erase block at 0x%08x. free_size 0x%08x, dirty_size 0x%08x, used_size 0x%08x\n",
1da177e4
LT
246 jeb->offset, jeb->free_size, jeb->dirty_size, jeb->used_size);
247 jeb->gc_node = raw;
248 spin_unlock(&c->erase_completion_lock);
249 up(&c->alloc_sem);
250 BUG();
251 }
252 }
253 jeb->gc_node = raw;
254
255 D1(printk(KERN_DEBUG "Going to garbage collect node at 0x%08x\n", ref_offset(raw)));
256
257 if (!raw->next_in_ino) {
258 /* Inode-less node. Clean marker, snapshot or something like that */
259 /* FIXME: If it's something that needs to be copied, including something
260 we don't grok that has JFFS2_NODETYPE_RWCOMPAT_COPY, we should do so */
261 spin_unlock(&c->erase_completion_lock);
262 jffs2_mark_node_obsolete(c, raw);
263 up(&c->alloc_sem);
264 goto eraseit_lock;
265 }
266
267 ic = jffs2_raw_ref_to_ic(raw);
268
aa98d7cf
KK
269 /* When 'ic' refers xattr_datum/xattr_ref, this node is GCed as xattr.
270 We can decide whether this node is inode or xattr by ic->class.
271 ret = 0 : ic is xattr_datum/xattr_ref, and GC was SUCCESSED.
272 ret < 0 : ic is xattr_datum/xattr_ref, but GC was FAILED.
273 ret > 0 : ic is NOT xattr_datum/xattr_ref.
274 */
275 ret = jffs2_garbage_collect_xattr(c, ic);
276 if (ret <= 0)
277 goto release_sem;
278
1da177e4 279 /* We need to hold the inocache. Either the erase_completion_lock or
182ec4ee 280 the inocache_lock are sufficient; we trade down since the inocache_lock
1da177e4
LT
281 causes less contention. */
282 spin_lock(&c->inocache_lock);
283
284 spin_unlock(&c->erase_completion_lock);
285
286 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass collecting from block @0x%08x. Node @0x%08x(%d), ino #%u\n", jeb->offset, ref_offset(raw), ref_flags(raw), ic->ino));
287
288 /* Three possibilities:
289 1. Inode is already in-core. We must iget it and do proper
290 updating to its fragtree, etc.
291 2. Inode is not in-core, node is REF_PRISTINE. We lock the
292 inocache to prevent a read_inode(), copy the node intact.
293 3. Inode is not in-core, node is not pristine. We must iget()
294 and take the slow path.
295 */
296
297 switch(ic->state) {
298 case INO_STATE_CHECKEDABSENT:
182ec4ee 299 /* It's been checked, but it's not currently in-core.
1da177e4
LT
300 We can just copy any pristine nodes, but have
301 to prevent anyone else from doing read_inode() while
302 we're at it, so we set the state accordingly */
303 if (ref_flags(raw) == REF_PRISTINE)
304 ic->state = INO_STATE_GC;
305 else {
182ec4ee 306 D1(printk(KERN_DEBUG "Ino #%u is absent but node not REF_PRISTINE. Reading.\n",
1da177e4
LT
307 ic->ino));
308 }
309 break;
310
311 case INO_STATE_PRESENT:
312 /* It's in-core. GC must iget() it. */
313 break;
314
315 case INO_STATE_UNCHECKED:
316 case INO_STATE_CHECKING:
317 case INO_STATE_GC:
318 /* Should never happen. We should have finished checking
182ec4ee
TG
319 by the time we actually start doing any GC, and since
320 we're holding the alloc_sem, no other garbage collection
1da177e4
LT
321 can happen.
322 */
323 printk(KERN_CRIT "Inode #%u already in state %d in jffs2_garbage_collect_pass()!\n",
324 ic->ino, ic->state);
325 up(&c->alloc_sem);
326 spin_unlock(&c->inocache_lock);
327 BUG();
328
329 case INO_STATE_READING:
330 /* Someone's currently trying to read it. We must wait for
331 them to finish and then go through the full iget() route
332 to do the GC. However, sometimes read_inode() needs to get
333 the alloc_sem() (for marking nodes invalid) so we must
334 drop the alloc_sem before sleeping. */
335
336 up(&c->alloc_sem);
337 D1(printk(KERN_DEBUG "jffs2_garbage_collect_pass() waiting for ino #%u in state %d\n",
338 ic->ino, ic->state));
339 sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
182ec4ee 340 /* And because we dropped the alloc_sem we must start again from the
1da177e4
LT
341 beginning. Ponder chance of livelock here -- we're returning success
342 without actually making any progress.
343
182ec4ee 344 Q: What are the chances that the inode is back in INO_STATE_READING
1da177e4
LT
345 again by the time we next enter this function? And that this happens
346 enough times to cause a real delay?
347
182ec4ee 348 A: Small enough that I don't care :)
1da177e4
LT
349 */
350 return 0;
351 }
352
353 /* OK. Now if the inode is in state INO_STATE_GC, we are going to copy the
182ec4ee 354 node intact, and we don't have to muck about with the fragtree etc.
1da177e4
LT
355 because we know it's not in-core. If it _was_ in-core, we go through
356 all the iget() crap anyway */
357
358 if (ic->state == INO_STATE_GC) {
359 spin_unlock(&c->inocache_lock);
360
361 ret = jffs2_garbage_collect_pristine(c, ic, raw);
362
363 spin_lock(&c->inocache_lock);
364 ic->state = INO_STATE_CHECKEDABSENT;
365 wake_up(&c->inocache_wq);
366
367 if (ret != -EBADFD) {
368 spin_unlock(&c->inocache_lock);
369 goto release_sem;
370 }
371
372 /* Fall through if it wanted us to, with inocache_lock held */
373 }
374
375 /* Prevent the fairly unlikely race where the gcblock is
376 entirely obsoleted by the final close of a file which had
377 the only valid nodes in the block, followed by erasure,
378 followed by freeing of the ic because the erased block(s)
379 held _all_ the nodes of that inode.... never been seen but
380 it's vaguely possible. */
381
382 inum = ic->ino;
383 nlink = ic->nlink;
384 spin_unlock(&c->inocache_lock);
385
386 f = jffs2_gc_fetch_inode(c, inum, nlink);
387 if (IS_ERR(f)) {
388 ret = PTR_ERR(f);
389 goto release_sem;
390 }
391 if (!f) {
392 ret = 0;
393 goto release_sem;
394 }
395
396 ret = jffs2_garbage_collect_live(c, jeb, raw, f);
397
398 jffs2_gc_release_inode(c, f);
399
400 release_sem:
401 up(&c->alloc_sem);
402
403 eraseit_lock:
404 /* If we've finished this block, start it erasing */
405 spin_lock(&c->erase_completion_lock);
406
407 eraseit:
408 if (c->gcblock && !c->gcblock->used_size) {
409 D1(printk(KERN_DEBUG "Block at 0x%08x completely obsoleted by GC. Moving to erase_pending_list\n", c->gcblock->offset));
410 /* We're GC'ing an empty block? */
411 list_add_tail(&c->gcblock->list, &c->erase_pending_list);
412 c->gcblock = NULL;
413 c->nr_erasing_blocks++;
414 jffs2_erase_pending_trigger(c);
415 }
416 spin_unlock(&c->erase_completion_lock);
417
418 return ret;
419}
420
421static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
422 struct jffs2_raw_node_ref *raw, struct jffs2_inode_info *f)
423{
424 struct jffs2_node_frag *frag;
425 struct jffs2_full_dnode *fn = NULL;
426 struct jffs2_full_dirent *fd;
427 uint32_t start = 0, end = 0, nrfrags = 0;
428 int ret = 0;
429
430 down(&f->sem);
431
432 /* Now we have the lock for this inode. Check that it's still the one at the head
433 of the list. */
434
435 spin_lock(&c->erase_completion_lock);
436
437 if (c->gcblock != jeb) {
438 spin_unlock(&c->erase_completion_lock);
439 D1(printk(KERN_DEBUG "GC block is no longer gcblock. Restart\n"));
440 goto upnout;
441 }
442 if (ref_obsolete(raw)) {
443 spin_unlock(&c->erase_completion_lock);
444 D1(printk(KERN_DEBUG "node to be GC'd was obsoleted in the meantime.\n"));
445 /* They'll call again */
446 goto upnout;
447 }
448 spin_unlock(&c->erase_completion_lock);
449
450 /* OK. Looks safe. And nobody can get us now because we have the semaphore. Move the block */
451 if (f->metadata && f->metadata->raw == raw) {
452 fn = f->metadata;
453 ret = jffs2_garbage_collect_metadata(c, jeb, f, fn);
454 goto upnout;
455 }
456
457 /* FIXME. Read node and do lookup? */
458 for (frag = frag_first(&f->fragtree); frag; frag = frag_next(frag)) {
459 if (frag->node && frag->node->raw == raw) {
460 fn = frag->node;
461 end = frag->ofs + frag->size;
462 if (!nrfrags++)
463 start = frag->ofs;
464 if (nrfrags == frag->node->frags)
465 break; /* We've found them all */
466 }
467 }
468 if (fn) {
469 if (ref_flags(raw) == REF_PRISTINE) {
470 ret = jffs2_garbage_collect_pristine(c, f->inocache, raw);
471 if (!ret) {
472 /* Urgh. Return it sensibly. */
473 frag->node->raw = f->inocache->nodes;
182ec4ee 474 }
1da177e4
LT
475 if (ret != -EBADFD)
476 goto upnout;
477 }
478 /* We found a datanode. Do the GC */
479 if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
480 /* It crosses a page boundary. Therefore, it must be a hole. */
481 ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
482 } else {
483 /* It could still be a hole. But we GC the page this way anyway */
484 ret = jffs2_garbage_collect_dnode(c, jeb, f, fn, start, end);
485 }
486 goto upnout;
487 }
182ec4ee 488
1da177e4
LT
489 /* Wasn't a dnode. Try dirent */
490 for (fd = f->dents; fd; fd=fd->next) {
491 if (fd->raw == raw)
492 break;
493 }
494
495 if (fd && fd->ino) {
496 ret = jffs2_garbage_collect_dirent(c, jeb, f, fd);
497 } else if (fd) {
498 ret = jffs2_garbage_collect_deletion_dirent(c, jeb, f, fd);
499 } else {
500 printk(KERN_WARNING "Raw node at 0x%08x wasn't in node lists for ino #%u\n",
501 ref_offset(raw), f->inocache->ino);
502 if (ref_obsolete(raw)) {
503 printk(KERN_WARNING "But it's obsolete so we don't mind too much\n");
504 } else {
e0c8e42f
AB
505 jffs2_dbg_dump_node(c, ref_offset(raw));
506 BUG();
1da177e4
LT
507 }
508 }
509 upnout:
510 up(&f->sem);
511
512 return ret;
513}
514
182ec4ee 515static int jffs2_garbage_collect_pristine(struct jffs2_sb_info *c,
1da177e4
LT
516 struct jffs2_inode_cache *ic,
517 struct jffs2_raw_node_ref *raw)
518{
519 union jffs2_node_union *node;
520 struct jffs2_raw_node_ref *nraw;
521 size_t retlen;
522 int ret;
523 uint32_t phys_ofs, alloclen;
524 uint32_t crc, rawlen;
525 int retried = 0;
526
527 D1(printk(KERN_DEBUG "Going to GC REF_PRISTINE node at 0x%08x\n", ref_offset(raw)));
528
529 rawlen = ref_totlen(c, c->gcblock, raw);
530
531 /* Ask for a small amount of space (or the totlen if smaller) because we
532 don't want to force wastage of the end of a block if splitting would
533 work. */
e631ddba
FH
534 ret = jffs2_reserve_space_gc(c, min_t(uint32_t, sizeof(struct jffs2_raw_inode) +
535 JFFS2_MIN_DATA_LEN, rawlen), &phys_ofs, &alloclen, rawlen);
536 /* this is not the exact summary size of it,
537 it is only an upper estimation */
538
1da177e4
LT
539 if (ret)
540 return ret;
541
542 if (alloclen < rawlen) {
543 /* Doesn't fit untouched. We'll go the old route and split it */
544 return -EBADFD;
545 }
546
547 node = kmalloc(rawlen, GFP_KERNEL);
548 if (!node)
549 return -ENOMEM;
550
551 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)node);
552 if (!ret && retlen != rawlen)
553 ret = -EIO;
554 if (ret)
555 goto out_node;
556
557 crc = crc32(0, node, sizeof(struct jffs2_unknown_node)-4);
558 if (je32_to_cpu(node->u.hdr_crc) != crc) {
559 printk(KERN_WARNING "Header CRC failed on REF_PRISTINE node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
560 ref_offset(raw), je32_to_cpu(node->u.hdr_crc), crc);
561 goto bail;
562 }
563
564 switch(je16_to_cpu(node->u.nodetype)) {
565 case JFFS2_NODETYPE_INODE:
566 crc = crc32(0, node, sizeof(node->i)-8);
567 if (je32_to_cpu(node->i.node_crc) != crc) {
568 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
569 ref_offset(raw), je32_to_cpu(node->i.node_crc), crc);
570 goto bail;
571 }
572
573 if (je32_to_cpu(node->i.dsize)) {
574 crc = crc32(0, node->i.data, je32_to_cpu(node->i.csize));
575 if (je32_to_cpu(node->i.data_crc) != crc) {
576 printk(KERN_WARNING "Data CRC failed on REF_PRISTINE data node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
577 ref_offset(raw), je32_to_cpu(node->i.data_crc), crc);
578 goto bail;
579 }
580 }
581 break;
582
583 case JFFS2_NODETYPE_DIRENT:
584 crc = crc32(0, node, sizeof(node->d)-8);
585 if (je32_to_cpu(node->d.node_crc) != crc) {
586 printk(KERN_WARNING "Node CRC failed on REF_PRISTINE dirent node at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
587 ref_offset(raw), je32_to_cpu(node->d.node_crc), crc);
588 goto bail;
589 }
590
591 if (node->d.nsize) {
592 crc = crc32(0, node->d.name, node->d.nsize);
593 if (je32_to_cpu(node->d.name_crc) != crc) {
594 printk(KERN_WARNING "Name CRC failed on REF_PRISTINE dirent ode at 0x%08x: Read 0x%08x, calculated 0x%08x\n",
595 ref_offset(raw), je32_to_cpu(node->d.name_crc), crc);
596 goto bail;
597 }
598 }
599 break;
600 default:
182ec4ee 601 printk(KERN_WARNING "Unknown node type for REF_PRISTINE node at 0x%08x: 0x%04x\n",
1da177e4
LT
602 ref_offset(raw), je16_to_cpu(node->u.nodetype));
603 goto bail;
604 }
605
606 nraw = jffs2_alloc_raw_node_ref();
607 if (!nraw) {
608 ret = -ENOMEM;
609 goto out_node;
610 }
611
612 /* OK, all the CRCs are good; this node can just be copied as-is. */
613 retry:
614 nraw->flash_offset = phys_ofs;
615 nraw->__totlen = rawlen;
616 nraw->next_phys = NULL;
617
618 ret = jffs2_flash_write(c, phys_ofs, rawlen, &retlen, (char *)node);
619
620 if (ret || (retlen != rawlen)) {
621 printk(KERN_NOTICE "Write of %d bytes at 0x%08x failed. returned %d, retlen %zd\n",
622 rawlen, phys_ofs, ret, retlen);
623 if (retlen) {
624 /* Doesn't belong to any inode */
625 nraw->next_in_ino = NULL;
626
627 nraw->flash_offset |= REF_OBSOLETE;
628 jffs2_add_physical_node_ref(c, nraw);
629 jffs2_mark_node_obsolete(c, nraw);
630 } else {
631 printk(KERN_NOTICE "Not marking the space at 0x%08x as dirty because the flash driver returned retlen zero\n", nraw->flash_offset);
632 jffs2_free_raw_node_ref(nraw);
633 }
634 if (!retried && (nraw = jffs2_alloc_raw_node_ref())) {
635 /* Try to reallocate space and retry */
636 uint32_t dummy;
637 struct jffs2_eraseblock *jeb = &c->blocks[phys_ofs / c->sector_size];
638
639 retried = 1;
640
641 D1(printk(KERN_DEBUG "Retrying failed write of REF_PRISTINE node.\n"));
182ec4ee 642
730554d9
AB
643 jffs2_dbg_acct_sanity_check(c,jeb);
644 jffs2_dbg_acct_paranoia_check(c, jeb);
1da177e4 645
e631ddba
FH
646 ret = jffs2_reserve_space_gc(c, rawlen, &phys_ofs, &dummy, rawlen);
647 /* this is not the exact summary size of it,
648 it is only an upper estimation */
1da177e4
LT
649
650 if (!ret) {
651 D1(printk(KERN_DEBUG "Allocated space at 0x%08x to retry failed write.\n", phys_ofs));
652
730554d9
AB
653 jffs2_dbg_acct_sanity_check(c,jeb);
654 jffs2_dbg_acct_paranoia_check(c, jeb);
1da177e4
LT
655
656 goto retry;
657 }
658 D1(printk(KERN_DEBUG "Failed to allocate space to retry failed write: %d!\n", ret));
659 jffs2_free_raw_node_ref(nraw);
660 }
661
662 jffs2_free_raw_node_ref(nraw);
663 if (!ret)
664 ret = -EIO;
665 goto out_node;
666 }
667 nraw->flash_offset |= REF_PRISTINE;
668 jffs2_add_physical_node_ref(c, nraw);
669
670 /* Link into per-inode list. This is safe because of the ic
671 state being INO_STATE_GC. Note that if we're doing this
672 for an inode which is in-core, the 'nraw' pointer is then
673 going to be fetched from ic->nodes by our caller. */
674 spin_lock(&c->erase_completion_lock);
675 nraw->next_in_ino = ic->nodes;
676 ic->nodes = nraw;
677 spin_unlock(&c->erase_completion_lock);
678
679 jffs2_mark_node_obsolete(c, raw);
680 D1(printk(KERN_DEBUG "WHEEE! GC REF_PRISTINE node at 0x%08x succeeded\n", ref_offset(raw)));
681
682 out_node:
683 kfree(node);
684 return ret;
685 bail:
686 ret = -EBADFD;
687 goto out_node;
688}
689
182ec4ee 690static int jffs2_garbage_collect_metadata(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1da177e4
LT
691 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn)
692{
693 struct jffs2_full_dnode *new_fn;
694 struct jffs2_raw_inode ri;
8557fd51 695 struct jffs2_node_frag *last_frag;
1da177e4
LT
696 jint16_t dev;
697 char *mdata = NULL, mdatalen = 0;
8557fd51 698 uint32_t alloclen, phys_ofs, ilen;
1da177e4
LT
699 int ret;
700
701 if (S_ISBLK(JFFS2_F_I_MODE(f)) ||
702 S_ISCHR(JFFS2_F_I_MODE(f)) ) {
703 /* For these, we don't actually need to read the old node */
704 /* FIXME: for minor or major > 255. */
182ec4ee 705 dev = cpu_to_je16(((JFFS2_F_I_RDEV_MAJ(f) << 8) |
1da177e4
LT
706 JFFS2_F_I_RDEV_MIN(f)));
707 mdata = (char *)&dev;
708 mdatalen = sizeof(dev);
709 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bytes of kdev_t\n", mdatalen));
710 } else if (S_ISLNK(JFFS2_F_I_MODE(f))) {
711 mdatalen = fn->size;
712 mdata = kmalloc(fn->size, GFP_KERNEL);
713 if (!mdata) {
714 printk(KERN_WARNING "kmalloc of mdata failed in jffs2_garbage_collect_metadata()\n");
715 return -ENOMEM;
716 }
717 ret = jffs2_read_dnode(c, f, fn, mdata, 0, mdatalen);
718 if (ret) {
719 printk(KERN_WARNING "read of old metadata failed in jffs2_garbage_collect_metadata(): %d\n", ret);
720 kfree(mdata);
721 return ret;
722 }
723 D1(printk(KERN_DEBUG "jffs2_garbage_collect_metadata(): Writing %d bites of symlink target\n", mdatalen));
724
725 }
182ec4ee 726
e631ddba
FH
727 ret = jffs2_reserve_space_gc(c, sizeof(ri) + mdatalen, &phys_ofs, &alloclen,
728 JFFS2_SUMMARY_INODE_SIZE);
1da177e4
LT
729 if (ret) {
730 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_metadata failed: %d\n",
731 sizeof(ri)+ mdatalen, ret);
732 goto out;
733 }
182ec4ee 734
8557fd51
AB
735 last_frag = frag_last(&f->fragtree);
736 if (last_frag)
737 /* Fetch the inode length from the fragtree rather then
738 * from i_size since i_size may have not been updated yet */
739 ilen = last_frag->ofs + last_frag->size;
740 else
741 ilen = JFFS2_F_I_SIZE(f);
182ec4ee 742
1da177e4
LT
743 memset(&ri, 0, sizeof(ri));
744 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
745 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
746 ri.totlen = cpu_to_je32(sizeof(ri) + mdatalen);
747 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
748
749 ri.ino = cpu_to_je32(f->inocache->ino);
750 ri.version = cpu_to_je32(++f->highest_version);
751 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
752 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
753 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
8557fd51 754 ri.isize = cpu_to_je32(ilen);
1da177e4
LT
755 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
756 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
757 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
758 ri.offset = cpu_to_je32(0);
759 ri.csize = cpu_to_je32(mdatalen);
760 ri.dsize = cpu_to_je32(mdatalen);
761 ri.compr = JFFS2_COMPR_NONE;
762 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
763 ri.data_crc = cpu_to_je32(crc32(0, mdata, mdatalen));
764
765 new_fn = jffs2_write_dnode(c, f, &ri, mdata, mdatalen, phys_ofs, ALLOC_GC);
766
767 if (IS_ERR(new_fn)) {
768 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
769 ret = PTR_ERR(new_fn);
770 goto out;
771 }
772 jffs2_mark_node_obsolete(c, fn->raw);
773 jffs2_free_full_dnode(fn);
774 f->metadata = new_fn;
775 out:
776 if (S_ISLNK(JFFS2_F_I_MODE(f)))
777 kfree(mdata);
778 return ret;
779}
780
182ec4ee 781static int jffs2_garbage_collect_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1da177e4
LT
782 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
783{
784 struct jffs2_full_dirent *new_fd;
785 struct jffs2_raw_dirent rd;
786 uint32_t alloclen, phys_ofs;
787 int ret;
788
789 rd.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
790 rd.nodetype = cpu_to_je16(JFFS2_NODETYPE_DIRENT);
791 rd.nsize = strlen(fd->name);
792 rd.totlen = cpu_to_je32(sizeof(rd) + rd.nsize);
793 rd.hdr_crc = cpu_to_je32(crc32(0, &rd, sizeof(struct jffs2_unknown_node)-4));
794
795 rd.pino = cpu_to_je32(f->inocache->ino);
796 rd.version = cpu_to_je32(++f->highest_version);
797 rd.ino = cpu_to_je32(fd->ino);
3a69e0cd
AB
798 /* If the times on this inode were set by explicit utime() they can be different,
799 so refrain from splatting them. */
800 if (JFFS2_F_I_MTIME(f) == JFFS2_F_I_CTIME(f))
801 rd.mctime = cpu_to_je32(JFFS2_F_I_MTIME(f));
182ec4ee 802 else
3a69e0cd 803 rd.mctime = cpu_to_je32(0);
1da177e4
LT
804 rd.type = fd->type;
805 rd.node_crc = cpu_to_je32(crc32(0, &rd, sizeof(rd)-8));
806 rd.name_crc = cpu_to_je32(crc32(0, fd->name, rd.nsize));
182ec4ee 807
e631ddba
FH
808 ret = jffs2_reserve_space_gc(c, sizeof(rd)+rd.nsize, &phys_ofs, &alloclen,
809 JFFS2_SUMMARY_DIRENT_SIZE(rd.nsize));
1da177e4
LT
810 if (ret) {
811 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dirent failed: %d\n",
812 sizeof(rd)+rd.nsize, ret);
813 return ret;
814 }
815 new_fd = jffs2_write_dirent(c, f, &rd, fd->name, rd.nsize, phys_ofs, ALLOC_GC);
816
817 if (IS_ERR(new_fd)) {
818 printk(KERN_WARNING "jffs2_write_dirent in garbage_collect_dirent failed: %ld\n", PTR_ERR(new_fd));
819 return PTR_ERR(new_fd);
820 }
821 jffs2_add_fd_to_list(c, new_fd, &f->dents);
822 return 0;
823}
824
182ec4ee 825static int jffs2_garbage_collect_deletion_dirent(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1da177e4
LT
826 struct jffs2_inode_info *f, struct jffs2_full_dirent *fd)
827{
828 struct jffs2_full_dirent **fdp = &f->dents;
829 int found = 0;
830
831 /* On a medium where we can't actually mark nodes obsolete
832 pernamently, such as NAND flash, we need to work out
833 whether this deletion dirent is still needed to actively
834 delete a 'real' dirent with the same name that's still
835 somewhere else on the flash. */
836 if (!jffs2_can_mark_obsolete(c)) {
837 struct jffs2_raw_dirent *rd;
838 struct jffs2_raw_node_ref *raw;
839 int ret;
840 size_t retlen;
841 int name_len = strlen(fd->name);
842 uint32_t name_crc = crc32(0, fd->name, name_len);
843 uint32_t rawlen = ref_totlen(c, jeb, fd->raw);
844
845 rd = kmalloc(rawlen, GFP_KERNEL);
846 if (!rd)
847 return -ENOMEM;
848
849 /* Prevent the erase code from nicking the obsolete node refs while
850 we're looking at them. I really don't like this extra lock but
851 can't see any alternative. Suggestions on a postcard to... */
852 down(&c->erase_free_sem);
853
854 for (raw = f->inocache->nodes; raw != (void *)f->inocache; raw = raw->next_in_ino) {
855
856 /* We only care about obsolete ones */
857 if (!(ref_obsolete(raw)))
858 continue;
859
860 /* Any dirent with the same name is going to have the same length... */
861 if (ref_totlen(c, NULL, raw) != rawlen)
862 continue;
863
182ec4ee 864 /* Doesn't matter if there's one in the same erase block. We're going to
1da177e4 865 delete it too at the same time. */
3be36675 866 if (SECTOR_ADDR(raw->flash_offset) == SECTOR_ADDR(fd->raw->flash_offset))
1da177e4
LT
867 continue;
868
869 D1(printk(KERN_DEBUG "Check potential deletion dirent at %08x\n", ref_offset(raw)));
870
871 /* This is an obsolete node belonging to the same directory, and it's of the right
872 length. We need to take a closer look...*/
873 ret = jffs2_flash_read(c, ref_offset(raw), rawlen, &retlen, (char *)rd);
874 if (ret) {
875 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Read error (%d) reading obsolete node at %08x\n", ret, ref_offset(raw));
876 /* If we can't read it, we don't need to continue to obsolete it. Continue */
877 continue;
878 }
879 if (retlen != rawlen) {
880 printk(KERN_WARNING "jffs2_g_c_deletion_dirent(): Short read (%zd not %u) reading header from obsolete node at %08x\n",
881 retlen, rawlen, ref_offset(raw));
882 continue;
883 }
884
885 if (je16_to_cpu(rd->nodetype) != JFFS2_NODETYPE_DIRENT)
886 continue;
887
888 /* If the name CRC doesn't match, skip */
889 if (je32_to_cpu(rd->name_crc) != name_crc)
890 continue;
891
892 /* If the name length doesn't match, or it's another deletion dirent, skip */
893 if (rd->nsize != name_len || !je32_to_cpu(rd->ino))
894 continue;
895
896 /* OK, check the actual name now */
897 if (memcmp(rd->name, fd->name, name_len))
898 continue;
899
900 /* OK. The name really does match. There really is still an older node on
901 the flash which our deletion dirent obsoletes. So we have to write out
902 a new deletion dirent to replace it */
903 up(&c->erase_free_sem);
904
905 D1(printk(KERN_DEBUG "Deletion dirent at %08x still obsoletes real dirent \"%s\" at %08x for ino #%u\n",
906 ref_offset(fd->raw), fd->name, ref_offset(raw), je32_to_cpu(rd->ino)));
907 kfree(rd);
908
909 return jffs2_garbage_collect_dirent(c, jeb, f, fd);
910 }
911
912 up(&c->erase_free_sem);
913 kfree(rd);
914 }
915
182ec4ee 916 /* FIXME: If we're deleting a dirent which contains the current mtime and ctime,
3a69e0cd
AB
917 we should update the metadata node with those times accordingly */
918
1da177e4
LT
919 /* No need for it any more. Just mark it obsolete and remove it from the list */
920 while (*fdp) {
921 if ((*fdp) == fd) {
922 found = 1;
923 *fdp = fd->next;
924 break;
925 }
926 fdp = &(*fdp)->next;
927 }
928 if (!found) {
929 printk(KERN_WARNING "Deletion dirent \"%s\" not found in list for ino #%u\n", fd->name, f->inocache->ino);
930 }
931 jffs2_mark_node_obsolete(c, fd->raw);
932 jffs2_free_full_dirent(fd);
933 return 0;
934}
935
936static int jffs2_garbage_collect_hole(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
937 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
938 uint32_t start, uint32_t end)
939{
940 struct jffs2_raw_inode ri;
941 struct jffs2_node_frag *frag;
942 struct jffs2_full_dnode *new_fn;
8557fd51 943 uint32_t alloclen, phys_ofs, ilen;
1da177e4
LT
944 int ret;
945
946 D1(printk(KERN_DEBUG "Writing replacement hole node for ino #%u from offset 0x%x to 0x%x\n",
947 f->inocache->ino, start, end));
182ec4ee 948
1da177e4
LT
949 memset(&ri, 0, sizeof(ri));
950
951 if(fn->frags > 1) {
952 size_t readlen;
953 uint32_t crc;
182ec4ee 954 /* It's partially obsoleted by a later write. So we have to
1da177e4
LT
955 write it out again with the _same_ version as before */
956 ret = jffs2_flash_read(c, ref_offset(fn->raw), sizeof(ri), &readlen, (char *)&ri);
957 if (readlen != sizeof(ri) || ret) {
958 printk(KERN_WARNING "Node read failed in jffs2_garbage_collect_hole. Ret %d, retlen %zd. Data will be lost by writing new hole node\n", ret, readlen);
959 goto fill;
960 }
961 if (je16_to_cpu(ri.nodetype) != JFFS2_NODETYPE_INODE) {
962 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had node type 0x%04x instead of JFFS2_NODETYPE_INODE(0x%04x)\n",
963 ref_offset(fn->raw),
964 je16_to_cpu(ri.nodetype), JFFS2_NODETYPE_INODE);
965 return -EIO;
966 }
967 if (je32_to_cpu(ri.totlen) != sizeof(ri)) {
968 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had totlen 0x%x instead of expected 0x%zx\n",
969 ref_offset(fn->raw),
970 je32_to_cpu(ri.totlen), sizeof(ri));
971 return -EIO;
972 }
973 crc = crc32(0, &ri, sizeof(ri)-8);
974 if (crc != je32_to_cpu(ri.node_crc)) {
975 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node at 0x%08x had CRC 0x%08x which doesn't match calculated CRC 0x%08x\n",
182ec4ee 976 ref_offset(fn->raw),
1da177e4
LT
977 je32_to_cpu(ri.node_crc), crc);
978 /* FIXME: We could possibly deal with this by writing new holes for each frag */
182ec4ee 979 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1da177e4
LT
980 start, end, f->inocache->ino);
981 goto fill;
982 }
983 if (ri.compr != JFFS2_COMPR_ZERO) {
984 printk(KERN_WARNING "jffs2_garbage_collect_hole: Node 0x%08x wasn't a hole node!\n", ref_offset(fn->raw));
182ec4ee 985 printk(KERN_WARNING "Data in the range 0x%08x to 0x%08x of inode #%u will be lost\n",
1da177e4
LT
986 start, end, f->inocache->ino);
987 goto fill;
988 }
989 } else {
990 fill:
991 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
992 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
993 ri.totlen = cpu_to_je32(sizeof(ri));
994 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
995
996 ri.ino = cpu_to_je32(f->inocache->ino);
997 ri.version = cpu_to_je32(++f->highest_version);
998 ri.offset = cpu_to_je32(start);
999 ri.dsize = cpu_to_je32(end - start);
1000 ri.csize = cpu_to_je32(0);
1001 ri.compr = JFFS2_COMPR_ZERO;
1002 }
182ec4ee 1003
8557fd51
AB
1004 frag = frag_last(&f->fragtree);
1005 if (frag)
1006 /* Fetch the inode length from the fragtree rather then
1007 * from i_size since i_size may have not been updated yet */
1008 ilen = frag->ofs + frag->size;
1009 else
1010 ilen = JFFS2_F_I_SIZE(f);
1011
1da177e4
LT
1012 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1013 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1014 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
8557fd51 1015 ri.isize = cpu_to_je32(ilen);
1da177e4
LT
1016 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1017 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1018 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1019 ri.data_crc = cpu_to_je32(0);
1020 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1021
e631ddba
FH
1022 ret = jffs2_reserve_space_gc(c, sizeof(ri), &phys_ofs, &alloclen,
1023 JFFS2_SUMMARY_INODE_SIZE);
1da177e4
LT
1024 if (ret) {
1025 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_hole failed: %d\n",
1026 sizeof(ri), ret);
1027 return ret;
1028 }
1029 new_fn = jffs2_write_dnode(c, f, &ri, NULL, 0, phys_ofs, ALLOC_GC);
1030
1031 if (IS_ERR(new_fn)) {
1032 printk(KERN_WARNING "Error writing new hole node: %ld\n", PTR_ERR(new_fn));
1033 return PTR_ERR(new_fn);
1034 }
1035 if (je32_to_cpu(ri.version) == f->highest_version) {
1036 jffs2_add_full_dnode_to_inode(c, f, new_fn);
1037 if (f->metadata) {
1038 jffs2_mark_node_obsolete(c, f->metadata->raw);
1039 jffs2_free_full_dnode(f->metadata);
1040 f->metadata = NULL;
1041 }
1042 return 0;
1043 }
1044
182ec4ee 1045 /*
1da177e4
LT
1046 * We should only get here in the case where the node we are
1047 * replacing had more than one frag, so we kept the same version
182ec4ee 1048 * number as before. (Except in case of error -- see 'goto fill;'
1da177e4
LT
1049 * above.)
1050 */
1051 D1(if(unlikely(fn->frags <= 1)) {
1052 printk(KERN_WARNING "jffs2_garbage_collect_hole: Replacing fn with %d frag(s) but new ver %d != highest_version %d of ino #%d\n",
1053 fn->frags, je32_to_cpu(ri.version), f->highest_version,
1054 je32_to_cpu(ri.ino));
1055 });
1056
1057 /* This is a partially-overlapped hole node. Mark it REF_NORMAL not REF_PRISTINE */
1058 mark_ref_normal(new_fn->raw);
1059
182ec4ee 1060 for (frag = jffs2_lookup_node_frag(&f->fragtree, fn->ofs);
1da177e4
LT
1061 frag; frag = frag_next(frag)) {
1062 if (frag->ofs > fn->size + fn->ofs)
1063 break;
1064 if (frag->node == fn) {
1065 frag->node = new_fn;
1066 new_fn->frags++;
1067 fn->frags--;
1068 }
1069 }
1070 if (fn->frags) {
1071 printk(KERN_WARNING "jffs2_garbage_collect_hole: Old node still has frags!\n");
1072 BUG();
1073 }
1074 if (!new_fn->frags) {
1075 printk(KERN_WARNING "jffs2_garbage_collect_hole: New node has no frags!\n");
1076 BUG();
1077 }
182ec4ee 1078
1da177e4
LT
1079 jffs2_mark_node_obsolete(c, fn->raw);
1080 jffs2_free_full_dnode(fn);
182ec4ee 1081
1da177e4
LT
1082 return 0;
1083}
1084
1085static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_eraseblock *jeb,
1086 struct jffs2_inode_info *f, struct jffs2_full_dnode *fn,
1087 uint32_t start, uint32_t end)
1088{
1089 struct jffs2_full_dnode *new_fn;
1090 struct jffs2_raw_inode ri;
182ec4ee 1091 uint32_t alloclen, phys_ofs, offset, orig_end, orig_start;
1da177e4
LT
1092 int ret = 0;
1093 unsigned char *comprbuf = NULL, *writebuf;
1094 unsigned long pg;
1095 unsigned char *pg_ptr;
182ec4ee 1096
1da177e4
LT
1097 memset(&ri, 0, sizeof(ri));
1098
1099 D1(printk(KERN_DEBUG "Writing replacement dnode for ino #%u from offset 0x%x to 0x%x\n",
1100 f->inocache->ino, start, end));
1101
1102 orig_end = end;
1103 orig_start = start;
1104
1105 if (c->nr_free_blocks + c->nr_erasing_blocks > c->resv_blocks_gcmerge) {
1106 /* Attempt to do some merging. But only expand to cover logically
1107 adjacent frags if the block containing them is already considered
182ec4ee
TG
1108 to be dirty. Otherwise we end up with GC just going round in
1109 circles dirtying the nodes it already wrote out, especially
1da177e4
LT
1110 on NAND where we have small eraseblocks and hence a much higher
1111 chance of nodes having to be split to cross boundaries. */
1112
1113 struct jffs2_node_frag *frag;
1114 uint32_t min, max;
1115
1116 min = start & ~(PAGE_CACHE_SIZE-1);
1117 max = min + PAGE_CACHE_SIZE;
1118
1119 frag = jffs2_lookup_node_frag(&f->fragtree, start);
1120
1121 /* BUG_ON(!frag) but that'll happen anyway... */
1122
1123 BUG_ON(frag->ofs != start);
1124
1125 /* First grow down... */
1126 while((frag = frag_prev(frag)) && frag->ofs >= min) {
1127
1128 /* If the previous frag doesn't even reach the beginning, there's
1129 excessive fragmentation. Just merge. */
1130 if (frag->ofs > min) {
1131 D1(printk(KERN_DEBUG "Expanding down to cover partial frag (0x%x-0x%x)\n",
1132 frag->ofs, frag->ofs+frag->size));
1133 start = frag->ofs;
1134 continue;
1135 }
1136 /* OK. This frag holds the first byte of the page. */
1137 if (!frag->node || !frag->node->raw) {
1138 D1(printk(KERN_DEBUG "First frag in page is hole (0x%x-0x%x). Not expanding down.\n",
1139 frag->ofs, frag->ofs+frag->size));
1140 break;
1141 } else {
1142
182ec4ee 1143 /* OK, it's a frag which extends to the beginning of the page. Does it live
1da177e4
LT
1144 in a block which is still considered clean? If so, don't obsolete it.
1145 If not, cover it anyway. */
1146
1147 struct jffs2_raw_node_ref *raw = frag->node->raw;
1148 struct jffs2_eraseblock *jeb;
1149
1150 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1151
1152 if (jeb == c->gcblock) {
1153 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1154 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1155 start = frag->ofs;
1156 break;
1157 }
1158 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1159 D1(printk(KERN_DEBUG "Not expanding down to cover frag (0x%x-0x%x) in clean block %08x\n",
1160 frag->ofs, frag->ofs+frag->size, jeb->offset));
1161 break;
1162 }
1163
1164 D1(printk(KERN_DEBUG "Expanding down to cover frag (0x%x-0x%x) in dirty block %08x\n",
1165 frag->ofs, frag->ofs+frag->size, jeb->offset));
1166 start = frag->ofs;
1167 break;
1168 }
1169 }
1170
1171 /* ... then up */
1172
1173 /* Find last frag which is actually part of the node we're to GC. */
1174 frag = jffs2_lookup_node_frag(&f->fragtree, end-1);
1175
1176 while((frag = frag_next(frag)) && frag->ofs+frag->size <= max) {
1177
1178 /* If the previous frag doesn't even reach the beginning, there's lots
1179 of fragmentation. Just merge. */
1180 if (frag->ofs+frag->size < max) {
1181 D1(printk(KERN_DEBUG "Expanding up to cover partial frag (0x%x-0x%x)\n",
1182 frag->ofs, frag->ofs+frag->size));
1183 end = frag->ofs + frag->size;
1184 continue;
1185 }
1186
1187 if (!frag->node || !frag->node->raw) {
1188 D1(printk(KERN_DEBUG "Last frag in page is hole (0x%x-0x%x). Not expanding up.\n",
1189 frag->ofs, frag->ofs+frag->size));
1190 break;
1191 } else {
1192
182ec4ee 1193 /* OK, it's a frag which extends to the beginning of the page. Does it live
1da177e4
LT
1194 in a block which is still considered clean? If so, don't obsolete it.
1195 If not, cover it anyway. */
1196
1197 struct jffs2_raw_node_ref *raw = frag->node->raw;
1198 struct jffs2_eraseblock *jeb;
1199
1200 jeb = &c->blocks[raw->flash_offset / c->sector_size];
1201
1202 if (jeb == c->gcblock) {
1203 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in gcblock at %08x\n",
1204 frag->ofs, frag->ofs+frag->size, ref_offset(raw)));
1205 end = frag->ofs + frag->size;
1206 break;
1207 }
1208 if (!ISDIRTY(jeb->dirty_size + jeb->wasted_size)) {
1209 D1(printk(KERN_DEBUG "Not expanding up to cover frag (0x%x-0x%x) in clean block %08x\n",
1210 frag->ofs, frag->ofs+frag->size, jeb->offset));
1211 break;
1212 }
1213
1214 D1(printk(KERN_DEBUG "Expanding up to cover frag (0x%x-0x%x) in dirty block %08x\n",
1215 frag->ofs, frag->ofs+frag->size, jeb->offset));
1216 end = frag->ofs + frag->size;
1217 break;
1218 }
1219 }
182ec4ee 1220 D1(printk(KERN_DEBUG "Expanded dnode to write from (0x%x-0x%x) to (0x%x-0x%x)\n",
1da177e4
LT
1221 orig_start, orig_end, start, end));
1222
8557fd51 1223 D1(BUG_ON(end > frag_last(&f->fragtree)->ofs + frag_last(&f->fragtree)->size));
1da177e4
LT
1224 BUG_ON(end < orig_end);
1225 BUG_ON(start > orig_start);
1226 }
182ec4ee 1227
1da177e4
LT
1228 /* First, use readpage() to read the appropriate page into the page cache */
1229 /* Q: What happens if we actually try to GC the _same_ page for which commit_write()
1230 * triggered garbage collection in the first place?
1231 * A: I _think_ it's OK. read_cache_page shouldn't deadlock, we'll write out the
1232 * page OK. We'll actually write it out again in commit_write, which is a little
1233 * suboptimal, but at least we're correct.
1234 */
1235 pg_ptr = jffs2_gc_fetch_page(c, f, start, &pg);
1236
1237 if (IS_ERR(pg_ptr)) {
1238 printk(KERN_WARNING "read_cache_page() returned error: %ld\n", PTR_ERR(pg_ptr));
1239 return PTR_ERR(pg_ptr);
1240 }
1241
1242 offset = start;
1243 while(offset < orig_end) {
1244 uint32_t datalen;
1245 uint32_t cdatalen;
1246 uint16_t comprtype = JFFS2_COMPR_NONE;
1247
e631ddba
FH
1248 ret = jffs2_reserve_space_gc(c, sizeof(ri) + JFFS2_MIN_DATA_LEN, &phys_ofs,
1249 &alloclen, JFFS2_SUMMARY_INODE_SIZE);
1da177e4
LT
1250
1251 if (ret) {
1252 printk(KERN_WARNING "jffs2_reserve_space_gc of %zd bytes for garbage_collect_dnode failed: %d\n",
1253 sizeof(ri)+ JFFS2_MIN_DATA_LEN, ret);
1254 break;
1255 }
1256 cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
1257 datalen = end - offset;
1258
1259 writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
1260
1261 comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
1262
1263 ri.magic = cpu_to_je16(JFFS2_MAGIC_BITMASK);
1264 ri.nodetype = cpu_to_je16(JFFS2_NODETYPE_INODE);
1265 ri.totlen = cpu_to_je32(sizeof(ri) + cdatalen);
1266 ri.hdr_crc = cpu_to_je32(crc32(0, &ri, sizeof(struct jffs2_unknown_node)-4));
1267
1268 ri.ino = cpu_to_je32(f->inocache->ino);
1269 ri.version = cpu_to_je32(++f->highest_version);
1270 ri.mode = cpu_to_jemode(JFFS2_F_I_MODE(f));
1271 ri.uid = cpu_to_je16(JFFS2_F_I_UID(f));
1272 ri.gid = cpu_to_je16(JFFS2_F_I_GID(f));
1273 ri.isize = cpu_to_je32(JFFS2_F_I_SIZE(f));
1274 ri.atime = cpu_to_je32(JFFS2_F_I_ATIME(f));
1275 ri.ctime = cpu_to_je32(JFFS2_F_I_CTIME(f));
1276 ri.mtime = cpu_to_je32(JFFS2_F_I_MTIME(f));
1277 ri.offset = cpu_to_je32(offset);
1278 ri.csize = cpu_to_je32(cdatalen);
1279 ri.dsize = cpu_to_je32(datalen);
1280 ri.compr = comprtype & 0xff;
1281 ri.usercompr = (comprtype >> 8) & 0xff;
1282 ri.node_crc = cpu_to_je32(crc32(0, &ri, sizeof(ri)-8));
1283 ri.data_crc = cpu_to_je32(crc32(0, comprbuf, cdatalen));
182ec4ee 1284
1da177e4
LT
1285 new_fn = jffs2_write_dnode(c, f, &ri, comprbuf, cdatalen, phys_ofs, ALLOC_GC);
1286
1287 jffs2_free_comprbuf(comprbuf, writebuf);
1288
1289 if (IS_ERR(new_fn)) {
1290 printk(KERN_WARNING "Error writing new dnode: %ld\n", PTR_ERR(new_fn));
1291 ret = PTR_ERR(new_fn);
1292 break;
1293 }
1294 ret = jffs2_add_full_dnode_to_inode(c, f, new_fn);
1295 offset += datalen;
1296 if (f->metadata) {
1297 jffs2_mark_node_obsolete(c, f->metadata->raw);
1298 jffs2_free_full_dnode(f->metadata);
1299 f->metadata = NULL;
1300 }
1301 }
1302
1303 jffs2_gc_release_page(c, pg_ptr, &pg);
1304 return ret;
1305}