]>
Commit | Line | Data |
---|---|---|
0bd49f94 RK |
1 | /* |
2 | * page.c - buffer/page management specific to NILFS | |
3 | * | |
4 | * Copyright (C) 2005-2008 Nippon Telegraph and Telephone Corporation. | |
5 | * | |
6 | * This program is free software; you can redistribute it and/or modify | |
7 | * it under the terms of the GNU General Public License as published by | |
8 | * the Free Software Foundation; either version 2 of the License, or | |
9 | * (at your option) any later version. | |
10 | * | |
11 | * This program is distributed in the hope that it will be useful, | |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * GNU General Public License for more details. | |
15 | * | |
16 | * You should have received a copy of the GNU General Public License | |
17 | * along with this program; if not, write to the Free Software | |
18 | * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA | |
19 | * | |
20 | * Written by Ryusuke Konishi <ryusuke@osrg.net>, | |
21 | * Seiji Kihara <kihara@osrg.net>. | |
22 | */ | |
23 | ||
24 | #include <linux/pagemap.h> | |
25 | #include <linux/writeback.h> | |
26 | #include <linux/swap.h> | |
27 | #include <linux/bitops.h> | |
28 | #include <linux/page-flags.h> | |
29 | #include <linux/list.h> | |
30 | #include <linux/highmem.h> | |
31 | #include <linux/pagevec.h> | |
5a0e3ad6 | 32 | #include <linux/gfp.h> |
0bd49f94 RK |
33 | #include "nilfs.h" |
34 | #include "page.h" | |
35 | #include "mdt.h" | |
36 | ||
37 | ||
/*
 * Buffer state bits that nilfs_copy_buffer() and nilfs_copy_page() carry
 * over from the source buffer head to the destination buffer head.
 */
#define NILFS_BUFFER_INHERENT_BITS					\
	((1UL << BH_Uptodate) |						\
	 (1UL << BH_Mapped) |						\
	 (1UL << BH_NILFS_Node) |					\
	 (1UL << BH_NILFS_Volatile) |					\
	 (1UL << BH_NILFS_Allocated) |					\
	 (1UL << BH_NILFS_Checked))
0bd49f94 RK |
42 | |
43 | static struct buffer_head * | |
44 | __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index, | |
45 | int blkbits, unsigned long b_state) | |
46 | ||
47 | { | |
48 | unsigned long first_block; | |
49 | struct buffer_head *bh; | |
50 | ||
51 | if (!page_has_buffers(page)) | |
52 | create_empty_buffers(page, 1 << blkbits, b_state); | |
53 | ||
54 | first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits); | |
55 | bh = nilfs_page_get_nth_block(page, block - first_block); | |
56 | ||
57 | touch_buffer(bh); | |
58 | wait_on_buffer(bh); | |
59 | return bh; | |
60 | } | |
61 | ||
62 | /* | |
63 | * Since the page cache of B-tree node pages or data page cache of pseudo | |
64 | * inodes does not have a valid mapping->host pointer, calling | |
65 | * mark_buffer_dirty() for their buffers causes a NULL pointer dereference; | |
66 | * it calls __mark_inode_dirty(NULL) through __set_page_dirty(). | |
67 | * To avoid this problem, the old style mark_buffer_dirty() is used instead. | |
68 | */ | |
69 | void nilfs_mark_buffer_dirty(struct buffer_head *bh) | |
70 | { | |
71 | if (!buffer_dirty(bh) && !test_set_buffer_dirty(bh)) | |
72 | __set_page_dirty_nobuffers(bh->b_page); | |
73 | } | |
74 | ||
75 | struct buffer_head *nilfs_grab_buffer(struct inode *inode, | |
76 | struct address_space *mapping, | |
77 | unsigned long blkoff, | |
78 | unsigned long b_state) | |
79 | { | |
80 | int blkbits = inode->i_blkbits; | |
81 | pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits); | |
82 | struct page *page, *opage; | |
83 | struct buffer_head *bh, *obh; | |
84 | ||
85 | page = grab_cache_page(mapping, index); | |
86 | if (unlikely(!page)) | |
87 | return NULL; | |
88 | ||
89 | bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state); | |
90 | if (unlikely(!bh)) { | |
91 | unlock_page(page); | |
92 | page_cache_release(page); | |
93 | return NULL; | |
94 | } | |
95 | if (!buffer_uptodate(bh) && mapping->assoc_mapping != NULL) { | |
96 | /* | |
97 | * Shadow page cache uses assoc_mapping to point its original | |
98 | * page cache. The following code tries the original cache | |
99 | * if the given cache is a shadow and it didn't hit. | |
100 | */ | |
101 | opage = find_lock_page(mapping->assoc_mapping, index); | |
102 | if (!opage) | |
103 | return bh; | |
104 | ||
105 | obh = __nilfs_get_page_block(opage, blkoff, index, blkbits, | |
106 | b_state); | |
107 | if (buffer_uptodate(obh)) { | |
108 | nilfs_copy_buffer(bh, obh); | |
109 | if (buffer_dirty(obh)) { | |
110 | nilfs_mark_buffer_dirty(bh); | |
111 | if (!buffer_nilfs_node(bh) && NILFS_MDT(inode)) | |
112 | nilfs_mdt_mark_dirty(inode); | |
113 | } | |
114 | } | |
115 | brelse(obh); | |
116 | unlock_page(opage); | |
117 | page_cache_release(opage); | |
118 | } | |
119 | return bh; | |
120 | } | |
121 | ||
122 | /** | |
123 | * nilfs_forget_buffer - discard dirty state | |
124 | * @inode: owner inode of the buffer | |
125 | * @bh: buffer head of the buffer to be discarded | |
126 | */ | |
127 | void nilfs_forget_buffer(struct buffer_head *bh) | |
128 | { | |
129 | struct page *page = bh->b_page; | |
130 | ||
131 | lock_buffer(bh); | |
132 | clear_buffer_nilfs_volatile(bh); | |
4e13e66b | 133 | clear_buffer_nilfs_checked(bh); |
84338237 RK |
134 | clear_buffer_dirty(bh); |
135 | if (nilfs_page_buffers_clean(page)) | |
0bd49f94 RK |
136 | __nilfs_clear_page_dirty(page); |
137 | ||
138 | clear_buffer_uptodate(bh); | |
139 | clear_buffer_mapped(bh); | |
140 | bh->b_blocknr = -1; | |
141 | ClearPageUptodate(page); | |
142 | ClearPageMappedToDisk(page); | |
143 | unlock_buffer(bh); | |
144 | brelse(bh); | |
145 | } | |
146 | ||
147 | /** | |
148 | * nilfs_copy_buffer -- copy buffer data and flags | |
149 | * @dbh: destination buffer | |
150 | * @sbh: source buffer | |
151 | */ | |
152 | void nilfs_copy_buffer(struct buffer_head *dbh, struct buffer_head *sbh) | |
153 | { | |
154 | void *kaddr0, *kaddr1; | |
155 | unsigned long bits; | |
156 | struct page *spage = sbh->b_page, *dpage = dbh->b_page; | |
157 | struct buffer_head *bh; | |
158 | ||
159 | kaddr0 = kmap_atomic(spage, KM_USER0); | |
160 | kaddr1 = kmap_atomic(dpage, KM_USER1); | |
161 | memcpy(kaddr1 + bh_offset(dbh), kaddr0 + bh_offset(sbh), sbh->b_size); | |
162 | kunmap_atomic(kaddr1, KM_USER1); | |
163 | kunmap_atomic(kaddr0, KM_USER0); | |
164 | ||
165 | dbh->b_state = sbh->b_state & NILFS_BUFFER_INHERENT_BITS; | |
166 | dbh->b_blocknr = sbh->b_blocknr; | |
167 | dbh->b_bdev = sbh->b_bdev; | |
168 | ||
169 | bh = dbh; | |
170 | bits = sbh->b_state & ((1UL << BH_Uptodate) | (1UL << BH_Mapped)); | |
171 | while ((bh = bh->b_this_page) != dbh) { | |
172 | lock_buffer(bh); | |
173 | bits &= bh->b_state; | |
174 | unlock_buffer(bh); | |
175 | } | |
176 | if (bits & (1UL << BH_Uptodate)) | |
177 | SetPageUptodate(dpage); | |
178 | else | |
179 | ClearPageUptodate(dpage); | |
180 | if (bits & (1UL << BH_Mapped)) | |
181 | SetPageMappedToDisk(dpage); | |
182 | else | |
183 | ClearPageMappedToDisk(dpage); | |
184 | } | |
185 | ||
186 | /** | |
187 | * nilfs_page_buffers_clean - check if a page has dirty buffers or not. | |
188 | * @page: page to be checked | |
189 | * | |
190 | * nilfs_page_buffers_clean() returns zero if the page has dirty buffers. | |
191 | * Otherwise, it returns non-zero value. | |
192 | */ | |
193 | int nilfs_page_buffers_clean(struct page *page) | |
194 | { | |
195 | struct buffer_head *bh, *head; | |
196 | ||
197 | bh = head = page_buffers(page); | |
198 | do { | |
199 | if (buffer_dirty(bh)) | |
200 | return 0; | |
201 | bh = bh->b_this_page; | |
202 | } while (bh != head); | |
203 | return 1; | |
204 | } | |
205 | ||
206 | void nilfs_page_bug(struct page *page) | |
207 | { | |
208 | struct address_space *m; | |
209 | unsigned long ino = 0; | |
210 | ||
211 | if (unlikely(!page)) { | |
212 | printk(KERN_CRIT "NILFS_PAGE_BUG(NULL)\n"); | |
213 | return; | |
214 | } | |
215 | ||
216 | m = page->mapping; | |
217 | if (m) { | |
218 | struct inode *inode = NILFS_AS_I(m); | |
219 | if (inode != NULL) | |
220 | ino = inode->i_ino; | |
221 | } | |
222 | printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx " | |
223 | "mapping=%p ino=%lu\n", | |
224 | page, atomic_read(&page->_count), | |
225 | (unsigned long long)page->index, page->flags, m, ino); | |
226 | ||
227 | if (page_has_buffers(page)) { | |
228 | struct buffer_head *bh, *head; | |
229 | int i = 0; | |
230 | ||
231 | bh = head = page_buffers(page); | |
232 | do { | |
233 | printk(KERN_CRIT | |
234 | " BH[%d] %p: cnt=%d block#=%llu state=0x%lx\n", | |
235 | i++, bh, atomic_read(&bh->b_count), | |
236 | (unsigned long long)bh->b_blocknr, bh->b_state); | |
237 | bh = bh->b_this_page; | |
238 | } while (bh != head); | |
239 | } | |
240 | } | |
241 | ||
242 | /** | |
243 | * nilfs_alloc_private_page - allocate a private page with buffer heads | |
244 | * | |
245 | * Return Value: On success, a pointer to the allocated page is returned. | |
246 | * On error, NULL is returned. | |
247 | */ | |
248 | struct page *nilfs_alloc_private_page(struct block_device *bdev, int size, | |
249 | unsigned long state) | |
250 | { | |
251 | struct buffer_head *bh, *head, *tail; | |
252 | struct page *page; | |
253 | ||
254 | page = alloc_page(GFP_NOFS); /* page_count of the returned page is 1 */ | |
255 | if (unlikely(!page)) | |
256 | return NULL; | |
257 | ||
258 | lock_page(page); | |
259 | head = alloc_page_buffers(page, size, 0); | |
260 | if (unlikely(!head)) { | |
261 | unlock_page(page); | |
262 | __free_page(page); | |
263 | return NULL; | |
264 | } | |
265 | ||
266 | bh = head; | |
267 | do { | |
268 | bh->b_state = (1UL << BH_NILFS_Allocated) | state; | |
269 | tail = bh; | |
270 | bh->b_bdev = bdev; | |
271 | bh = bh->b_this_page; | |
272 | } while (bh); | |
273 | ||
274 | tail->b_this_page = head; | |
275 | attach_page_buffers(page, head); | |
276 | ||
277 | return page; | |
278 | } | |
279 | ||
280 | void nilfs_free_private_page(struct page *page) | |
281 | { | |
282 | BUG_ON(!PageLocked(page)); | |
283 | BUG_ON(page->mapping); | |
284 | ||
285 | if (page_has_buffers(page) && !try_to_free_buffers(page)) | |
286 | NILFS_PAGE_BUG(page, "failed to free page"); | |
287 | ||
288 | unlock_page(page); | |
289 | __free_page(page); | |
290 | } | |
291 | ||
292 | /** | |
293 | * nilfs_copy_page -- copy the page with buffers | |
294 | * @dst: destination page | |
295 | * @src: source page | |
296 | * @copy_dirty: flag whether to copy dirty states on the page's buffer heads. | |
297 | * | |
7a65004b | 298 | * This function is for both data pages and btnode pages. The dirty flag |
0bd49f94 RK |
299 | * should be treated by caller. The page must not be under i/o. |
300 | * Both src and dst page must be locked | |
301 | */ | |
302 | static void nilfs_copy_page(struct page *dst, struct page *src, int copy_dirty) | |
303 | { | |
304 | struct buffer_head *dbh, *dbufs, *sbh, *sbufs; | |
305 | unsigned long mask = NILFS_BUFFER_INHERENT_BITS; | |
306 | ||
307 | BUG_ON(PageWriteback(dst)); | |
308 | ||
309 | sbh = sbufs = page_buffers(src); | |
310 | if (!page_has_buffers(dst)) | |
311 | create_empty_buffers(dst, sbh->b_size, 0); | |
312 | ||
313 | if (copy_dirty) | |
314 | mask |= (1UL << BH_Dirty); | |
315 | ||
316 | dbh = dbufs = page_buffers(dst); | |
317 | do { | |
318 | lock_buffer(sbh); | |
319 | lock_buffer(dbh); | |
320 | dbh->b_state = sbh->b_state & mask; | |
321 | dbh->b_blocknr = sbh->b_blocknr; | |
322 | dbh->b_bdev = sbh->b_bdev; | |
323 | sbh = sbh->b_this_page; | |
324 | dbh = dbh->b_this_page; | |
325 | } while (dbh != dbufs); | |
326 | ||
327 | copy_highpage(dst, src); | |
328 | ||
329 | if (PageUptodate(src) && !PageUptodate(dst)) | |
330 | SetPageUptodate(dst); | |
331 | else if (!PageUptodate(src) && PageUptodate(dst)) | |
332 | ClearPageUptodate(dst); | |
333 | if (PageMappedToDisk(src) && !PageMappedToDisk(dst)) | |
334 | SetPageMappedToDisk(dst); | |
335 | else if (!PageMappedToDisk(src) && PageMappedToDisk(dst)) | |
336 | ClearPageMappedToDisk(dst); | |
337 | ||
338 | do { | |
339 | unlock_buffer(sbh); | |
340 | unlock_buffer(dbh); | |
341 | sbh = sbh->b_this_page; | |
342 | dbh = dbh->b_this_page; | |
343 | } while (dbh != dbufs); | |
344 | } | |
345 | ||
346 | int nilfs_copy_dirty_pages(struct address_space *dmap, | |
347 | struct address_space *smap) | |
348 | { | |
349 | struct pagevec pvec; | |
350 | unsigned int i; | |
351 | pgoff_t index = 0; | |
352 | int err = 0; | |
353 | ||
354 | pagevec_init(&pvec, 0); | |
355 | repeat: | |
356 | if (!pagevec_lookup_tag(&pvec, smap, &index, PAGECACHE_TAG_DIRTY, | |
357 | PAGEVEC_SIZE)) | |
358 | return 0; | |
359 | ||
360 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
361 | struct page *page = pvec.pages[i], *dpage; | |
362 | ||
363 | lock_page(page); | |
364 | if (unlikely(!PageDirty(page))) | |
365 | NILFS_PAGE_BUG(page, "inconsistent dirty state"); | |
366 | ||
367 | dpage = grab_cache_page(dmap, page->index); | |
368 | if (unlikely(!dpage)) { | |
369 | /* No empty page is added to the page cache */ | |
370 | err = -ENOMEM; | |
371 | unlock_page(page); | |
372 | break; | |
373 | } | |
374 | if (unlikely(!page_has_buffers(page))) | |
375 | NILFS_PAGE_BUG(page, | |
376 | "found empty page in dat page cache"); | |
377 | ||
378 | nilfs_copy_page(dpage, page, 1); | |
379 | __set_page_dirty_nobuffers(dpage); | |
380 | ||
381 | unlock_page(dpage); | |
382 | page_cache_release(dpage); | |
383 | unlock_page(page); | |
384 | } | |
385 | pagevec_release(&pvec); | |
386 | cond_resched(); | |
387 | ||
388 | if (likely(!err)) | |
389 | goto repeat; | |
390 | return err; | |
391 | } | |
392 | ||
393 | /** | |
7a65004b | 394 | * nilfs_copy_back_pages -- copy back pages to original cache from shadow cache |
0bd49f94 RK |
395 | * @dmap: destination page cache |
396 | * @smap: source page cache | |
397 | * | |
398 | * No pages must no be added to the cache during this process. | |
399 | * This must be ensured by the caller. | |
400 | */ | |
401 | void nilfs_copy_back_pages(struct address_space *dmap, | |
402 | struct address_space *smap) | |
403 | { | |
404 | struct pagevec pvec; | |
405 | unsigned int i, n; | |
406 | pgoff_t index = 0; | |
407 | int err; | |
408 | ||
409 | pagevec_init(&pvec, 0); | |
410 | repeat: | |
411 | n = pagevec_lookup(&pvec, smap, index, PAGEVEC_SIZE); | |
412 | if (!n) | |
413 | return; | |
414 | index = pvec.pages[n - 1]->index + 1; | |
415 | ||
416 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
417 | struct page *page = pvec.pages[i], *dpage; | |
418 | pgoff_t offset = page->index; | |
419 | ||
420 | lock_page(page); | |
421 | dpage = find_lock_page(dmap, offset); | |
422 | if (dpage) { | |
423 | /* override existing page on the destination cache */ | |
1f5abe7e | 424 | WARN_ON(PageDirty(dpage)); |
0bd49f94 RK |
425 | nilfs_copy_page(dpage, page, 0); |
426 | unlock_page(dpage); | |
427 | page_cache_release(dpage); | |
428 | } else { | |
429 | struct page *page2; | |
430 | ||
431 | /* move the page to the destination cache */ | |
432 | spin_lock_irq(&smap->tree_lock); | |
433 | page2 = radix_tree_delete(&smap->page_tree, offset); | |
1f5abe7e RK |
434 | WARN_ON(page2 != page); |
435 | ||
0bd49f94 RK |
436 | smap->nrpages--; |
437 | spin_unlock_irq(&smap->tree_lock); | |
438 | ||
439 | spin_lock_irq(&dmap->tree_lock); | |
440 | err = radix_tree_insert(&dmap->page_tree, offset, page); | |
441 | if (unlikely(err < 0)) { | |
1f5abe7e | 442 | WARN_ON(err == -EEXIST); |
0bd49f94 RK |
443 | page->mapping = NULL; |
444 | page_cache_release(page); /* for cache */ | |
445 | } else { | |
446 | page->mapping = dmap; | |
447 | dmap->nrpages++; | |
448 | if (PageDirty(page)) | |
449 | radix_tree_tag_set(&dmap->page_tree, | |
450 | offset, | |
451 | PAGECACHE_TAG_DIRTY); | |
452 | } | |
453 | spin_unlock_irq(&dmap->tree_lock); | |
454 | } | |
455 | unlock_page(page); | |
456 | } | |
457 | pagevec_release(&pvec); | |
458 | cond_resched(); | |
459 | ||
460 | goto repeat; | |
461 | } | |
462 | ||
463 | void nilfs_clear_dirty_pages(struct address_space *mapping) | |
464 | { | |
465 | struct pagevec pvec; | |
466 | unsigned int i; | |
467 | pgoff_t index = 0; | |
468 | ||
469 | pagevec_init(&pvec, 0); | |
470 | ||
471 | while (pagevec_lookup_tag(&pvec, mapping, &index, PAGECACHE_TAG_DIRTY, | |
472 | PAGEVEC_SIZE)) { | |
473 | for (i = 0; i < pagevec_count(&pvec); i++) { | |
474 | struct page *page = pvec.pages[i]; | |
475 | struct buffer_head *bh, *head; | |
476 | ||
477 | lock_page(page); | |
478 | ClearPageUptodate(page); | |
479 | ClearPageMappedToDisk(page); | |
480 | bh = head = page_buffers(page); | |
481 | do { | |
482 | lock_buffer(bh); | |
483 | clear_buffer_dirty(bh); | |
484 | clear_buffer_nilfs_volatile(bh); | |
4e13e66b | 485 | clear_buffer_nilfs_checked(bh); |
0bd49f94 RK |
486 | clear_buffer_uptodate(bh); |
487 | clear_buffer_mapped(bh); | |
488 | unlock_buffer(bh); | |
489 | bh = bh->b_this_page; | |
490 | } while (bh != head); | |
491 | ||
492 | __nilfs_clear_page_dirty(page); | |
493 | unlock_page(page); | |
494 | } | |
495 | pagevec_release(&pvec); | |
496 | cond_resched(); | |
497 | } | |
498 | } | |
499 | ||
500 | unsigned nilfs_page_count_clean_buffers(struct page *page, | |
501 | unsigned from, unsigned to) | |
502 | { | |
503 | unsigned block_start, block_end; | |
504 | struct buffer_head *bh, *head; | |
505 | unsigned nc = 0; | |
506 | ||
507 | for (bh = head = page_buffers(page), block_start = 0; | |
508 | bh != head || !block_start; | |
509 | block_start = block_end, bh = bh->b_this_page) { | |
510 | block_end = block_start + bh->b_size; | |
511 | if (block_end > from && block_start < to && !buffer_dirty(bh)) | |
512 | nc++; | |
513 | } | |
514 | return nc; | |
515 | } | |
516 | ||
517 | /* | |
518 | * NILFS2 needs clear_page_dirty() in the following two cases: | |
519 | * | |
520 | * 1) For B-tree node pages and data pages of the dat/gcdat, NILFS2 clears | |
521 | * page dirty flags when it copies back pages from the shadow cache | |
522 | * (gcdat->{i_mapping,i_btnode_cache}) to its original cache | |
523 | * (dat->{i_mapping,i_btnode_cache}). | |
524 | * | |
525 | * 2) Some B-tree operations like insertion or deletion may dispose buffers | |
526 | * in dirty state, and this needs to cancel the dirty state of their pages. | |
527 | */ | |
528 | int __nilfs_clear_page_dirty(struct page *page) | |
529 | { | |
530 | struct address_space *mapping = page->mapping; | |
531 | ||
532 | if (mapping) { | |
533 | spin_lock_irq(&mapping->tree_lock); | |
534 | if (test_bit(PG_dirty, &page->flags)) { | |
535 | radix_tree_tag_clear(&mapping->page_tree, | |
536 | page_index(page), | |
537 | PAGECACHE_TAG_DIRTY); | |
538 | spin_unlock_irq(&mapping->tree_lock); | |
539 | return clear_page_dirty_for_io(page); | |
540 | } | |
541 | spin_unlock_irq(&mapping->tree_lock); | |
542 | return 0; | |
543 | } | |
544 | return TestClearPageDirty(page); | |
545 | } |