mm: fix swapin race condition

author Andrea Arcangeli <aarcange@redhat.com>

Thu, 9 Sep 2010 23:37:52 +0000 (16:37 -0700)

committer Linus Torvalds <torvalds@linux-foundation.org>

Fri, 10 Sep 2010 01:57:24 +0000 (18:57 -0700)
author Andrea Arcangeli <aarcange@redhat.com>
Thu, 9 Sep 2010 23:37:52 +0000 (16:37 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 10 Sep 2010 01:57:24 +0000 (18:57 -0700)
diff --git a/include/linux/ksm.h b/include/linux/ksm.h

index 74d691ee9121c5bb3aa8336d7eeffcc03d88cc46..3319a6967626e02f91c340080b21950a8310a67f 100644 (file)
--- a/include/linux/ksm.h
+++ b/include/linux/ksm.h
@@ -16,6 +16,9 @@
  struct stable_node;
  struct mem_cgroup;
  
+struct page *ksm_does_need_to_copy(struct page *page,
+                       struct vm_area_struct *vma, unsigned long address);
+
  #ifdef CONFIG_KSM
  int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
                 unsigned long end, int advice, unsigned long *vm_flags);
@@ -70,19 +73,14 @@ static inline void set_page_stable_node(struct page *page,
   * We'd like to make this conditional on vma->vm_flags & VM_MERGEABLE,
   * but what if the vma was unmerged while the page was swapped out?
   */
-struct page *ksm_does_need_to_copy(struct page *page,
-                       struct vm_area_struct *vma, unsigned long address);
-static inline struct page *ksm_might_need_to_copy(struct page *page,
+static inline int ksm_might_need_to_copy(struct page *page,
                         struct vm_area_struct *vma, unsigned long address)
  {
         struct anon_vma *anon_vma = page_anon_vma(page);
  
-       if (!anon_vma ||
-           (anon_vma->root == vma->anon_vma->root &&
-            page->index == linear_page_index(vma, address)))
-               return page;
-
-       return ksm_does_need_to_copy(page, vma, address);
+       return anon_vma &&
+               (anon_vma->root != vma->anon_vma->root ||
+                page->index != linear_page_index(vma, address));
  }
  
  int page_referenced_ksm(struct page *page,
@@ -115,10 +113,10 @@ static inline int ksm_madvise(struct vm_area_struct *vma, unsigned long start,
         return 0;
  }
  
-static inline struct page *ksm_might_need_to_copy(struct page *page,
+static inline int ksm_might_need_to_copy(struct page *page,
                         struct vm_area_struct *vma, unsigned long address)
  {
-       return page;
+       return 0;
  }
  
  static inline int page_referenced_ksm(struct page *page,
diff --git a/mm/ksm.c b/mm/ksm.c

index e2ae00458320786a380a1ab370efe0dc6cfd6e1a..b1873cf03ed986bcb062259da8f1c4093a97160b 100644 (file)
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1504,8 +1504,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
  {
         struct page *new_page;
  
-       unlock_page(page);      /* any racers will COW it, not modify it */
-
         new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);
         if (new_page) {
                 copy_user_highpage(new_page, page, address, vma);
@@ -1521,7 +1519,6 @@ struct page *ksm_does_need_to_copy(struct page *page,
                         add_page_to_unevictable_list(new_page);
         }
  
-       page_cache_release(page);
         return new_page;
  }
  
diff --git a/mm/memory.c b/mm/memory.c

index 6b2ab10518512052c895dd5db7ff0f20fd1df2f3..71b161b73bb503be50556e9ff302ca0c7eaba396 100644 (file)
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2623,7 +2623,7 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
                 unsigned int flags, pte_t orig_pte)
  {
         spinlock_t *ptl;
-       struct page *page;
+       struct page *page, *swapcache = NULL;
         swp_entry_t entry;
         pte_t pte;
         struct mem_cgroup *ptr = NULL;
@@ -2679,10 +2679,23 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         lock_page(page);
         delayacct_clear_flag(DELAYACCT_PF_SWAPIN);
  
-       page = ksm_might_need_to_copy(page, vma, address);
-       if (!page) {
-               ret = VM_FAULT_OOM;
-               goto out;
+       /*
+        * Make sure try_to_free_swap didn't release the swapcache
+        * from under us. The page pin isn't enough to prevent that.
+        */
+       if (unlikely(!PageSwapCache(page)))
+               goto out_page;
+
+       if (ksm_might_need_to_copy(page, vma, address)) {
+               swapcache = page;
+               page = ksm_does_need_to_copy(page, vma, address);
+
+               if (unlikely(!page)) {
+                       ret = VM_FAULT_OOM;
+                       page = swapcache;
+                       swapcache = NULL;
+                       goto out_page;
+               }
         }
  
         if (mem_cgroup_try_charge_swapin(mm, page, GFP_KERNEL, &ptr)) {
@@ -2735,6 +2748,18 @@ static int do_swap_page(struct mm_struct *mm, struct vm_area_struct *vma,
         if (vm_swap_full() || (vma->vm_flags & VM_LOCKED) || PageMlocked(page))
                 try_to_free_swap(page);
         unlock_page(page);
+       if (swapcache) {
+               /*
+                * Keep the swapcache page locked so that the swap entry
+                * cannot be reused before we take the PT lock for the
+                * pte_same() check (avoiding false positives from
+                * pte_same()). For further safety, release the lock
+                * only after swap_free(), so that the swap count won't
+                * change under a parallel locked swapcache.
+                */
+               unlock_page(swapcache);
+               page_cache_release(swapcache);
+       }
  
         if (flags & FAULT_FLAG_WRITE) {
                 ret |= do_wp_page(mm, vma, address, page_table, pmd, ptl, pte);
@@ -2756,6 +2781,10 @@ out_page:
         unlock_page(page);
  out_release:
         page_cache_release(page);
+       if (swapcache) {
+               unlock_page(swapcache);
+               page_cache_release(swapcache);
+       }
         return ret;
  }
author	Andrea Arcangeli <aarcange@redhat.com>
	Thu, 9 Sep 2010 23:37:52 +0000 (16:37 -0700)
committer	Linus Torvalds <torvalds@linux-foundation.org>
	Fri, 10 Sep 2010 01:57:24 +0000 (18:57 -0700)
include/linux/ksm.h		patch \| blob \| blame \| history
mm/ksm.c		patch \| blob \| blame \| history
mm/memory.c		patch \| blob \| blame \| history