diff --git a/mm/vmscan.c b/mm/vmscan.c
index a3d669f8e25ec2a96b7ea9925c2618971e2208df..c391c320dbafcda04260923f36336891283b614f 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1233,6 +1233,47 @@ static noinline_for_stack void update_isolated_counts(struct zone *zone,
        reclaim_stat->recent_scanned[1] += *nr_file;
 }
 
+/*
+ * Returns true if the caller should wait to clean dirty/writeback pages.
+ *
+ * If we are direct reclaiming for contiguous pages and we do not reclaim
+ * everything in the list, try again and wait for writeback IO to complete.
+ * This will stall high-order allocations noticeably. Only do that when we
+ * really need to free pages under high memory pressure.
+ */
+static inline bool should_reclaim_stall(unsigned long nr_taken,
+                                       unsigned long nr_freed,
+                                       int priority,
+                                       struct scan_control *sc)
+{
+       int lumpy_stall_priority;
+
+       /* kswapd should not stall on sync IO */
+       if (current_is_kswapd())
+               return false;
+
+       /* Only stall on lumpy reclaim */
+       if (!sc->lumpy_reclaim_mode)
+               return false;
+
+       /* If we have reclaimed everything on the isolated list, no stall */
+       if (nr_freed == nr_taken)
+               return false;
+
+       /*
+        * For high-order allocations, there are two stall thresholds.
+        * High-cost allocations stall immediately, whereas lower-order
+        * allocations such as stacks require the scanning pressure to be
+        * much higher (i.e. a lower priority value) before stalling.
+        */
+       if (sc->order > PAGE_ALLOC_COSTLY_ORDER)
+               lumpy_stall_priority = DEF_PRIORITY;
+       else
+               lumpy_stall_priority = DEF_PRIORITY / 3;
+
+       return priority <= lumpy_stall_priority;
+}
+
 /*
  * shrink_inactive_list() is a helper for shrink_zone().  It returns the number
  * of reclaimed pages
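
The should_reclaim_stall() helper added above uses two priority cut-offs. A minimal user-space sketch of just those cut-offs, assuming DEF_PRIORITY == 12 and PAGE_ALLOC_COSTLY_ORDER == 3 (their usual values) and omitting the kswapd/lumpy-mode/"reclaimed everything" checks; this is an illustration only, not kernel code:

    #include <stdbool.h>
    #include <stdio.h>

    #define DEF_PRIORITY            12   /* assumed, as in mm/vmscan.c */
    #define PAGE_ALLOC_COSTLY_ORDER  3   /* assumed, as in the page allocator */

    /* Mirror of the priority cut-off in should_reclaim_stall(). */
    static bool stalls_at(int order, int priority)
    {
            int lumpy_stall_priority;

            if (order > PAGE_ALLOC_COSTLY_ORDER)
                    lumpy_stall_priority = DEF_PRIORITY;     /* stall right away */
            else
                    lumpy_stall_priority = DEF_PRIORITY / 3; /* only under heavy pressure */

            return priority <= lumpy_stall_priority;
    }

    int main(void)
    {
            int priority;

            /* Reclaim starts at DEF_PRIORITY and counts down as pressure rises. */
            for (priority = DEF_PRIORITY; priority >= 0; priority--)
                    printf("priority %2d: order-2 stalls=%d, order-9 stalls=%d\n",
                           priority, stalls_at(2, priority), stalls_at(9, priority));
            return 0;
    }

With those values, a costly allocation (order > 3, e.g. a huge page) may stall from the very first pass at priority 12, while an order-1..3 allocation stalls only once the scan priority has dropped to DEF_PRIORITY / 3 == 4 or below.
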
@@ -1298,14 +1339,8 @@ shrink_inactive_list(unsigned long nr_to_scan, struct zone *zone,
 
        nr_reclaimed = shrink_page_list(&page_list, sc, PAGEOUT_IO_ASYNC);
 
-       /*
-        * If we are direct reclaiming for contiguous pages and we do
-        * not reclaim everything in the list, try again and wait
-        * for IO to complete. This will stall high-order allocations
-        * but that should be acceptable to the caller
-        */
-       if (nr_reclaimed < nr_taken && !current_is_kswapd() &&
-                       sc->lumpy_reclaim_mode) {
+       /* Check if we should synchronously wait for writeback */
+       if (should_reclaim_stall(nr_taken, nr_reclaimed, priority, sc)) {
                congestion_wait(BLK_RW_ASYNC, HZ/10);
 
                /*
@@ -1934,9 +1969,10 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
 unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                                                gfp_t gfp_mask, bool noswap,
                                                unsigned int swappiness,
-                                               struct zone *zone, int nid)
+                                               struct zone *zone)
 {
        struct scan_control sc = {
+               .nr_to_reclaim = SWAP_CLUSTER_MAX,
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
                .may_swap = !noswap,
@@ -1944,13 +1980,13 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
                .order = 0,
                .mem_cgroup = mem,
        };
-       nodemask_t nm  = nodemask_of_node(nid);
-
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
-       sc.nodemask = &nm;
-       sc.nr_reclaimed = 0;
-       sc.nr_scanned = 0;
+
+       trace_mm_vmscan_memcg_softlimit_reclaim_begin(0,
+                                                     sc.may_writepage,
+                                                     sc.gfp_mask);
+
        /*
         * NOTE: Although we can get the priority field, using it
         * here is not a good idea, since it limits the pages we can scan.
@@ -1959,6 +1995,9 @@ unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
         * the priority and make it zero.
         */
        shrink_zone(0, zone, &sc);
+
+       trace_mm_vmscan_memcg_softlimit_reclaim_end(sc.nr_reclaimed);
+
        return sc.nr_reclaimed;
 }
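
Both memcg entry points in this patch build sc.gfp_mask the same way: the caller's reclaim-behaviour bits (GFP_RECLAIM_MASK) are kept, and every other bit is taken from the GFP_HIGHUSER_MOVABLE template. A minimal user-space sketch of that bit-merge idiom, using hypothetical flag values in place of the real GFP bits (only the merge pattern comes from the patch):

    #include <stdio.h>

    /* Hypothetical stand-ins for the real GFP bits; only the merge
     * idiom matters here, not the actual values. */
    #define RECLAIM_MASK        0x00ffu   /* bits the caller may control */
    #define HIGHUSER_MOVABLE    0x5a3cu   /* fixed template for the rest */

    int main(void)
    {
            unsigned int caller_mask = 0x1234u;
            unsigned int merged = (caller_mask & RECLAIM_MASK) |
                                  (HIGHUSER_MOVABLE & ~RECLAIM_MASK);

            /* Low byte (the "reclaim" bits) comes from the caller,
             * high byte comes from the template: prints merged=0x5a34. */
            printf("caller=0x%04x template=0x%04x merged=0x%04x\n",
                   caller_mask, HIGHUSER_MOVABLE, merged);
            return 0;
    }
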
 
@@ -1968,6 +2007,7 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
                                           unsigned int swappiness)
 {
        struct zonelist *zonelist;
+       unsigned long nr_reclaimed;
        struct scan_control sc = {
                .may_writepage = !laptop_mode,
                .may_unmap = 1,
@@ -1982,7 +2022,16 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem_cont,
        sc.gfp_mask = (gfp_mask & GFP_RECLAIM_MASK) |
                        (GFP_HIGHUSER_MOVABLE & ~GFP_RECLAIM_MASK);
        zonelist = NODE_DATA(numa_node_id())->node_zonelists;
-       return do_try_to_free_pages(zonelist, &sc);
+
+       trace_mm_vmscan_memcg_reclaim_begin(0,
+                                           sc.may_writepage,
+                                           sc.gfp_mask);
+
+       nr_reclaimed = do_try_to_free_pages(zonelist, &sc);
+
+       trace_mm_vmscan_memcg_reclaim_end(nr_reclaimed);
+
+       return nr_reclaimed;
 }
 #endif
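
mem_cgroup_shrink_node_zone() and try_to_free_mem_cgroup_pages() are now bracketed by begin/end tracepoints that record may_writepage and gfp_mask on entry and nr_reclaimed on exit. Assuming the events land under the usual vmscan group in /sys/kernel/debug/tracing (the convention for trace_mm_vmscan_* tracepoints; verify the exact names on a running kernel), a small user-space reader that enables the pair and streams the resulting events might look like this:

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    #define TRACING "/sys/kernel/debug/tracing"

    /* Write a short string to a tracing control file; 0 on success. */
    static int write_str(const char *path, const char *val)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0)
                    return -1;
            if (write(fd, val, strlen(val)) < 0) {
                    close(fd);
                    return -1;
            }
            close(fd);
            return 0;
    }

    int main(void)
    {
            char buf[4096];
            ssize_t n;
            int fd;

            /* Enable the (assumed) begin/end events added by this patch. */
            write_str(TRACING "/events/vmscan/mm_vmscan_memcg_reclaim_begin/enable", "1");
            write_str(TRACING "/events/vmscan/mm_vmscan_memcg_reclaim_end/enable", "1");

            /* Stream events as memcg reclaim runs elsewhere on the system. */
            fd = open(TRACING "/trace_pipe", O_RDONLY);
            if (fd < 0) {
                    perror("trace_pipe");
                    return 1;
            }
            while ((n = read(fd, buf, sizeof(buf))) > 0)
                    fwrite(buf, 1, n, stdout);
            close(fd);
            return 0;
    }
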
 
@@ -2119,7 +2168,6 @@ loop_again:
                for (i = 0; i <= end_zone; i++) {
                        struct zone *zone = pgdat->node_zones + i;
                        int nr_slab;
-                       int nid, zid;
 
                        if (!populated_zone(zone))
                                continue;
@@ -2129,14 +2177,12 @@ loop_again:
 
                        sc.nr_scanned = 0;
 
-                       nid = pgdat->node_id;
-                       zid = zone_idx(zone);
                        /*
                         * Call soft limit reclaim before calling shrink_zone.
                         * For now we ignore the return value
                         */
-                       mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask,
-                                                       nid, zid);
+                       mem_cgroup_soft_limit_reclaim(zone, order, sc.gfp_mask);
+
                        /*
                         * We put equal pressure on every zone, unless one
                         * zone has way too many pages free already.
@@ -2635,10 +2681,19 @@ static int __zone_reclaim(struct zone *zone, gfp_t gfp_mask, unsigned int order)
                 * Note that shrink_slab will free memory on all zones and may
                 * take a long time.
                 */
-               while (shrink_slab(sc.nr_scanned, gfp_mask, order) &&
-                      (zone_page_state(zone, NR_SLAB_RECLAIMABLE) + nr_pages >
-                               nr_slab_pages0))
-                       ;
+               for (;;) {
+                       unsigned long lru_pages = zone_reclaimable_pages(zone);
+
+                       /* No reclaimable slab or very low memory pressure */
+                       if (!shrink_slab(sc.nr_scanned, gfp_mask, lru_pages))
+                               break;
+
+                       /* Freed enough memory */
+                       nr_slab_pages1 = zone_page_state(zone,
+                                                       NR_SLAB_RECLAIMABLE);
+                       if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
+                               break;
+               }
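
The rewritten slab loop above exits either when shrink_slab() reports nothing left to reclaim or when NR_SLAB_RECLAIMABLE has dropped by at least nr_pages relative to nr_slab_pages0. A toy user-space model of just that termination bookkeeping (the shrinking itself is faked with a fixed per-pass figure; only the exit conditions mirror the patch):

    #include <stdio.h>

    int main(void)
    {
            unsigned long nr_slab_pages0 = 1000;  /* slab pages before the loop */
            unsigned long nr_slab_pages1 = nr_slab_pages0;
            unsigned long nr_pages = 32;          /* pages the caller wants freed */
            unsigned long freed_this_pass = 10;   /* stand-in for shrink_slab() progress */
            int passes = 0;

            for (;;) {
                    passes++;

                    /* shrink_slab() returning 0 means no reclaimable slab. */
                    if (freed_this_pass == 0)
                            break;
                    nr_slab_pages1 -= freed_this_pass;

                    /* Freed enough: slab shrank by at least nr_pages. */
                    if (nr_slab_pages1 + nr_pages <= nr_slab_pages0)
                            break;
            }

            /* Prints: stopped after 4 passes, slab 1000 -> 960 (target drop 32) */
            printf("stopped after %d passes, slab %lu -> %lu (target drop %lu)\n",
                   passes, nr_slab_pages0, nr_slab_pages1, nr_pages);
            return 0;
    }
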
 
                /*
                 * Update nr_reclaimed by the number of slab pages we