天天看點

14.5 shrink_active_list函數

首先來看當不活躍LRU的頁面數量少于活躍LRU的頁面數量的情況,shrink_active_list()函數掃描活躍LRU連結清單,看是否有頁面可以遷移到不活躍LRU連結清單中。

[kswapd()->balance_pgdat()->kswapd_shrink_zone()->shrink_zone()->shrink_lruvec()->shrink_list()->shrink_active_list()]

/*
 * shrink_active_list - scan pages taken from an LRU list and decide which
 * ones stay active and which ones are deactivated.
 *
 * @nr_to_scan: upper bound on the number of pages to look at; handed to
 *              isolate_lru_pages() as its scan limit.
 * @lruvec:     the LRU vector whose list is scanned; also supplies the
 *              reclaim statistics (lruvec->reclaim_stat) and the zone.
 * @sc:         scan control for this reclaim pass; may_unmap, may_writepage
 *              and target_mem_cgroup are read here.
 * @lru:        which LRU list to scan.  Deactivated pages are moved to
 *              list "lru - LRU_ACTIVE", so this is presumably always one
 *              of the active lists -- confirm against callers.
 *
 * Manipulation of the LRU lists is protected by the zone->lru_lock
 * spinlock.  To keep the lock hold time short, isolate_lru_pages() first
 * moves a batch of pages onto a private temporary list; the per-page
 * work is then done with the lock dropped, and the lock is re-taken only
 * to put the pages back.
 */

static void shrink_active_list(unsigned long nr_to_scan,
                   struct lruvec *lruvec,
                   struct scan_control *sc,
                   enum lru_list lru)
{
    unsigned long nr_taken;
    unsigned long nr_scanned;
    unsigned long vm_flags;
    /* Three temporary lists: l_hold / l_active / l_inactive */
    LIST_HEAD(l_hold);  /* The pages which were snipped off */
    LIST_HEAD(l_active);
    LIST_HEAD(l_inactive);
    struct page *page;
    struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
    unsigned long nr_rotated = 0;
    isolate_mode_t isolate_mode = 0;
    int file = is_file_lru(lru);
    /* Get the zone this lruvec belongs to */
    struct zone *zone = lruvec_zone(lruvec);
    /*
     * NOTE(review): presumably flushes pending per-CPU LRU additions so
     * they are visible on the lists before we scan -- confirm.
     */
    lru_add_drain();

    /* Translate the scan-control restrictions into isolation mode bits */
    if (!sc->may_unmap)
        isolate_mode |= ISOLATE_UNMAPPED;
    if (!sc->may_writepage)
        isolate_mode |= ISOLATE_CLEAN;
    
    /* Take zone->lru_lock to protect the LRU list manipulation */
    spin_lock_irq(&zone->lru_lock);

    /*
     * isolate_lru_pages() detaches a batch of pages from the LRU list
     * onto l_hold, scanning at most nr_to_scan entries.  isolate_mode
     * filters out pages that must not be taken under the current
     * restrictions.
     */
    nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
                     &nr_scanned, sc, isolate_mode, lru);
    /* For global reclaim, add to the zone's NR_PAGES_SCANNED counter */
    if (global_reclaim(sc))
        __mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);

    /*
     * Account the isolated pages in recent_scanned[]; per the surrounding
     * article, get_scan_count() uses this when splitting scan pressure
     * between anonymous and file-cache pages.
     */
    reclaim_stat->recent_scanned[file] += nr_taken;

    /*
     * Update zone statistics: count PGREFILL events, subtract the
     * isolated pages from this LRU's size counter and add them to the
     * matching isolated counter (NR_ISOLATED_ANON + file).
     */
    __count_zone_vm_events(PGREFILL, zone, nr_scanned);
    __mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
    __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
    spin_unlock_irq(&zone->lru_lock);

    /*
     * Walk the private l_hold list with the lock dropped.  Each page ends
     * up on l_active, on l_inactive, or is handed to putback_lru_page().
     */
    while (!list_empty(&l_hold)) {
        cond_resched();
        page = lru_to_page(&l_hold);
        list_del(&page->lru);
        /* Pages that are not evictable are put back via putback_lru_page() */
        if (unlikely(!page_evictable(page))) {
            putback_lru_page(page);
            continue;
        }

        /*
         * When buffer_heads_over_limit is set, try to release the page's
         * private data.  Only done if the page lock can be taken with
         * trylock_page(); page_has_private() is re-checked under the lock.
         */
        if (unlikely(buffer_heads_over_limit)) {
            if (page_has_private(page) && trylock_page(page)) {
                if (page_has_private(page))
                    try_to_release_page(page, 0);
                unlock_page(page);
            }
        }
        /*
         * Per the surrounding article: page_referenced() returns the
         * number of ptes that recently referenced the page; 0 means the
         * page was not accessed recently.
         *
         * Why are referenced pages (other than executable page cache)
         * moved to the inactive list instead of staying on the active
         * list?  Re-activating every recently referenced page causes a
         * scalability problem: on a machine with a lot of memory, by the
         * time free memory runs out every page has been referenced, so
         * there is no time to scan the whole active list and reset the
         * referenced bits, and the information gained is of little use.
         * Therefore, since Linux 2.6.28, scanning the active list moves
         * all pages to the inactive list.  Only the hardware referenced
         * bit is cleared here (done by page_referenced()); a page that is
         * referenced again is moved back to the active list when the
         * inactive list is scanned.
         *
         * Mapped executable file pages are kept on the active list.  The
         * inactive list is scanned before, and faster than, the active
         * list, so these pages get more time to be accessed again by
         * user processes, which improves interactive responsiveness.
         * Executable pages are those whose vma carries VM_EXEC, typically
         * executables and the libraries they link against.
         */
        if (page_referenced(page, 0, sc->target_mem_cgroup,
                    &vm_flags)) {
            nr_rotated += hpage_nr_pages(page);
            /*
             * Identify referenced, file-backed active pages and
             * give them one more trip around the active list. So
             * that executable code get better chances to stay in
             * memory under moderate memory pressure.  Anon pages
             * are not likely to be evicted by use-once streaming
             * IO, plus JVM can create lots of anon VM_EXEC pages,
             * so we ignore them here.
             */
            if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
                list_add(&page->lru, &l_active);
                continue;
            }
        }

        /*
         * Everything else -- unreferenced pages as well as referenced
         * pages that are not executable file cache -- is deactivated and
         * queued on l_inactive.
         */
        ClearPageActive(page);  /* we are de-activating */
        list_add(&page->lru, &l_inactive);
    }

    /*
     * Move pages back to the lru list.
     */
    spin_lock_irq(&zone->lru_lock);
    /*
     * Count referenced pages from currently used mappings as rotated,
     * even though only some of them are actually re-activated.  This
     * helps balance scan pressure between file and anonymous pages in
     * get_scan_count.
     */
    reclaim_stat->recent_rotated[file] += nr_rotated;

    /*
     * Put the l_active pages back on the scanned list and the l_inactive
     * pages on the corresponding inactive list (lru - LRU_ACTIVE).
     * l_hold, empty by now, is passed as a scratch list to both calls.
     */
    move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
    move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
    /* The pages are no longer isolated: undo the isolated accounting */
    __mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
    spin_unlock_irq(&zone->lru_lock);

    /*
     * Whatever move_active_pages_to_lru() left on l_hold is uncharged and
     * freed here, outside the lock.
     * NOTE(review): presumably these are pages whose last reference was
     * dropped in the meantime -- confirm in move_active_pages_to_lru().
     */
    mem_cgroup_uncharge_list(&l_hold);
    free_hot_cold_page_list(&l_hold, true);
}
           

isolate_lru_pages()函數實作:批量地把LRU連結清單的部分頁面遷移到臨時連結清單

[shrink_active_list()->isolate_lru_pages()]

/*
 * isolate_lru_pages - pull a batch of pages off one LRU list.
 *
 * zone->lru_lock is heavily contended, so the list-shrinking code works
 * in batches: a run of pages is detached here and then processed by the
 * caller without holding the LRU lock.  For pagecache intensive
 * workloads this is the hottest spot in the kernel (apart from the
 * copy_*_user functions).
 *
 * Appropriate locks must be held before calling this function.
 *
 * @nr_to_scan: maximum number of list entries to look at.
 * @lruvec:     the LRU vector (set of LRU lists) to pull pages from.
 * @dst:        temporary list that receives the isolated pages.
 * @nr_scanned: out parameter; set to the number of entries examined.
 * @sc:         the scan_control for this reclaim session (only sc->order
 *              is read here, for the tracepoint).
 * @mode:       LRU isolation mode bits, passed to __isolate_lru_page().
 * @lru:        id of the LRU list to isolate from.
 *
 * Returns the number of pages moved onto *@dst (a huge page counts as
 * hpage_nr_pages() pages).
 */
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
        struct lruvec *lruvec, struct list_head *dst,
        unsigned long *nr_scanned, struct scan_control *sc,
        isolate_mode_t mode, enum lru_list lru)
{
    struct list_head *src = &lruvec->lists[lru];
    unsigned long isolated = 0;
    unsigned long visited = 0;

    while (visited < nr_to_scan && !list_empty(src)) {
        struct page *page = lru_to_page(src);
        int rc;

        prefetchw_prev_lru_page(page, src, flags);

        VM_BUG_ON_PAGE(!PageLRU(page), page);

        /* 0 means the page was isolated and may be moved to dst */
        rc = __isolate_lru_page(page, mode);
        if (rc == 0) {
            int nr_pages = hpage_nr_pages(page);

            mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
            list_move(&page->lru, dst);
            isolated += nr_pages;
        } else if (rc == -EBUSY) {
            /*
             * Not isolatable right now (e.g. it is being freed
             * elsewhere): re-queue it on src and carry on.
             */
            list_move(&page->lru, src);
        } else {
            /* No other return value is expected here */
            BUG();
        }

        /* Every examined entry counts as scanned, isolated or not */
        visited++;
    }

    *nr_scanned = visited;
    trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, visited,
                    isolated, mode, is_file_lru(lru));
    return isolated;
}
傳回shrink_active_list()函數
           

__isolate_lru_page()函數:

[shrink_active_list()->isolate_lru_pages()->__isolate_lru_page()]

/*
 * __isolate_lru_page - try to take one page off its LRU list.
 *
 * The page is only taken if it satisfies the restrictions encoded in
 * @mode.  Pages that are being freed elsewhere are left alone.
 *
 * page:    page to consider
 * mode:    LRU isolation mode bits.  Four are tested here:
 *          ISOLATE_CLEAN         - only isolate clean pages
 *          ISOLATE_UNMAPPED      - only isolate pages that are not mapped
 *          ISOLATE_ASYNC_MIGRATE - only isolate pages that can be
 *                                  migrated without blocking
 *          ISOLATE_UNEVICTABLE   - unevictable pages may be isolated too
 *
 * Returns 0 on success (a page reference has been taken and PageLRU has
 * been cleared), -EINVAL if the page is not an eligible LRU page, and
 * -EBUSY if it cannot be isolated under @mode right now.
 */
int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
    /* Only take pages on the LRU. */
    if (!PageLRU(page))
        return -EINVAL;

    /*
     * Compaction should not handle unevictable pages but CMA can do so:
     * an unevictable page is refused unless the caller explicitly asked
     * for ISOLATE_UNEVICTABLE.
     */
    if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
        return -EINVAL;

    /*
     * To minimise LRU disruption the caller can ask for only those
     * pages it can work on without blocking -- mostly clean pages.
     *
     * ISOLATE_CLEAN: only clean pages; used by reclaim when it cannot
     * write to backing storage.
     *
     * ISOLATE_ASYNC_MIGRATE: only pages that can be migrated without
     * blocking.
     */
    if (mode & (ISOLATE_CLEAN | ISOLATE_ASYNC_MIGRATE)) {
        /* All the caller can do on PageWriteback is block */
        if (PageWriteback(page))
            return -EBUSY;

        if (PageDirty(page)) {
            struct address_space *mapping;

            /* ISOLATE_CLEAN means only clean pages */
            if (mode & ISOLATE_CLEAN)
                return -EBUSY;

            /*
             * A dirty page can be migrated without blocking only
             * if it has no mapping or its mapping provides a
             * ->migratepage callback.
             */
            mapping = page_mapping(page);
            if (mapping && !mapping->a_ops->migratepage)
                return -EBUSY;
        }
    }

    /* ISOLATE_UNMAPPED was requested but the page is mapped */
    if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
        return -EBUSY;

    /*
     * Take a reference on the page, which only succeeds when its
     * reference count is not already zero.  A failure means the page is
     * on its way to being freed, so it must be left alone.
     */
    if (!likely(get_page_unless_zero(page)))
        return -EBUSY;

    /*
     * Be careful not to clear PageLRU until after we're sure the page
     * is not being freed elsewhere -- the page release code relies on
     * it.
     */
    ClearPageLRU(page);
    return 0;
}