首先來看不活躍LRU的頁面數量少於活躍LRU的頁面數量的情況:shrink_active_list()函數掃描活躍LRU鏈結串列,檢查是否有頁面可以遷移到不活躍LRU鏈結串列中。
[kswapd()->balance_pgdat()->kswapd_shrink_zone()->shrink_zone()->shrink_lruvec()->shrink_list()->shrink_active_list()]
/*
 * shrink_active_list() - scan part of an active LRU list and sort the
 * scanned pages between the active and inactive lists.
 *
 * Up to @nr_to_scan pages are isolated from the @lru list of @lruvec onto
 * the private list l_hold.  Each isolated page is then examined:
 *   - a page that is not evictable is handed to putback_lru_page();
 *   - a referenced page whose vm_flags carry VM_EXEC and which is file
 *     cache is queued on l_active;
 *   - every other page has PageActive cleared and is queued on l_inactive.
 * Finally l_active is moved back to @lru and l_inactive to
 * (@lru - LRU_ACTIVE), presumably the matching inactive list.
 *
 * Locking: zone->lru_lock is taken with spin_lock_irq() only around the
 * isolation step and around the final move-back step.  Pulling a batch of
 * pages onto a temporary list first keeps the time spent under the lock
 * short; the per-page work in the middle runs without it.
 *
 * @nr_to_scan: number of pages to scan, passed to isolate_lru_pages()
 * @lruvec:     LRU vector whose list is scanned
 * @sc:         scan control; may_unmap, may_writepage and
 *              target_mem_cgroup are read here
 * @lru:        id of the LRU list to scan
 */
static void shrink_active_list(unsigned long nr_to_scan,
struct lruvec *lruvec,
struct scan_control *sc,
enum lru_list lru)
{
unsigned long nr_taken;
unsigned long nr_scanned;
unsigned long vm_flags;
/* Three temporary lists: l_hold, l_active and l_inactive. */
LIST_HEAD(l_hold); /* The pages which were snipped off */
LIST_HEAD(l_active);
LIST_HEAD(l_inactive);
struct page *page;
struct zone_reclaim_stat *reclaim_stat = &lruvec->reclaim_stat;
unsigned long nr_rotated = 0;
isolate_mode_t isolate_mode = 0;
int file = is_file_lru(lru);
/* Look up the zone this lruvec belongs to. */
struct zone *zone = lruvec_zone(lruvec);
lru_add_drain();
/* Restrict isolation when the caller may not unmap or write back pages. */
if (!sc->may_unmap)
isolate_mode |= ISOLATE_UNMAPPED;
if (!sc->may_writepage)
isolate_mode |= ISOLATE_CLEAN;
/* Take zone->lru_lock to protect the LRU list manipulation below. */
spin_lock_irq(&zone->lru_lock);
/*
 * Detach up to nr_to_scan pages from the LRU list onto l_hold in one
 * batch.  isolate_mode filters out pages that must not be taken.
 */
nr_taken = isolate_lru_pages(nr_to_scan, lruvec, &l_hold,
&nr_scanned, sc, isolate_mode, lru);
if (global_reclaim(sc))
__mod_zone_page_state(zone, NR_PAGES_SCANNED, nr_scanned);/* add the scanned pages to the zone's NR_PAGES_SCANNED counter */
/*
 * Account the isolated pages in recent_scanned[].  NOTE(review): the
 * original annotation says get_scan_count() uses this to split scanning
 * between anonymous and file pages; that function is not visible here.
 */
reclaim_stat->recent_scanned[file] += nr_taken;
/*
 * Zone statistics: count nr_scanned PGREFILL events, subtract nr_taken
 * from the counter of the list being scanned (NR_LRU_BASE + lru) and add
 * nr_taken to the isolated counter (NR_ISOLATED_ANON + file).
 */
__count_zone_vm_events(PGREFILL, zone, nr_scanned);
__mod_zone_page_state(zone, NR_LRU_BASE + lru, -nr_taken);
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, nr_taken);
spin_unlock_irq(&zone->lru_lock);
/*
 * Walk the private l_hold list without the lock held.  Each page ends up
 * put back as unevictable, on l_active, or on l_inactive.
 */
while (!list_empty(&l_hold)) {
cond_resched();
page = lru_to_page(&l_hold);
list_del(&page->lru);
/* A page that is not evictable is returned via putback_lru_page(). */
if (unlikely(!page_evictable(page))) {
putback_lru_page(page);
continue;
}
/*
 * When buffer_heads_over_limit is set, try to release the page's private
 * data.  page_has_private() is re-checked once the page lock is held.
 */
if (unlikely(buffer_heads_over_limit)) {
if (page_has_private(page) && trylock_page(page)) {
if (page_has_private(page))
try_to_release_page(page, 0);
unlock_page(page);
}
}
/*
 * A non-zero result from page_referenced() is treated as "recently
 * referenced"; vm_flags is passed by address and then tested for VM_EXEC.
 *
 * Note that a referenced page is still moved to the inactive list unless
 * it is an executable file-cache page.  Rationale given by the original
 * annotation (not verifiable from this code alone): keeping every
 * referenced page on the active list does not scale on large-memory
 * systems, where nearly every page has been referenced by the time free
 * memory runs out, so the reference information carries little value.
 * Pages that are referenced again can be re-activated when the inactive
 * list is scanned.  Executable file pages get one more round on the
 * active list so user programs have a better chance to touch them again.
 */
if (page_referenced(page, 0, sc->target_mem_cgroup,
&vm_flags)) {
nr_rotated += hpage_nr_pages(page);
/*
* Identify referenced, file-backed active pages and
* give them one more trip around the active list. So
* that executable code get better chances to stay in
* memory under moderate memory pressure. Anon pages
* are not likely to be evicted by use-once streaming
* IO, plus JVM can create lots of anon VM_EXEC pages,
* so we ignore them here.
*/
if ((vm_flags & VM_EXEC) && page_is_file_cache(page)) {
list_add(&page->lru, &l_active);
continue;
}
}
/* Everything that reaches this point is queued for the inactive list. */
ClearPageActive(page); /* we are de-activating */
list_add(&page->lru, &l_inactive);
}
/*
* Move pages back to the lru list.
*/
spin_lock_irq(&zone->lru_lock);
/*
* Count referenced pages from currently used mappings as rotated,
* even though only some of them are actually re-activated. This
* helps balance scan pressure between file and anonymous pages in
* get_scan_count.
*/
reclaim_stat->recent_rotated[file] += nr_rotated;
/*
 * Put l_active back on the scanned list and l_inactive on the list at
 * (lru - LRU_ACTIVE).  l_hold is passed to both calls; presumably pages
 * that should be freed are collected on it - TODO confirm against
 * move_active_pages_to_lru().
 */
move_active_pages_to_lru(lruvec, &l_active, &l_hold, lru);
move_active_pages_to_lru(lruvec, &l_inactive, &l_hold, lru - LRU_ACTIVE);
/* The pages are no longer isolated: undo the earlier accounting. */
__mod_zone_page_state(zone, NR_ISOLATED_ANON + file, -nr_taken);
spin_unlock_irq(&zone->lru_lock);
mem_cgroup_uncharge_list(&l_hold);
/* Whatever is left on l_hold is freed here, outside the lock. */
free_hot_cold_page_list(&l_hold, true);
}
isolate_lru_pages()函數實作:批量地把LRU鏈結串列中的部分頁面遷移到臨時鏈結串列
[shrink_active_list()->isolate_lru_pages()]
/*
 * isolate_lru_pages() - pull a batch of pages off one LRU list.
 *
 * zone->lru_lock is heavily contended. Some of the functions that
 * shrink the lists perform better by taking out a batch of pages
 * and working on them outside the LRU lock.
 *
 * For pagecache intensive workloads, this function is the hottest
 * spot in the kernel (apart from copy_*_user functions).
 *
 * Appropriate locks must be held before calling this function.
 *
 * @nr_to_scan:	The number of pages to look through on the list.
 * @lruvec:	The LRU vector to pull pages from.
 * @dst:	The temp list to put pages on to.
 * @nr_scanned:	Out parameter: the number of pages that were scanned.
 * @sc:		The scan_control struct for this reclaim session
 *		(only sc->order is read here, for the trace point).
 * @mode:	One of the LRU isolation modes
 * @lru:	LRU list id for isolating
 *
 * Returns how many pages were moved onto *@dst, counted in
 * hpage_nr_pages() units.
 */
static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
		struct lruvec *lruvec, struct list_head *dst,
		unsigned long *nr_scanned, struct scan_control *sc,
		isolate_mode_t mode, enum lru_list lru)
{
	struct list_head *src = &lruvec->lists[lru];
	unsigned long isolated = 0;
	unsigned long visited;

	for (visited = 0; visited < nr_to_scan && !list_empty(src); visited++) {
		struct page *page = lru_to_page(src);
		int nr_pages;
		int err;

		prefetchw_prev_lru_page(page, src, flags);

		VM_BUG_ON_PAGE(!PageLRU(page), page);

		/* 0 means the page was isolated and may be moved to dst. */
		err = __isolate_lru_page(page, mode);
		if (err == -EBUSY) {
			/*
			 * Not taken (e.g. it is being freed elsewhere):
			 * re-queue it on src and keep scanning.
			 */
			list_move(&page->lru, src);
			continue;
		}
		/* Any other failure is not expected from this caller. */
		if (err != 0)
			BUG();

		nr_pages = hpage_nr_pages(page);
		mem_cgroup_update_lru_size(lruvec, lru, -nr_pages);
		list_move(&page->lru, dst);
		isolated += nr_pages;
	}

	*nr_scanned = visited;
	trace_mm_vmscan_lru_isolate(sc->order, nr_to_scan, visited,
				    isolated, mode, is_file_lru(lru));
	return isolated;
}
傳回shrink_active_list()函數
__isolate_lru_page()函數:
[shrink_active_list()->isolate_lru_pages()->__isolate_lru_page()]
/*
 * Attempt to remove the specified page from its LRU.  Only take this page
 * if it is of the appropriate PageActive status.  Pages which are being
 * freed elsewhere are also ignored.
 *
 * page:	page to consider
 * mode:	one of the LRU isolation modes; the flags tested here are
 *	ISOLATE_CLEAN         - only isolate clean pages
 *	ISOLATE_UNMAPPED      - only isolate pages that are not mapped
 *	ISOLATE_ASYNC_MIGRATE - only isolate pages that can be migrated
 *				without blocking
 *	ISOLATE_UNEVICTABLE   - also allow unevictable pages
 *
 * Returns 0 on success, -ve errno on failure:
 *	-EINVAL	the page is not on an LRU, or it is unevictable and @mode
 *		does not contain ISOLATE_UNEVICTABLE;
 *	-EBUSY	the page is filtered out by @mode, or a reference could not
 *		be taken on it.
 * On success PageLRU has been cleared and the reference obtained by
 * get_page_unless_zero() is held.
 */
int __isolate_lru_page(struct page *page, isolate_mode_t mode)
{
	/* Only take pages on the LRU. */
	if (!PageLRU(page))
		return -EINVAL;

	/* Compaction should not handle unevictable pages but CMA can do so */
	if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
		return -EINVAL;

	/*
	 * To minimise LRU disruption, the caller can indicate that it only
	 * wants to isolate pages it will be able to operate on without
	 * blocking - clean pages for the most part.
	 *
	 * ISOLATE_CLEAN means that only clean pages should be isolated.  This
	 * is used by reclaim when it cannot write to backing storage.
	 *
	 * ISOLATE_ASYNC_MIGRATE indicates that the caller only wants pages
	 * that it is possible to migrate without blocking.
	 */
	if (mode & (ISOLATE_CLEAN|ISOLATE_ASYNC_MIGRATE)) {
		/* All the caller can do on PageWriteback is block */
		if (PageWriteback(page))
			return -EBUSY;

		if (PageDirty(page)) {
			struct address_space *mapping;

			/* ISOLATE_CLEAN means only clean pages */
			if (mode & ISOLATE_CLEAN)
				return -EBUSY;

			/*
			 * Only pages without mappings or that have a
			 * ->migratepage callback are possible to migrate
			 * without blocking
			 */
			mapping = page_mapping(page);
			if (mapping && !mapping->a_ops->migratepage)
				return -EBUSY;
		}
	}

	/* ISOLATE_UNMAPPED was requested but the page is still mapped. */
	if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
		return -EBUSY;

	/*
	 * Take a reference unless the count has already dropped to zero;
	 * failure here is reported as -EBUSY.
	 */
	if (unlikely(!get_page_unless_zero(page)))
		return -EBUSY;

	/*
	 * Be careful not to clear PageLRU until after we're
	 * sure the page is not being freed elsewhere -- the
	 * page release code relies on it.
	 */
	ClearPageLRU(page);
	return 0;
}