kernel 启动过程之四，start_kernel中的rest_init函数到init进程

start_kernel ，是用来启动内核的主函数，我想大家都知道这个函数啦，而在该函数的最后将调用一个函数叫 rest_init() ，它执行完，内核就起来了，

asmlinkage void __init start_kernel(void)

{

......

rest_init();

}

现在我们来看一下 rest_init() 函数，它也在文件 init/main.c 中，它的前面几行是：

static void noinline __init_refok rest_init(void) __releases(kernel_lock)

{

int pid;

kernel_thread(kernel_init, NULL, CLONE_FS | CLONE_SIGHAND);

其中函数 kernel_thread 定义在文件 arch/ia64/kernel/process.c 中，用来启动一个内核线程，这里的 kernel_init 是要执行的函数的指针， NULL 表示传递给该函数的参数为空， CLONE_FS | CLONE_SIGHAND 为 do_fork 产生线程时的标志，表示进程间的 fs 信息共享，信号处理和块信号共享，然后我就屁颠屁颠地追随到 kernel_init 函数了，现在来瞧瞧它都做了什么好事，它的完整代码如下：

/*
 * Body of the kernel thread that rest_init() starts via kernel_thread().
 * It runs the remaining initialization steps in a fixed order, picks the
 * early-userspace init command, and finally calls init_post().
 * The `unused` argument is not read. Returns 0.
 */
static int __init kernel_init(void * unused)

{

lock_kernel();

/* Pass the "all CPUs" mask for the current task. */
set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR);

/* Record the current task as child_reaper of the initial pid namespace. */
init_pid_ns.child_reaper = current;

cad_pid = task_pid(current);

smp_prepare_cpus(setup_max_cpus);

do_pre_smp_initcalls();

smp_init();

sched_init_smp();

cpuset_init_smp();

do_basic_setup();

/* Default to "/init" when no command has been set. */
if (!ramdisk_execute_command)

ramdisk_execute_command = "/init";

/*
 * If the access check on that path fails, drop the command and
 * call prepare_namespace() instead.
 */
if (sys_access((const char __user *) ramdisk_execute_command, 0) != 0) {

ramdisk_execute_command = NULL;

prepare_namespace();

}

init_post();

return 0;

}

在 kernel_init 函数的一开始就调用了 lock_kernel() 函数，当编译时选上了 CONFIG_LOCK_KERNEL ，就加上大内核锁，否则啥也不做，紧接着就调用了函数 set_cpus_allowed_ptr ，由于这些函数对 init 进程的调起还是有影响的，我们还是一个一个来瞧瞧吧，不要忘了啥东东最好，

/*
 * Variant of set_cpus_allowed_ptr() that only validates the mask:
 * returns -EINVAL when CPU 0 is not set in *new_mask, otherwise 0.
 * The task pointer `p` is not used here.
 * NOTE(review): presumably the !CONFIG_SMP stub - confirm against the header.
 */
static inline int set_cpus_allowed_ptr(struct task_struct *p,

const cpumask_t *new_mask)

{

if (!cpu_isset(0, *new_mask))

return -EINVAL;

return 0;

}

这函数其实就调用了 cpu_isset 宏，定义在文件 include/linux/cpumask.h 中，如下：

/* Expands to test_bit() on the `bits` array of `cpumask` for bit number `cpu`. */
#define cpu_isset(cpu, cpumask) test_bit((cpu), (cpumask).bits)

再来看看 set_cpus_allowed_ptr 的第二个参数类型吧，也定义在文件 include/linux/cpumask.h 中，具体如下：

/* CPU mask type: a struct wrapping a bitmap named `bits` declared for NR_CPUS bits. */
typedef struct { DECLARE_BITMAP(bits, NR_CPUS); } cpumask_t;

接着尾随着 DECLARE_BITMAP 宏到文件 include/linux/types.h 中，定义如下：

/*
 * Declare `name` as an array of unsigned long with BITS_TO_LONGS(bits)
 * elements, i.e. enough words to hold `bits` bits.
 * The line continuation must be a backslash immediately followed by the
 * next line of the macro body.
 */
#define DECLARE_BITMAP(name,bits) \
	unsigned long name[BITS_TO_LONGS(bits)]

而宏 BITS_TO_LONGS 定义在文件 include/linux/bitops.h 中，实现如下：

/* Number of longs needed to hold `nr` bits: nr divided by the bits in a long, rounded up. */
#define BITS_TO_LONGS(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(long))

DIV_ROUND_UP 宏定义在文件 include/linux/kernel.h 中， BITS_PER_BYTE 宏定义在文件 include/linux/bitops.h 中，实现如下：

/* Integer division of n by d with the quotient rounded up (computed as (n + d - 1) / d). */
#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d))

/* Number of bits in one byte. */
#define BITS_PER_BYTE 8

即当 NR_CPUS 为 1 ～ 32 时（以 long 为 32 位的系统为例）， cpumask_t 类型为

struct {

unsigned long bits[1];

}

然后来看看在 set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR); 中的 CPU_MASK_ALL_PTR 宏，定义在 include/linux/cpumask.h 中：

/* Pointer form of CPU_MASK_ALL: the address of the CPU_MASK_ALL value. */
#define CPU_MASK_ALL_PTR (&CPU_MASK_ALL)

而 CPU_MASK_ALL 宏也定义在文件 include/linux/cpumask.h 中：

/*
 * cpumask_t compound literal whose last bitmap word is set to
 * CPU_MASK_LAST_WORD; any words not named in the initializer are zero.
 * NOTE(review): this form looks intended for NR_CPUS <= BITS_PER_LONG,
 * where the bitmap is a single word - confirm against cpumask.h.
 * Each continued line must end in a backslash.
 */
#define CPU_MASK_ALL \
(cpumask_t) { { \
	[BITS_TO_LONGS(NR_CPUS)-1] = CPU_MASK_LAST_WORD \
} }

NR_CPUS 宏定义在文件 include/linux/threads.h 中，实现如下：

/*
 * NR_CPUS: taken from CONFIG_NR_CPUS when CONFIG_SMP is defined,
 * otherwise fixed at 1.
 */
#ifdef CONFIG_SMP

#define NR_CPUS CONFIG_NR_CPUS

#else

#define NR_CPUS 1

#endif

CPU_MASK_LAST_WORD 宏定义在文件 include/linux/cpumask.h 中，实现如下：

/* Value of the last word of an NR_CPUS-bit bitmap, computed by BITMAP_LAST_WORD_MASK. */
#define CPU_MASK_LAST_WORD BITMAP_LAST_WORD_MASK(NR_CPUS)

BITMAP_LAST_WORD_MASK(NR_CPUS) 宏定义在文件 include/linux/bitmap.h 中，实现如下：

/*
 * Mask of the valid bits in the last word of an `nbits`-bit bitmap.
 * When nbits is not a multiple of BITS_PER_LONG the result has the low
 * (nbits % BITS_PER_LONG) bits set, i.e. 2^(nbits % BITS_PER_LONG) - 1;
 * when it is a multiple, the whole word is used and the result is ~0UL.
 * Each continued line must end in a backslash.
 */
#define BITMAP_LAST_WORD_MASK(nbits) \
( \
	((nbits) % BITS_PER_LONG) ? \
		(1UL<<((nbits) % BITS_PER_LONG))-1 : ~0UL \
)

当 NR_CPUS 为 1 时， CPU_MASK_LAST_WORD 为 1

当 NR_CPUS 为 2 时， CPU_MASK_LAST_WORD 为 3

当 NR_CPUS 为 n （ n 不是 BITS_PER_LONG 的整数倍）时， CPU_MASK_LAST_WORD 为 2 的 n 次方减 1 ，即低 n 位全为 1 ；当 n 是 BITS_PER_LONG 的整数倍时， CPU_MASK_LAST_WORD 为 ~0UL

有点晕了，我们现在把参数带入，即 set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR)

－－ >cpu_isset(0, *CPU_MASK_ALL_PTR) －－ >test_bit(0, (*CPU_MASK_ALL_PTR).bits)

即当 NR_CPUS 为 n （ n 不超过 BITS_PER_LONG ）时， CPU_MASK_ALL 把 unsigned long bits[0] 的低 n 位都置 1 ，因此第 0 位一定为 1 ，函数返回 0 ；应该就如注释所说的， init 能运行在任何 CPU 上吧。

现在 kernel_init 中的 set_cpus_allowed_ptr(current, CPU_MASK_ALL_PTR); 分析完了，我们接着往下看，首先 init_pid_ns.child_reaper = current; init_pid_ns 定义在 kernel/pid.c 文件中

struct pid_namespace init_pid_ns = {

.kref = {

.refcount = ATOMIC_INIT(2),

.pidmap = {

[ 0 ... PIDMAP_ENTRIES-1] = { ATOMIC_INIT(BITS_PER_PAGE), NULL }

.last_pid = 0,

.level = 0,

.child_reaper = &init_task,

};

它是一个 pid_namespace 结构的变量，先来看看 pid_namespace 的结构，它定义在文件

include/linux/pid_namespace.h 中，具体定义如下：

/*
 * Per-namespace PID state; init_pid_ns is the instance used for the
 * initial namespace.
 */
struct pid_namespace {

/* Reference count object (init_pid_ns starts its refcount at 2). */
struct kref kref;

struct pidmap pidmap[PIDMAP_ENTRIES];

int last_pid;

/* kernel_init() sets this to the current task for init_pid_ns. */
struct task_struct *child_reaper;

struct kmem_cache *pid_cachep;

unsigned int level;

struct pid_namespace *parent;

/* Only present when CONFIG_PROC_FS is defined. */
#ifdef CONFIG_PROC_FS

struct vfsmount *proc_mnt;

#endif

};

即把当前进程设为接受其它孤儿进程的进程，然后取得该进程的进程 ID ，如：

cad_pid = task_pid(current);

然后调用 smp_prepare_cpus(setup_max_cpus); 如果编译时没有指定 CONFIG_SMP ，它什么也不做，接着往下看，调用 do_pre_smp_initcalls() 函数，它定义在 init/main.c 文件中，实现如下：

/*
 * Initialization steps that kernel_init() runs before smp_init():
 * migration_init(), spawn_ksoftirqd(), and - unless nosoftlockup is
 * set - spawn_softlockup_task().
 */
static void __init do_pre_smp_initcalls(void)

{

/* Local declaration of spawn_ksoftirqd(), which is defined in another file. */
extern int spawn_ksoftirqd(void);

migration_init();

spawn_ksoftirqd();

if (!nosoftlockup)

spawn_softlockup_task();

}

其中 migration_init() 定义在文件 include/linux/sched.h 中，具体实现如下 :

/*
 * migration_init(): with CONFIG_SMP only the prototype appears here (the
 * definition lives elsewhere); without CONFIG_SMP it is an empty inline
 * stub, so the call does nothing.
 */
#ifdef CONFIG_SMP

void migration_init(void);

#else

static inline void migration_init(void)

{

}

#endif

好像什么也没有做，然后是调用 spawn_ksoftirqd() 函数，定义在文件 kernel/softirq.c 中，代码如下：

/*
 * Runs cpu_callback() by hand for the current CPU - first with
 * CPU_UP_PREPARE, then with CPU_ONLINE - and then registers cpu_nfb as
 * a CPU notifier. Always returns 0.
 */
__init int spawn_ksoftirqd(void)

{

/* Current CPU id, packed into a void * because cpu_callback() takes it that way. */
void *cpu = (void *)(long)smp_processor_id();

int err = cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);

/* A NOTIFY_BAD result from the prepare step is treated as a fatal bug. */
BUG_ON(err == NOTIFY_BAD);

cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);

register_cpu_notifier(&cpu_nfb);

return 0;

}

在该函数中，首先调用 smp_processor_id 函数获得当前 CPU 的 ID 并把它赋值给变量 cpu ，然后把 cpu 连同 &cpu_nfb ， CPU_UP_PREPARE 传递给函数 cpu_callback ，我们先看 cpu_callback 的前几行：

static int __cpuinit cpu_callback(struct notifier_block *nfb,

unsigned long action,

void *hcpu)

{

int hotcpu = (unsigned long)hcpu;

struct task_struct *p;

switch (action) {

case CPU_UP_PREPARE:

case CPU_UP_PREPARE_FROZEN:

p = kthread_create(ksoftirqd, hcpu, "ksoftirqd/%d", hotcpu);

if (IS_ERR(p)) {

printk("ksoftirqd for %i failed/n", hotcpu);

return NOTIFY_BAD;

}

kthread_bind(p, hotcpu);

per_cpu(ksoftirqd, hotcpu) = p;

break;

从上述代码可以看出当 action 为 CPU_UP_PREPARE 时，将创建一个内核线程并把它赋值给 p ，该线程所要运行的函数为 ksoftirqd ，传递给该函数的参数为 hcpu ，而紧跟其后的 "ksoftirqd/%d", hotcpu 为该线程的名字参数，这就是我们在终端用命令 ps -ef | grep ksoftirqd 所看到的线程；如果线程创建失败，打印出错信息并返回 NOTIFY_BAD ，否则把创建的线程 p 绑定到当前 CPU 的 ID 上，这就是 kthread_bind(p,hotcpu) 所做的，接下来的几行为：

case CPU_ONLINE:

case CPU_ONLINE_FROZEN:

wake_up_process(per_cpu(ksoftirqd, hotcpu));

break;

即在 spawn_ksoftirqd 函数中 cpu_callback(&cpu_nfb, CPU_ONLINE, cpu); 的 action 为 CPU_ONLINE 时，将调用 wake_up_process 函数来唤醒当前 CPU 上的 ksoftirqd 进程。最后调用 register_cpu_notifier(&cpu_nfb) ；其实也没做什么，只是简单的返回 0 。返回到 do_pre_smp_initcalls 函数中，接着往下看：

if (!nosoftlockup)

spawn_softlockup_task();

spawn_softlockup_task() 函数定义在文件 include/linux/sched.h 中，是个空函数。

到现在为止， do_pre_smp_initcalls 分析完了，它主要就是创建进程 ksoftirqd ，把它绑定到当前 CPU 上，然后再把该进程拷贝给每个 CPU ，并唤醒所有 CPU 上的进程 ksoftirqd ，就是当我们执行 ps -ef | grep ksoftirqd 的时候所看到的：

root 4 2 0 08:30 ? 00:00:03 [ksoftirqd/0]

root 7 2 0 08:30 ? 00:00:02 [ksoftirqd/1]

革命尚未成功，同志仍需努力！接着享受吧，呵呵！

现在到了 kernel_init 函数中的 smp_init(); 了

如果在编译时没有选择 CONFIG_SMP ，若定义 CONFIG_X86_LOCAL_APIC 则去调用 APIC_init_uniprocessor() 函数，否则什么也不做，具体代码定义在文件 init/main.c 中：

#ifndef CONFIG_SMP

/*
 * Uniprocessor forms of smp_init(): with CONFIG_X86_LOCAL_APIC it calls
 * APIC_init_uniprocessor(); otherwise it is a macro that expands to an
 * empty statement.
 */
#ifdef CONFIG_X86_LOCAL_APIC

static void __init smp_init(void)

{

APIC_init_uniprocessor();

}

#else

#define smp_init() do { } while (0)

#endif

如果在编译时选择了 CONFIG_SMP 呢，那么它的实现就如下喽：

/*
 * SMP form of smp_init(): walks the present CPUs and calls cpu_up() on
 * each one that is not yet online, stopping once the number of online
 * CPUs reaches setup_max_cpus. It then logs how many CPUs are online
 * and calls smp_cpus_done().
 */
static void __init smp_init(void)
{
	unsigned int cpu;

	for_each_present_cpu(cpu) {
		/* Stop once the configured maximum is reached. */
		if (num_online_cpus() >= setup_max_cpus)
			break;
		if (!cpu_online(cpu))
			cpu_up(cpu);
	}

	/* The message must end in a real newline escape ("\n"). */
	printk(KERN_INFO "Brought up %ld CPUs\n", (long)num_online_cpus());

	smp_cpus_done(setup_max_cpus);
}

来看看这个函数，其中的 for_each_present_cpu(cpu) 宏在文件 include/linux/cpumask.h 中实现：

/* Loop header that runs for_each_cpu_mask() over cpu_present_map. */
#define for_each_present_cpu(cpu) for_each_cpu_mask((cpu), cpu_present_map)

而 for_each_cpu_mask(cpu,mask) 宏也在文件 include/linux/cpumask.h 中实现：

/*
 * Loop header that iterates `cpu` over `mask`.
 * With NR_CPUS > 1 it starts at first_cpu(mask) and advances with
 * next_cpu() until the value reaches NR_CPUS. Otherwise the body runs
 * exactly once with cpu == 0, and `mask` is only evaluated as a void
 * expression.
 * Each continued line must end in a backslash.
 */
#if NR_CPUS > 1
#define for_each_cpu_mask(cpu, mask) \
	for ((cpu) = first_cpu(mask); \
		(cpu) < NR_CPUS; \
		(cpu) = next_cpu((cpu), (mask)))
#else
#define for_each_cpu_mask(cpu, mask) \
	for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#endif

即对于每个 cpu 都要执行大括号里的语句，如果当前 cpu 没激活就把它激活的，该函数然后打印一些 cpu 信息，如当前激活的 cpu 数目。

kernel 启动过程之四，start_kernel中的rest_init函数到init进程

继续阅读

DOS批处理脚本语言简介

DOS 批处理文件

DOS批处理中文本替换

批处理编程- -介绍DOS/BAT

二叉树先序、中序、后序三种遍历的非递归算法

浅谈---测试Native Windows Command与Native PowerShell Command哪个效率高

How run Powershell ISE on WIndows Server2008

在当前位置打开命令行窗口的技巧

C/C++头文件、函数使用说明

在VC6.0开发环境中添加批量注释和取消注释

设置某一行背景颜色的CListCtrl

BMP文件结构及图像每行字节计算方法

linux网络编程----发送与接收文件

处理PCX文件

SIP Presence (二)RFC 3265 - Session Initiation Protocol (SIP)-Specific Event Notification

Linux设备模型（中）之上层容器