Linux内核分析--理解进程调度时机、跟踪分析进程调度和进程切换的过程

ID：fuchen1994

姓名：江军

作业要求：

理解Linux系统中进程调度的时机，可以在内核代码中搜索schedule()函数，看都是哪里调用了schedule()，判断我们课程内容中的总结是否准确；
使用gdb跟踪分析一个schedule()函数，验证您对Linux系统进程调度与进程切换过程的理解；推荐在实验楼Linux虚拟机环境下完成实验。
特别关注并仔细分析switch_to中的汇编代码，理解进程上下文的切换机制，以及与中断上下文切换的关系；

实验过程：

进程调度的时机

中断处理过程（包括时钟中断、I/O中断、系统调用和异常）中，直接调用schedule()，或者返回用户态时根据need_resched标记调用schedule()；
内核线程可以直接调用schedule()进行进程切换，也可以在中断处理过程中进行调度，也就是说内核线程作为一类特殊的进程，既可以主动调度，也可以被动调度；
用户态进程无法实现主动调度，仅能通过陷入内核态后的某个时机点进行调度，即在中断处理过程中进行调度。

代码分析：

1.这部分是内核抢占的入口函数preempt_schedule()：如果当前不允许抢占（preempt_count非零或中断被关闭）就直接返回，否则进入__schedule()进行调度

#ifdef CONFIG_PREEMPT

/*

2907 * this is the entry point to schedule() from in-kernel preemption

2908 * off of preempt_enable. Kernel preemptions off return from interrupt

2909 * occur there and call schedule directly.

2910 */

2911asmlinkage __visible void __sched notrace preempt_schedule(void)

{

    /*

2914     * If there is a non-zero preempt_count or interrupts are disabled,

2915     * we do not want to preempt the current task. Just return..

2916     */

    if (likely(!preemptible()))

        return;

    do {

        __preempt_count_add(PREEMPT_ACTIVE);

        __schedule();  //这个函数进入

        __preempt_count_sub(PREEMPT_ACTIVE);

        /*

2926         * Check again in case we missed a preemption opportunity

2927         * between schedule and now.

2928         */

        barrier();

    } while (need_resched());

}

static void __sched __schedule(void)

{

    struct task_struct *prev, *next;

    unsigned long *switch_count;

    struct rq *rq;

    int cpu;

2777need_resched:

    preempt_disable();

    cpu = smp_processor_id();  //取得当前CPU的编号，下一行据此找到该CPU对应的运行队列rq

    rq = cpu_rq(cpu);

    rcu_note_context_switch(cpu);

    prev = rq->curr;  //保存当前进程current

    schedule_debug(prev); //进入这个检查函数，判断当前是否处于不应该进行调度的原子上下文

static inline void schedule_debug(struct task_struct *prev)

{

#ifdef CONFIG_SCHED_STACK_END_CHECK

    BUG_ON(unlikely(task_stack_end_corrupted(prev)));

#endif

    /*

2681     * Test if we are atomic. Since do_exit() needs to call into

2682     * schedule() atomically, we ignore that path. Otherwise whine

2683     * if we are scheduling when we should not.

2684     */

    if (unlikely(in_atomic_preempt_off() && prev->state != TASK_DEAD))

        __schedule_bug(prev);

    rcu_sleep_check();

    profile_hit(SCHED_PROFILING, __builtin_return_address(0));

    schedstat_inc(this_rq(), sched_count);

}

if (prev->state && !(preempt_count() & PREEMPT_ACTIVE)) {  //检测prev：如果它处于不可运行状态，并且不是在内核态被抢占，就把它从运行队列上删除

        if (unlikely(signal_pending_state(prev->state, prev))) {  //检测prev：如果它有未被阻塞的待处理信号，并且它的状态是TASK_INTERRUPTIBLE，就把其状态设置为TASK_RUNNING，并把它留在runqueue中

            prev->state = TASK_RUNNING;

        } else {

            deactivate_task(rq, prev, DEQUEUE_SLEEP);  //否则就调用deactivate_task()函数，把它从运行队列中移除

            prev->on_rq = 0;

            /*

2806             * If a worker went to sleep, notify and ask workqueue

2807             * whether it wants to wake up a task to maintain

2808             * concurrency.

2809             */

            if (prev->flags & PF_WQ_WORKER) {

                struct task_struct *to_wakeup;

                to_wakeup = wq_worker_sleeping(prev, cpu);

                if (to_wakeup)

                    try_to_wake_up_local(to_wakeup);

            }

        }

        switch_count = &prev->nvcsw;

    }

860void deactivate_task(struct rq *rq, struct task_struct *p, int flags)

{

    if (task_contributes_to_load(p))

        rq->nr_uninterruptible++;   //如果该进程需要计入系统负载，就把运行队列的nr_uninterruptible计数加一（这里统计的不是上下文切换次数）

    dequeue_task(rq, p, flags);  //然后进入dequeue_task()函数

}

845static void dequeue_task(struct rq *rq, struct task_struct *p, int flags)

{

    update_rq_clock(rq);

    sched_info_dequeued(rq, p);

    p->sched_class->dequeue_task(rq, p, flags);  //调用p所属调度类的dequeue_task方法（并不是递归调用自身），将p进程从当前运行队列上移除

}

2.选择下一个要运行的进程

2698pick_next_task(struct rq *rq, struct task_struct *prev)

{

    const struct sched_class *class = &fair_sched_class;

    struct task_struct *p;

    /*

2704     * Optimization: we know that if all tasks are in

2705     * the fair class we can call that function directly:

2706     */

    if (likely(prev->sched_class == class &&

           rq->nr_running == rq->cfs.h_nr_running)) {

        p = fair_sched_class.pick_next_task(rq, prev);

        if (unlikely(p == RETRY_TASK))

            goto again;

        /* assumes fair_sched_class->next == idle_sched_class */

        if (unlikely(!p))

            p = idle_sched_class.pick_next_task(rq, prev);

        return p;

    }

5const struct sched_class idle_sched_class = {

    /* .next is NULL */

    /* no enqueue/yield_task for idle tasks */

    /* dequeue is not valid, we print a debug message there: */

    .dequeue_task        = dequeue_task_idle,

    .check_preempt_curr    = check_preempt_curr_idle,

    .pick_next_task        = pick_next_task_idle,

    .put_prev_task        = put_prev_task_idle,

2.GDB跟踪分析

这个有点坑了，用git下载不了所需的文件，就没法进行实验，等我后期在本机上搭建好环境再补上。其实跟我前面的代码分析差不多，区别不大

3.switch_to中的汇编代码分析，关注进程上下文切换机制，以及中断上下文切换的关系

schedule()函数选择一个新的进程来运行，并调用context_switch()函数进行上下文的切换，该函数再调用switch_to宏来完成关键的上下文切换

next = pick_next_task(rq, prev);//进程调度算法都封装这个函数内部

context_switch(rq, prev, next);//进程上下文切换

switch_to主要利用了prev和next两个参数：prev指向当前进程，next指向被调度的进程；第三个参数last是输出参数，通过eax得到__switch_to()的返回值

#define switch_to(prev, next, last)                    \

do {                                 \

  /*                              \

34   * Context-switching clobbers all registers, so we clobber  \

35   * them explicitly, via unused output variables.     \

36   * (EAX and EBP is not listed because EBP is saved/restored  \

37   * explicitly for wchan access and EAX is the return value of   \

38   * __switch_to())                     \

39   */                                \

  unsigned long ebx, ecx, edx, esi, edi;                \

                                  \

  asm volatile("pushfl\n\t"      /* save    flags */   \

           "pushl %%ebp\n\t"        /* save    EBP   */ \ 当前进程堆栈基址压栈

           "movl %%esp,%[prev_sp]\n\t"  /* save    ESP   */ \ 将当前进程栈顶保存到prev->thread.sp

           "movl %[next_sp],%%esp\n\t"  /* restore ESP   */ \ 将下一个进程保存的栈顶(next->thread.sp)加载到esp中

           "movl $1f,%[prev_ip]\n\t"    /* save    EIP   */ \ 把标号1的地址保存为当前进程的eip(prev->thread.ip)

           "pushl %[next_ip]\n\t"   /* restore EIP   */    \ 将下一个进程的eip(next->thread.ip)压栈，它就是next进程恢复执行的起点

           __switch_canary                   \

           "jmp __switch_to\n"  /* regparm call  */ \

           "1:\t"                        \

           "popl %%ebp\n\t"     /* restore EBP   */    \

           "popfl\n"         /* restore flags */  \ 进程再次被调度时从标号1处继续执行，先恢复自己的ebp和flags

                                  \

           /* output parameters */                \

           : [prev_sp] "=m" (prev->thread.sp),     \

             [prev_ip] "=m" (prev->thread.ip),        \

             "=a" (last),                 \

                                  \

             /* clobbered output registers: */     \

             "=b" (ebx), "=c" (ecx), "=d" (edx),      \

             "=S" (esi), "=D" (edi)             \

                                       \

             __switch_canary_oparam                \

                                  \

             /* input parameters: */                \

           : [next_sp]  "m" (next->thread.sp),        \

             [next_ip]  "m" (next->thread.ip),       \

                                       \

             /* regparm parameters for __switch_to(): */  \

             [prev]     "a" (prev),              \

             [next]     "d" (next)               \

                                  \

             __switch_canary_iparam                \

                                  \

           : /* reloaded segment registers */           \

          "memory");                  \

} while (0)

通过系统调用，用户空间的应用程序就会进入内核空间，由内核代表该进程运行于内核空间，这就涉及到上下文的切换。用户空间和内核空间具有不同的地址映射以及通用或专用的寄存器组，而用户空间的进程要传递很多变量、参数给内核，内核也要保存用户进程的一些寄存器、变量等，以便系统调用结束后回到用户空间继续执行。所谓的进程上下文，就是一个进程在执行的时候，CPU的所有寄存器中的值、进程的状态以及堆栈中的内容。当内核需要切换到另一个进程时，它需要保存当前进程的所有状态，即保存当前进程的进程上下文，以便再次执行该进程时，能够恢复切换时的状态，继续执行。

同理，硬件通过触发信号，导致内核调用中断处理程序，进入内核空间。这个过程中，硬件的一些变量和参数也要传递给内核，内核通过这些参数进行中断处理，中断上下文就可以理解为硬件传递过来的这些参数和内核需要保存的一些环境，主要是被中断的进程的环境。

Linux内核工作在进程上下文或者中断上下文。提供系统调用服务的内核代码代表发起系统调用的应用程序运行在进程上下文；另一方面，中断处理程序，异步运行在中断上下文。中断上下文和特定进程无关。

运行在进程上下文的内核代码是可以被抢占的（Linux2.6支持抢占）。但是一个中断上下文，通常都会始终占有CPU（当然中断可以嵌套，但我们一般不这样做），不可以被打断。正因为如此，运行在中断上下文的代码就要受一些限制

秒客网

Linux内核分析--理解进程调度时机、跟踪分析进程调度和进程切换的过程

相关文章