您现在的位置： Linux教程網 >> UnixLinux > >> Linux編程 >> Linux編程

ARM Linux的進程調度

1、大家應該都知道，Linux內核中用於進程調度的主要函數就是schedule函數。當然，要進行進程調度，有許多條件需要滿足；現在假設所有的條件都已經滿足，要進行調度了。

/*
 * schedule() is the main scheduler function.
 */
asmlinkage void __sched schedule(void)
{
struct task_struct *prev, *next;
unsigned long *switch_count;
struct rq *rq;
int cpu;

need_resched:
........

#ifdef CONFIG_SMP
.........
#endif

if (unlikely(!rq->nr_running))
idle_balance(cpu, rq);

prev->sched_class->put_prev_task(rq, prev);
next = pick_next_task(rq, prev); /* 應該是選擇出下一個需要運行的進程。這個過程很複雜，與進程的三種分類和優先級都有關係，這裡就不細講了。 */

if (likely(prev != next)) {
sched_info_switch(prev, next);

rq->nr_switches++;
rq->curr = next;
++*switch_count;

context_switch(rq, prev, next); /* unlocks the rq */

我們重點講context_switch這個函數，先列出它的源碼，如下所示：

/*
* context_switch - switch to the new MM and the new
* thread's register state.
*
* 看註釋即可知道，這個函數的主要作用就是切換MM（內存管理方面的）和thread（CPU此時的狀態）。
*/
static inline void
context_switch(struct rq *rq, struct task_struct *prev,
struct task_struct *next)
{
struct mm_struct *mm, *oldmm;

prepare_task_switch(rq, prev, next); /* 對ARM來說是空函數 */
trace_sched_switch(rq, prev, next);
mm = next->mm;
oldmm = prev->active_mm;
/*
* For paravirt, this is coupled with an exit in switch_to to
* combine the page table reload and the switch backend into
* one hypercall.
*/
arch_enter_lazy_cpu_mode();

if (unlikely(!mm)) {
next->active_mm = oldmm;
atomic_inc(&oldmm->mm_count);
enter_lazy_tlb(oldmm, next);
} else
switch_mm(oldmm, mm, next);

數據結構mm_struct描述著一個地址空間，每個進程控制塊中有兩個mm_struct結構指針。一個是mm，指向描述著本進程用戶空間的mm_struct結構；如果mm為NULL，就表示本進程是個沒有自己用戶空間的內核線程。而active_mm則指向進程或線程實際使用的空間：普通進程的active_mm與mm相同；內核線程沒有自己的用戶空間，其active_mm就指向它所“掛靠”（借用）的前一個進程的空間，也就是上面代碼中的next->active_mm = oldmm。下面是struct task_struct結構中的一小段：

struct list_head tasks;

struct mm_struct *mm, *active_mm;

/* task state */
struct linux_binfmt *binfmt;
int exit_state;
int exit_code, exit_signal;
int pdeath_signal; /* The signal sent when the parent dies */
/* ??? */
unsigned int personality;
unsigned did_exec:1;
pid_t pid;
pid_t tgid;

現在列出switch_mm函數的源碼，如下所示：

/*
* This is the actual mm switch as far as the scheduler
* is concerned. No registers are touched. We avoid
* calling the CPU specific function when the mm hasn't
* actually changed.
*/
static inline void
switch_mm(struct mm_struct *prev, struct mm_struct *next,
struct task_struct *tsk)
{
#ifdef CONFIG_MMU
unsigned int cpu = smp_processor_id();

#ifdef CONFIG_SMP
/* check for possible thread migration */
if (!cpus_empty(next->cpu_vm_mask) && !cpu_isset(cpu, next->cpu_vm_mask))
__flush_icache_all();
#endif
if (!cpu_test_and_set(cpu, next->cpu_vm_mask) || prev != next) {
check_context(next);
cpu_switch_mm(next->pgd, next);

切換空間，實際上就是換一套頁面映射目錄和映射表。看下這個宏的展開：

/*
 * cpu_switch_mm - switch to the page directory 'pgd' on behalf of 'mm'.
 * The pgd pointer is passed through virt_to_phys() before being handed
 * to cpu_do_switch_mm().
 */
#define cpu_switch_mm(pgd,mm) cpu_do_switch_mm(virt_to_phys(pgd),mm)

/*
 * cpu_do_switch_mm - forward the call to the switch_mm hook of the
 * global 'processor' function-pointer table.
 */
#define cpu_do_switch_mm(pgd,mm) processor.switch_mm(pgd,mm)

這裡出現了一個新的結構體struct processor，其定義如下：

/*
 * struct processor - table of function pointers for low-level
 * processor operations.
 *
 * Don't change this structure - ASM code relies on it. The member
 * order is part of the contract: tables such as
 * sa110_processor_functions list one .word per member, in exactly
 * this order.
 *
 * A single instance named 'processor' is declared here; the
 * cpu_do_switch_mm() macro calls through processor.switch_mm.
 */
extern struct processor {
/* MISC */
/*
 * Get data abort address/flags. Takes the pc as its only argument.
 */
void (*_data_abort)(unsigned long pc);
/*
 * Retrieve prefetch fault address. Takes lr and returns an
 * unsigned long.
 */
unsigned long (*_prefetch_abort)(unsigned long lr);
/*
 * Set up any processor specifics.
 */
void (*_proc_init)(void);
/*
 * Disable any processor specifics.
 */
void (*_proc_fin)(void);
/*
 * Special stuff for a reset. Declared noreturn: the hook is not
 * expected to return to its caller.
 */
void (*reset)(unsigned long addr) __attribute__((noreturn));
/*
 * Idle the processor.
 * NOTE(review): the meaning of the int return value is not visible
 * here - confirm against the per-CPU implementations.
 */
int (*_do_idle)(void);
/*
 * Processor architecture specific
 */
/*
 * Clean a virtual address range ('size' bytes starting at 'addr')
 * from the D-cache without flushing the cache.
 */
void (*dcache_clean_area)(void *addr, int size);

/*
 * Set the page table. 'pgd_phys' is the physical address of the page
 * directory (cpu_switch_mm() applies virt_to_phys() before calling);
 * 'mm' is the address space being switched to.
 */
void (*switch_mm)(unsigned long pgd_phys, struct mm_struct *mm);
/*
 * Set a possibly extended PTE. Non-extended PTEs should
 * ignore 'ext'.
 */
void (*set_pte_ext)(pte_t *ptep, pte_t pte, unsigned int ext);
} processor;

一看就知道這個結構應該和具體CPU有關，而且成員都是函數指針，那麼這些函數指針是在哪裡賦值的呢？

舉個例子，如下所示：在文件linux/arch/arm/mm/proc-sa110.S有如下定義：

@ SA-110 function table. It holds one .word per member of struct processor,
@ in the same order as the C declaration, so the order must not be changed.
.type sa110_processor_functions, #object
ENTRY(sa110_processor_functions)
.word v4_early_abort              @ _data_abort
.word pabort_noifar               @ _prefetch_abort
.word cpu_sa110_proc_init         @ _proc_init
.word cpu_sa110_proc_fin          @ _proc_fin
.word cpu_sa110_reset             @ reset
.word cpu_sa110_do_idle           @ _do_idle
.word cpu_sa110_dcache_clean_area @ dcache_clean_area
.word cpu_sa110_switch_mm         @ switch_mm
.word cpu_sa110_set_pte_ext       @ set_pte_ext

對應的有

ENTRY(cpu_sa110_switch_mm)
#ifdef CONFIG_MMU
str lr, [sp, #-4]!
bl v4wb_flush_kern_cache_all @ clears IP
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer

將指向新進程的首層映射表的指針寫入MMU中的寄存器c2，即地址轉換表基地址寄存器。
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs

使指令和數據TLB全部失效，即丟掉TLB中緩存著的原有地址映射。
ldr pc, [sp], #4
#else
mov pc, lr
#endif

這樣，CPU的用戶空間映射就改變了，但是對當前程序的運行沒影響，因為現在CPU運行在系統空間中。

注：進程的調度、切換只能在系統空間中進行。

if (cache_is_vivt())
cpu_clear(cpu, prev->cpu_vm_mask);
}
#endif
} /* switch_mm函數源碼到此結束 */

if (unlikely(!prev->mm)) {
prev->active_mm = NULL;
rq->prev_mm = oldmm;
}
/*
* Since the runqueue lock will be released by the next
* task (which is an invalid locking op but in the case
* of the scheduler it's an obvious special-case), so we
* do an early lockdep release here:
*/
#ifndef __ARCH_WANT_UNLOCKED_CTXSW
spin_release(&rq->lock.dep_map, 1, _THIS_IP_);
#endif

/* Here we just switch the register state and the stack. */
switch_to(prev, next, prev);

barrier();
/*
* this_rq must be evaluated again because prev may have moved
* CPUs since it called schedule(), thus the 'rq' on its stack
* frame will be invalid.
*/
finish_task_switch(this_rq(), prev);

這一部分，下篇再說。
} /* context_switch到此結束 */

/*
* the context switch might have flipped the stack from under
* us, hence refresh the local variables.
*/
cpu = smp_processor_id();
rq = cpu_rq(cpu);
} else
spin_unlock_irq(&rq->lock);

..........

preempt_enable_no_resched();
if (unlikely(test_thread_flag(TIF_NEED_RESCHED)))
goto need_resched;

} /* schedule函數到此結束 */

這篇寫到這裡也就結束了。我們主要講述的是：在已經找到下一個要運行的進程之後，如何進行切換。這一篇說的主要是內存管理方面的切換，即mm_struct結構體的切換。其實，它的切換就是頁面目錄的切換，當然，不同的處理器會有所不同。

上一篇文章： Python調用MySQLdb插入中文亂碼的問題
下一篇文章： C++提供了四種新的類型強制

Linux編程

Linux進程ID號--Linux進程的管理與調度（三）