Linux内核抢占补丁的基本原理
作者 jklCPU在内核中运行时并不是处处不可抢占的,内核中存在一些空隙,在这时进行抢占是安
全的,内核抢占补丁的基本原理就是将SMP可并行的代码段看成是可以进行内核抢占的区
域。
2.4内核正好细化了多CPU下的内核线程同步机构,对不可并行的指令块用spinlock和rw
lock作了细致的表示,该补丁的实现可谓水到渠成。
具体的方法就是在进程的任务结构上增加一个preempt_count变量作为内核抢占锁,它随
着spinlock和rwlock一起加锁和解锁。当preempt_count为0时表示可以进行内核调度。
内核调度器的入口为preempt_schedule(),它将当前进程标记为TASK_PREEMPTED状态再
调用schedule(),在TASK_PREEMPTED状态,schedule()不会将进程从运行队列中删除。
下面是内核抢占补丁的主要代码示意:
arch/i386/kernel/entry.S:
preempt_count = 4 # 将task_struct中的flags用作preempt_count,flags被移到了别
的位置
ret_from_exception: # 从异常返回
#ifdef CONFIG_SMP
GET_CURRENT(%ebx)
movl processor(%ebx),%eax
shll $CONFIG_X86_L1_CACHE_SHIFT,%eax
movl SYMBOL_NAME(irq_stat)(,%eax),%ecx # softirq_active
testl SYMBOL_NAME(irq_stat)+4(,%eax),%ecx # softirq_mask
#else
movl SYMBOL_NAME(irq_stat),%ecx # softirq_active
testl SYMBOL_NAME(irq_stat)+4,%ecx # softirq_mask
#endif
jne handle_softirq
#ifdef CONFIG_PREEMPT
cli
incl preempt_count(%ebx) # 异常的入口没有禁止内核调度的指令,与ret_from_intr
匹配一下
#endif
ENTRY(ret_from_intr) # 硬件中断的返回
GET_CURRENT(%ebx)
#ifdef CONFIG_PREEMPT
cli
decl preempt_count(%ebx) # 恢复内核抢占标志
#endif
movl EFLAGS(%esp),%eax # mix EFLAGS and CS
movb CS(%esp),%al
testl $(VM_MASK | 3),%eax # return to VM86 mode or non-supervisor?
jne ret_with_reschedule
#ifdef CONFIG_PREEMPT
cmpl $0,preempt_count(%ebx)
jnz restore_all # 如果preempt_count非零则表示禁止内核抢占
cmpl $0,need_resched(%ebx)
jz restore_all #
movl SYMBOL_NAME(irq_stat)+irq_stat_local_bh_count CPU_INDX,%ecx
addl SYMBOL_NAME(irq_stat)+irq_stat_local_irq_count CPU_INDX,%ecx
jnz restore_all
incl preempt_count(%ebx)
sti
call SYMBOL_NAME(preempt_schedule)
jmp ret_from_intr # 新进程返回,返回ret_from_intr恢复抢占标志后再返回
#else
jmp restore_all
#endif
ALIGN
handle_softirq:
#ifdef CONFIG_PREEMPT
cli
GET_CURRENT(%ebx)
incl preempt_count(%ebx)
sti
#endif
call SYMBOL_NAME(do_softirq)
jmp ret_from_intr
ALIGN
reschedule:
call SYMBOL_NAME(schedule) # test
jmp ret_from_sys_call
include/asm/hw_irq.h:
...
#ifdef CONFIG_PREEMPT
#define BUMP_CONTEX_SWITCH_LOCK \
GET_CURRENT \
"incl 4(%ebx)\n\t"
#else
#define BUMP_CONTEX_SWITCH_LOCK
#endif
#define SAVE_ALL \ 硬件中断保护入口现场
"cld\n\t" \
"pushl %es\n\t" \
"pushl %ds\n\t" \
"pushl %eax\n\t" \
"pushl %ebp\n\t" \
"pushl %edi\n\t" \
"pushl %esi\n\t" \
"pushl %edx\n\t" \
"pushl %ecx\n\t" \
"pushl %ebx\n\t" \
"movl $" STR(__KERNEL_DS) ",%edx\n\t" \
"movl %edx,%ds\n\t" \
"movl %edx,%es\n\t" \
BUMP_CONTEX_SWITCH_LOCK # 硬件中断的入口禁止内核抢占
include/linux/spinlock.h:
#ifdef CONFIG_PREEMPT
#define switch_lock_count() current->preempt_count
#define in_ctx_sw_off() (switch_lock_count().counter) 判断当前进程的抢占计数
是否非零
#define atomic_ptr_in_ctx_sw_off() (&switch_lock_count())
#define ctx_sw_off() \ 禁止内核抢占
do { \
atomic_inc(atomic_ptr_in_ctx_sw_off()); \ 当前进程的内核抢占计数增1
} while (0)
#define ctx_sw_on_no_preempt() \ 允许内核抢占
do { \
atomic_dec(atomic_ptr_in_ctx_sw_off()); \ 当前进程的内核抢占计数减1
} while (0)
#define ctx_sw_on() \ 允许并完成内核抢占
do { \
if (atomic_dec_and_test(atomic_ptr_in_ctx_sw_off()) && \
current->need_resched) \
preempt_schedule(); \
} while (0)
#define spin_lock(lock) \
do { \
ctx_sw_off(); \ 进入自旋锁时禁止抢占
_raw_spin_lock(lock); \
} while(0)
#define spin_trylock(lock) ({ctx_sw_off(); _raw_spin_trylock(lock) ? \锁定并
测试原来是否上锁
1 : ({ctx_sw_on(); 0;});})
#define spin_unlock(lock) \
do { \
_raw_spin_unlock(lock); \
ctx_sw_on(); \ 离开自旋锁时允许并完成内核抢占
} while (0)
#define read_lock(lock) ({ctx_sw_off(); _raw_read_lock(lock);})
#define read_unlock(lock) ({_raw_read_unlock(lock); ctx_sw_on();})
#define write_lock(lock) ({ctx_sw_off(); _raw_write_lock(lock);})
#define write_unlock(lock) ({_raw_write_unlock(lock); ctx_sw_on();})
#define write_trylock(lock) ({ctx_sw_off(); _raw_write_trylock(lock) ? \
1 : ({ctx_sw_on(); 0;});})
...
include/asm/softirq.h:
#define cpu_bh_disable(cpu) do { ctx_sw_off(); local_bh_count(cpu)++; barrie
r(); } while (0)
#define cpu_bh_enable(cpu) do { barrier(); local_bh_count(cpu)--;ctx_sw_on()
; } while (0)
kernel/schedule.c:
#ifdef CONFIG_PREEMPT
asmlinkage void preempt_schedule(void)
{
while (current->need_resched) {
ctx_sw_off();
current->state |= TASK_PREEMPTED;
schedule();
current->state &= ~TASK_PREEMPTED;
ctx_sw_on_no_preempt();
}
}
#endif
asmlinkage void schedule(void)
{
struct schedule_data * sched_data;
struct task_struct *prev, *next, *p;
struct list_head *tmp;
int this_cpu, c;
#ifdef CONFIG_PREEMPT
ctx_sw_off();
#endif
if (!current->active_mm) BUG();
need_resched_back:
prev = current;
this_cpu = prev->processor;
if (in_interrupt())
goto sche易做图ng_in_interrupt;
release_kernel_lock(prev, this_cpu);
/* Do "administrative" work here while we don't hold any locks */
if (softirq_active(this_cpu) & softirq_mask(this_cpu))
goto handle_softirq;
handle_softirq_back:
/*
* 'sched_data' is protected by the fact that we can run
* only one process per CPU.
*/
sched_data = & aligned_data[this_cpu].schedule_data;
spin_lock_irq(&runqueue_lock);
/* move an exhau