Linux启动过程的内核代码分析

时间:2023-03-09 00:20:20
Linux启动过程的内核代码分析

参考上文:

http://www.cnblogs.com/long123king/p/3543872.html

http://www.cnblogs.com/long123king/p/3545688.html

补充:linker script documentation

http://www.nacad.ufrj.br/online/sgi/860-0247-001/sgi_html/ldLinker_scripts.html

参考:http://blog.chinaunix.net/uid-20499746-id-1663135.html

http://blog.csdn.net/redredbird/article/details/5986035

同类文章参考:

http://blog.chinaunix.net/uid-1701789-id-148056.html

http://www.cnblogs.com/cybertitan/archive/2012/09/29/2708184.html

1. 内核代码的布局

我们知道,内核代码被加载到物理内存1MB处,然后in_pm32跳转到1MB物理内存处执行。

那么1MB物理内存处存放的是什么代码呢?

我们先看一个链接器的脚本文件arch/x86/boot/compressed/vmlinux.lds.S

   1: #include <asm-generic/vmlinux.lds.h>

   2:  

   3: OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)

   4:  

   5: #undef i386

   6:  

   7: #include <asm/cache.h>

   8: #include <asm/page_types.h>

   9:  

  10: #ifdef CONFIG_X86_64

  11: OUTPUT_ARCH(i386:x86-64)

  12: ENTRY(startup_64)

  13: #else

  14: OUTPUT_ARCH(i386)

  15: ENTRY(startup_32)

  16: #endif

  17:  

  18: SECTIONS

  19: {

  20:     /* Be careful parts of head_64.S assume startup_32 is at

  21:      * address 0.

  22:      */

  23:     . = 0;

  24:     .head.text : {

  25:         _head = . ;

  26:         HEAD_TEXT

  27:         _ehead = . ;

  28:     }

  29:     .rodata..compressed : {

  30:         *(.rodata..compressed)

  31:     }

  32:     .text :    {

  33:         _text = .;     /* Text */

  34:         *(.text)

  35:         *(.text.*)

  36:         _etext = . ;

  37:     }

  38:     .rodata : {

  39:         _rodata = . ;

  40:         *(.rodata)     /* read-only data */

  41:         *(.rodata.*)

  42:         _erodata = . ;

  43:     }

  44:     .got : {

  45:         _got = .;

  46:         KEEP(*(.got.plt))

  47:         KEEP(*(.got))

  48:         _egot = .;

  49:     }

  50:     .data :    {

  51:         _data = . ;

  52:         *(.data)

  53:         *(.data.*)

  54:         _edata = . ;

  55:     }

  56:     . = ALIGN(L1_CACHE_BYTES);

  57:     .bss : {

  58:         _bss = . ;

  59:         *(.bss)

  60:         *(.bss.*)

  61:         *(COMMON)

  62:         . = ALIGN(8);    /* For convenience during zeroing */

  63:         _ebss = .;

  64:     }

  65: #ifdef CONFIG_X86_64

  66:        . = ALIGN(PAGE_SIZE);

  67:        .pgtable : {

  68:         _pgtable = . ;

  69:         *(.pgtable)

  70:         _epgtable = . ;

  71:     }

  72: #endif

  73:     _end = .;

  74: }

可见,在vmlinux即内核映像的0地址处存放的是.head.text段。

#define __HEAD        .section    ".head.text","ax"

因此,我们找到下面的代码:arch/x86/boot/compressed/head_32.S

   1:  

   2:     __HEAD

   3: ENTRY(startup_32)

   4:     cld

   5:     /*

   6:      * Test KEEP_SEGMENTS flag to see if the bootloader is asking

   7:      * us to not reload segments

   8:      */

   9:     testb    $(1<<6), BP_loadflags(%esi)

  10:     jnz    1f

  11:  

  12:     cli

  13:     movl    $__BOOT_DS, %eax

  14:     movl    %eax, %ds

  15:     movl    %eax, %es

  16:     movl    %eax, %fs

  17:     movl    %eax, %gs

  18:     movl    %eax, %ss

  19: 1:

以及arch/x86/kernel/head_32.S

   1: /*

   2:  * 32-bit kernel entrypoint; only used by the boot CPU.  On entry,

   3:  * %esi points to the real-mode code as a 32-bit pointer.

   4:  * CS and DS must be 4 GB flat segments, but we don't depend on

   5:  * any particular GDT layout, because we load our own as soon as we

   6:  * can.

   7:  */

   8: __HEAD

   9: ENTRY(startup_32)

  10:     movl pa(stack_start),%ecx

  11:     

  12:     /* test KEEP_SEGMENTS flag to see if the bootloader is asking

  13:         us to not reload segments */

  14:     testb $(1<<6), BP_loadflags(%esi)

  15:     jnz 2f

  16:  

  17: /*

  18:  * Set segments to known values.

  19:  */

  20:     lgdt pa(boot_gdt_descr)

  21:     movl $(__BOOT_DS),%eax

  22:     movl %eax,%ds

  23:     movl %eax,%es

  24:     movl %eax,%fs

  25:     movl %eax,%gs

  26:     movl %eax,%ss

  27: 2:

  28:     leal -__PAGE_OFFSET(%ecx),%esp

那么这二者有什么先后顺序吗?


原来,思路到了这里是走进了一个误区:/compressed目录下面存放的并不是内核本身,而是与压缩后的内核映像链接在一起的引导代码,其主要功能是解压缩内核;而/kernel目录下的才是真正的内核代码。

在/kernel目录下也有一个链接器的脚本文件arch/x86/kernel/vmlinux.lds.S

   1: /*

   2:  * ld script for the x86 kernel

   3:  *

   4:  * Historic 32-bit version written by Martin Mares <mj@atrey.karlin.mff.cuni.cz>

   5:  *

   6:  * Modernisation, unification and other changes and fixes:

   7:  *   Copyright (C) 2007-2009  Sam Ravnborg <sam@ravnborg.org>

   8:  *

   9:  *

  10:  * Don't define absolute symbols until and unless you know that symbol

  11:  * value is should remain constant even if kernel image is relocated

  12:  * at run time. Absolute symbols are not relocated. If symbol value should

  13:  * change if kernel is relocated, make the symbol section relative and

  14:  * put it inside the section definition.

  15:  */

  16:  

  17: #ifdef CONFIG_X86_32

  18: #define LOAD_OFFSET __PAGE_OFFSET

  19: #else

  20: #define LOAD_OFFSET __START_KERNEL_map

  21: #endif

  22:  

  23: #include <asm-generic/vmlinux.lds.h>

  24: #include <asm/asm-offsets.h>

  25: #include <asm/thread_info.h>

  26: #include <asm/page_types.h>

  27: #include <asm/cache.h>

  28: #include <asm/boot.h>

  29:  

  30: #undef i386     /* in case the preprocessor is a 32bit one */

  31:  

  32: OUTPUT_FORMAT(CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT, CONFIG_OUTPUT_FORMAT)

  33:  

  34: #ifdef CONFIG_X86_32

  35: OUTPUT_ARCH(i386)

  36: ENTRY(phys_startup_32)

  37: jiffies = jiffies_64;

  38: #else

  39: OUTPUT_ARCH(i386:x86-64)

  40: ENTRY(phys_startup_64)

  41: jiffies_64 = jiffies;

  42: #endif

  43:  

  44: #if defined(CONFIG_X86_64) && defined(CONFIG_DEBUG_RODATA)

  45: /*

  46:  * On 64-bit, align RODATA to 2MB so that even with CONFIG_DEBUG_RODATA

  47:  * we retain large page mappings for boundaries spanning kernel text, rodata

  48:  * and data sections.

  49:  *

  50:  * However, kernel identity mappings will have different RWX permissions

  51:  * to the pages mapping to text and to the pages padding (which are freed) the

  52:  * text section. Hence kernel identity mappings will be broken to smaller

  53:  * pages. For 64-bit, kernel text and kernel identity mappings are different,

  54:  * so we can enable protection checks that come with CONFIG_DEBUG_RODATA,

  55:  * as well as retain 2MB large page mappings for kernel text.

  56:  */

  57: #define X64_ALIGN_DEBUG_RODATA_BEGIN    . = ALIGN(HPAGE_SIZE);

  58:  

  59: #define X64_ALIGN_DEBUG_RODATA_END                \

  60:         . = ALIGN(HPAGE_SIZE);                \

  61:         __end_rodata_hpage_align = .;

  62:  

  63: #else

  64:  

  65: #define X64_ALIGN_DEBUG_RODATA_BEGIN

  66: #define X64_ALIGN_DEBUG_RODATA_END

  67:  

  68: #endif

  69:  

  70: PHDRS {

  71:     text PT_LOAD FLAGS(5);          /* R_E */

  72:     data PT_LOAD FLAGS(6);          /* RW_ */

  73: #ifdef CONFIG_X86_64

  74:     user PT_LOAD FLAGS(5);          /* R_E */

  75: #ifdef CONFIG_SMP

  76:     percpu PT_LOAD FLAGS(6);        /* RW_ */

  77: #endif

  78:     init PT_LOAD FLAGS(7);          /* RWE */

  79: #endif

  80:     note PT_NOTE FLAGS(0);          /* ___ */

  81: }

  82:  

  83: SECTIONS

  84: {

  85: #ifdef CONFIG_X86_32

  86:         . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;

  87:         phys_startup_32 = startup_32 - LOAD_OFFSET;

  88: #else

  89:         . = __START_KERNEL;

  90:         phys_startup_64 = startup_64 - LOAD_OFFSET;

  91: #endif

  92:  

  93:     /* Text and read-only data */

  94:     .text :  AT(ADDR(.text) - LOAD_OFFSET) {

  95:         _text = .;

  96:         /* bootstrapping code */

  97:         HEAD_TEXT

  98: #ifdef CONFIG_X86_32

  99:         . = ALIGN(PAGE_SIZE);

 100:         *(.text..page_aligned)

 101: #endif

 102:         . = ALIGN(8);

 103:         _stext = .;

 104:         TEXT_TEXT

 105:         SCHED_TEXT

 106:         LOCK_TEXT

 107:         KPROBES_TEXT

 108:         ENTRY_TEXT

 109:         IRQENTRY_TEXT

 110:         *(.fixup)

 111:         *(.gnu.warning)

 112:         /* End of text section */

 113:         _etext = .;

 114:     } :text = 0x9090

 115:  

 116:     NOTES :text :note

 117:  

 118:     EXCEPTION_TABLE(16) :text = 0x9090

 119:  

 120: #if defined(CONFIG_DEBUG_RODATA)

 121:     /* .text should occupy whole number of pages */

 122:     . = ALIGN(PAGE_SIZE);

 123: #endif

 124:     X64_ALIGN_DEBUG_RODATA_BEGIN

 125:     RO_DATA(PAGE_SIZE)

 126:     X64_ALIGN_DEBUG_RODATA_END

 127:  

 128:     /* Data */

 129:     .data : AT(ADDR(.data) - LOAD_OFFSET) {

 130:         /* Start of data section */

 131:         _sdata = .;

 132:  

 133:         /* init_task */

 134:         INIT_TASK_DATA(THREAD_SIZE)

 135:  

 136: #ifdef CONFIG_X86_32

 137:         /* 32 bit has nosave before _edata */

 138:         NOSAVE_DATA

 139: #endif

 140:  

 141:         PAGE_ALIGNED_DATA(PAGE_SIZE)

 142:  

 143:         CACHELINE_ALIGNED_DATA(L1_CACHE_BYTES)

 144:  

 145:         DATA_DATA

 146:         CONSTRUCTORS

 147:  

 148:         /* rarely changed data like cpu maps */

 149:         READ_MOSTLY_DATA(INTERNODE_CACHE_BYTES)

 150:  

 151:         /* End of data section */

 152:         _edata = .;

 153:     } :data

 154:  

 155: #ifdef CONFIG_X86_64

 156:  

 157: #define VSYSCALL_ADDR (-10*1024*1024)

 158:  

 159: #define VLOAD_OFFSET (VSYSCALL_ADDR - __vsyscall_0 + LOAD_OFFSET)

 160: #define VLOAD(x) (ADDR(x) - VLOAD_OFFSET)

 161:  

 162: #define VVIRT_OFFSET (VSYSCALL_ADDR - __vsyscall_0)

 163: #define VVIRT(x) (ADDR(x) - VVIRT_OFFSET)

 164: #define EMIT_VVAR(x, offset) .vsyscall_var_ ## x    \

 165:     ADDR(.vsyscall_0) + offset             \

 166:     : AT(VLOAD(.vsyscall_var_ ## x)) {             \

 167:         *(.vsyscall_var_ ## x)            \

 168:     }                        \

 169:     x = VVIRT(.vsyscall_var_ ## x);

 170:  

 171:     . = ALIGN(4096);

 172:     __vsyscall_0 = .;

 173:  

 174:     . = VSYSCALL_ADDR;

 175:     .vsyscall_0 : AT(VLOAD(.vsyscall_0)) {

 176:         *(.vsyscall_0)

 177:     } :user

 178:  

 179:     . = ALIGN(L1_CACHE_BYTES);

 180:     .vsyscall_fn : AT(VLOAD(.vsyscall_fn)) {

 181:         *(.vsyscall_fn)

 182:     }

 183:  

 184:     .vsyscall_1 ADDR(.vsyscall_0) + 1024: AT(VLOAD(.vsyscall_1)) {

 185:         *(.vsyscall_1)

 186:     }

 187:     .vsyscall_2 ADDR(.vsyscall_0) + 2048: AT(VLOAD(.vsyscall_2)) {

 188:         *(.vsyscall_2)

 189:     }

 190:  

 191:     .vsyscall_3 ADDR(.vsyscall_0) + 3072: AT(VLOAD(.vsyscall_3)) {

 192:         *(.vsyscall_3)

 193:     }

 194:  

 195: #define __VVAR_KERNEL_LDS

 196: #include <asm/vvar.h>

 197: #undef __VVAR_KERNEL_LDS

 198:  

 199:     . = __vsyscall_0 + PAGE_SIZE;

 200:  

 201: #undef VSYSCALL_ADDR

 202: #undef VLOAD_OFFSET

 203: #undef VLOAD

 204: #undef VVIRT_OFFSET

 205: #undef VVIRT

 206: #undef EMIT_VVAR

 207:  

 208: #endif /* CONFIG_X86_64 */

 209:  

 210:     /* Init code and data - will be freed after init */

 211:     . = ALIGN(PAGE_SIZE);

 212:     .init.begin : AT(ADDR(.init.begin) - LOAD_OFFSET) {

 213:         __init_begin = .; /* paired with __init_end */

 214:     }

 215:  

 216: #if defined(CONFIG_X86_64) && defined(CONFIG_SMP)

 217:     /*

 218:      * percpu offsets are zero-based on SMP.  PERCPU_VADDR() changes the

 219:      * output PHDR, so the next output section - .init.text - should

 220:      * start another segment - init.

 221:      */

 222:     PERCPU_VADDR(INTERNODE_CACHE_BYTES, 0, :percpu)

 223: #endif

 224:  

 225:     INIT_TEXT_SECTION(PAGE_SIZE)

 226: #ifdef CONFIG_X86_64

 227:     :init

 228: #endif

 229:  

 230:     INIT_DATA_SECTION(16)

 231:  

 232:     /*

 233:      * Code and data for a variety of lowlevel trampolines, to be

 234:      * copied into base memory (< 1 MiB) during initialization.

 235:      * Since it is copied early, the main copy can be discarded

 236:      * afterwards.

 237:      */

 238:      .x86_trampoline : AT(ADDR(.x86_trampoline) - LOAD_OFFSET) {

 239:         x86_trampoline_start = .;

 240:         *(.x86_trampoline)

 241:         x86_trampoline_end = .;

 242:     }

 243:  

 244:     .x86_cpu_dev.init : AT(ADDR(.x86_cpu_dev.init) - LOAD_OFFSET) {

 245:         __x86_cpu_dev_start = .;

 246:         *(.x86_cpu_dev.init)

 247:         __x86_cpu_dev_end = .;

 248:     }

 249:  

 250:     /*

 251:      * start address and size of operations which during runtime

 252:      * can be patched with virtualization friendly instructions or

 253:      * baremetal native ones. Think page table operations.

 254:      * Details in paravirt_types.h

 255:      */

 256:     . = ALIGN(8);

 257:     .parainstructions : AT(ADDR(.parainstructions) - LOAD_OFFSET) {

 258:         __parainstructions = .;

 259:         *(.parainstructions)

 260:         __parainstructions_end = .;

 261:     }

 262:  

 263:     /*

 264:      * struct alt_inst entries. From the header (alternative.h):

 265:      * "Alternative instructions for different CPU types or capabilities"

 266:      * Think locking instructions on spinlocks.

 267:      */

 268:     . = ALIGN(8);

 269:     .altinstructions : AT(ADDR(.altinstructions) - LOAD_OFFSET) {

 270:         __alt_instructions = .;

 271:         *(.altinstructions)

 272:         __alt_instructions_end = .;

 273:     }

 274:  

 275:     /*

 276:      * And here are the replacement instructions. The linker sticks

 277:      * them as binary blobs. The .altinstructions has enough data to

 278:      * get the address and the length of them to patch the kernel safely.

 279:      */

 280:     .altinstr_replacement : AT(ADDR(.altinstr_replacement) - LOAD_OFFSET) {

 281:         *(.altinstr_replacement)

 282:     }

 283:  

 284:     /*

 285:      * struct iommu_table_entry entries are injected in this section.

 286:      * It is an array of IOMMUs which during run time gets sorted depending

 287:      * on its dependency order. After rootfs_initcall is complete

 288:      * this section can be safely removed.

 289:      */

 290:     .iommu_table : AT(ADDR(.iommu_table) - LOAD_OFFSET) {

 291:         __iommu_table = .;

 292:         *(.iommu_table)

 293:         __iommu_table_end = .;

 294:     }

 295:  

 296:     . = ALIGN(8);

 297:     .apicdrivers : AT(ADDR(.apicdrivers) - LOAD_OFFSET) {

 298:         __apicdrivers = .;

 299:         *(.apicdrivers);

 300:         __apicdrivers_end = .;

 301:     }

 302:  

 303:     . = ALIGN(8);

 304:     /*

 305:      * .exit.text is discard at runtime, not link time, to deal with

 306:      *  references from .altinstructions and .eh_frame

 307:      */

 308:     .exit.text : AT(ADDR(.exit.text) - LOAD_OFFSET) {

 309:         EXIT_TEXT

 310:     }

 311:  

 312:     .exit.data : AT(ADDR(.exit.data) - LOAD_OFFSET) {

 313:         EXIT_DATA

 314:     }

 315:  

 316: #if !defined(CONFIG_X86_64) || !defined(CONFIG_SMP)

 317:     PERCPU_SECTION(INTERNODE_CACHE_BYTES)

 318: #endif

 319:  

 320:     . = ALIGN(PAGE_SIZE);

 321:  

 322:     /* freed after init ends here */

 323:     .init.end : AT(ADDR(.init.end) - LOAD_OFFSET) {

 324:         __init_end = .;

 325:     }

 326:  

 327:     /*

 328:      * smp_locks might be freed after init

 329:      * start/end must be page aligned

 330:      */

 331:     . = ALIGN(PAGE_SIZE);

 332:     .smp_locks : AT(ADDR(.smp_locks) - LOAD_OFFSET) {

 333:         __smp_locks = .;

 334:         *(.smp_locks)

 335:         . = ALIGN(PAGE_SIZE);

 336:         __smp_locks_end = .;

 337:     }

 338:  

 339: #ifdef CONFIG_X86_64

 340:     .data_nosave : AT(ADDR(.data_nosave) - LOAD_OFFSET) {

 341:         NOSAVE_DATA

 342:     }

 343: #endif

 344:  

 345:     /* BSS */

 346:     . = ALIGN(PAGE_SIZE);

 347:     .bss : AT(ADDR(.bss) - LOAD_OFFSET) {

 348:         __bss_start = .;

 349:         *(.bss..page_aligned)

 350:         *(.bss)

 351:         . = ALIGN(PAGE_SIZE);

 352:         __bss_stop = .;

 353:     }

 354:  

 355:     . = ALIGN(PAGE_SIZE);

 356:     .brk : AT(ADDR(.brk) - LOAD_OFFSET) {

 357:         __brk_base = .;

 358:         . += 64 * 1024;        /* 64k alignment slop space */

 359:         *(.brk_reservation)    /* areas brk users have reserved */

 360:         __brk_limit = .;

 361:     }

 362:  

 363:     _end = .;

 364:  

 365:         STABS_DEBUG

 366:         DWARF_DEBUG

 367:  

 368:     /* Sections to be discarded */

 369:     DISCARDS

 370:     /DISCARD/ : { *(.eh_frame) }

 371: }

 372:  

 373:  

 374: #ifdef CONFIG_X86_32

 375: /*

 376:  * The ASSERT() sink to . is intentional, for binutils 2.14 compatibility:

 377:  */

 378: . = ASSERT((_end - LOAD_OFFSET <= KERNEL_IMAGE_SIZE),

 379:        "kernel image bigger than KERNEL_IMAGE_SIZE");

 380: #else

 381: /*

 382:  * Per-cpu symbols which need to be offset from __per_cpu_load

 383:  * for the boot processor.

 384:  */

 385: #define INIT_PER_CPU(x) init_per_cpu__##x = x + __per_cpu_load

 386: INIT_PER_CPU(gdt_page);

 387: INIT_PER_CPU(irq_stack_union);

 388:  

 389: /*

 390:  * Build-time check on the image size:

 391:  */

 392: . = ASSERT((_end - _text <= KERNEL_IMAGE_SIZE),

 393:        "kernel image bigger than KERNEL_IMAGE_SIZE");

 394:  

 395: #ifdef CONFIG_SMP

 396: . = ASSERT((irq_stack_union == 0),

 397:            "irq_stack_union is not at start of per-cpu area");

 398: #endif

 399:  

 400: #endif /* CONFIG_X86_32 */

 401:  

 402: #ifdef CONFIG_KEXEC

 403: #include <asm/kexec.h>

 404:  

 405: . = ASSERT(kexec_control_code_size <= KEXEC_CONTROL_CODE_MAX_SIZE,

 406:            "kexec control code size is too big");

 407: #endif

 408:  

SECTIONS
{
#ifdef CONFIG_X86_32
       . = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;
        phys_startup_32 = startup_32 - LOAD_OFFSET;
#else
        . = __START_KERNEL;
        phys_startup_64 = startup_64 - LOAD_OFFSET;
#endif

/* Text and read-only data */
    .text :  AT(ADDR(.text) - LOAD_OFFSET) {
        _text = .;
        /* bootstrapping code */
        HEAD_TEXT
#ifdef CONFIG_X86_32
        . = ALIGN(PAGE_SIZE);
        *(.text..page_aligned)
#endif
        . = ALIGN(8);
        _stext = .;
        TEXT_TEXT
        SCHED_TEXT
        LOCK_TEXT
        KPROBES_TEXT
        ENTRY_TEXT
        IRQENTRY_TEXT
        *(.fixup)
        *(.gnu.warning)
        /* End of text section */
        _etext = .;
    } :text = 0x9090

其中

   1:  

   2: #ifdef CONFIG_X86_32

   3: #define LOAD_OFFSET __PAGE_OFFSET

   4: #else

   5: #define LOAD_OFFSET __START_KERNEL_map

   6: #endif

   1: /* Physical address where kernel should be loaded. */

   2: #define LOAD_PHYSICAL_ADDR ((CONFIG_PHYSICAL_START \

   3:                 + (CONFIG_PHYSICAL_ALIGN - 1)) \

   4:                 & ~(CONFIG_PHYSICAL_ALIGN - 1))

config PHYSICAL_START
    hex "Physical address where the kernel is loaded" if (EXPERT || CRASH_DUMP)
    default "0x1000000"
    ---help---
      This gives the physical address where the kernel is loaded.

      If kernel is a not relocatable (CONFIG_RELOCATABLE=n) then
      bzImage will decompress itself to above physical address and
      run from there. Otherwise, bzImage will run from the address where
      it has been loaded by the boot loader and will ignore above physical
      address.

[arch/x86/Kconfig]

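因此,上面的“. = LOAD_OFFSET + LOAD_PHYSICAL_ADDR;”语句(原文中以红色标出)将.(当前位置计数器)定位到虚拟地址LOAD_OFFSET + LOAD_PHYSICAL_ADDR处。在32位下LOAD_OFFSET即__PAGE_OFFSET(默认为3GB,0xC0000000),当CONFIG_PHYSICAL_START为0x100000时,该地址为3GB+1MB(0xC0100000),

对应1MB物理内存处。(注意:若按上面Kconfig片段中的默认值0x1000000计算,则该地址为3GB+16MB(0xC1000000),对应16MB物理内存处。)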

#define HEAD_TEXT  *(.head.text)

因此,可以确认,1MB物理内存处是arch/x86/kernel/head_32.S中的startup_32函数

2. startup_32函数

该函数也是通过汇编定义

#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)

/* Number of possible pages in the lowmem region */
LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
   
/* Enough space to fit pagetables for the low memory linear map */
MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT

/*
* Worst-case size of the kernel mapping we need to make:
* a relocatable kernel can live anywhere in lowmem, so we need to be able
* to map all of lowmem.
*/
KERNEL_PAGES = LOWMEM_PAGES

INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE

LOWMEM_PAGES表示lowmem区域(从__PAGE_OFFSET到4GB,共1GB,即内核态的线性地址空间范围)所包含的页数。

long(((long long)1 << 32 ) / 4) >> 12 = 0x00040000

即需要0x40000个页表项来表示内核可能用到的地址空间。

PAGE_TABLE_SIZE(0x40000) = 0x40000 / 1024 = 0x100

需要页目录能包含0x100(256)个项目,每个项目用于指定对应的页表的物理地址,每个页目录项为32位。因此需要

0x100 * 4 = 1024bytes = 1KB

来保存内核需要的页目录项目。

接下来,分配存放内核页表的内存空间:

RESERVE_BRK(pagetables, INIT_MAP_SIZE)

   1: /*

   2:  * Reserve space in the brk section.  The name must be unique within

   3:  * the file, and somewhat descriptive.  The size is in bytes.  Must be

   4:  * used at file scope.

   5:  *

   6:  * (This uses a temp function to wrap the asm so we can pass it the

   7:  * size parameter; otherwise we wouldn't be able to.  We can't use a

   8:  * "section" attribute on a normal variable because it always ends up

   9:  * being @progbits, which ends up allocating space in the vmlinux

  10:  * executable.)

  11:  */

  12: #define RESERVE_BRK(name,sz)                        \

  13:     static void __section(.discard.text) __used notrace        \

  14:     __brk_reservation_fn_##name##__(void) {                \

  15:         asm volatile (                        \

  16:             ".pushsection .brk_reservation,\"aw\",@nobits;" \

  17:             ".brk." #name ":"                \

  18:             " 1:.skip %c0;"                    \

  19:             " .size .brk." #name ", . - 1b;"        \

  20:             " .popsection"                    \

  21:             : : "i" (sz));                    \

  22:     }

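相当于在.brk_reservation段中保留一块以.brk.pagetables为标号的空间,大小为INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE = 0x100 * 4KB = 1MB,即256张页表(每张页表占一页),而不是页目录项所占的1KB。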

下面这段代码,检查bootloader有没有通过KEEP_SEGMENTS标志明确地指示不要重新设置各个段寄存器的内容;如果没有这样的指示,就先用lgdt加载boot_gdt_descr所描述的GDT,再将各个数据段寄存器(ds、es、fs、gs、ss)都重置为__BOOT_DS段选择子。

   1: /* test KEEP_SEGMENTS flag to see if the bootloader is asking

   2:     us to not reload segments */

   3: testb $(1<<6), BP_loadflags(%esi)

   4: jnz 2f

   5:  

   6: /*

   7:  * Set segments to known values.

   8:  */

   9: lgdt pa(boot_gdt_descr)

  10: movl $(__BOOT_DS),%eax

  11: movl %eax,%ds

  12: movl %eax,%es

  13: movl %eax,%fs

  14: movl %eax,%gs

  15: movl %eax,%ss

在上面.data section中有设置标号stack_start

   1: .data

   2: .balign 4

   3: ENTRY(stack_start)

   4:     .long init_thread_union+THREAD_SIZE

   5:  

   6: early_recursion_flag:

   7:     .long 0

   8:  

   9: ready:    .byte 0

  10:  

  11: int_msg:

  12:     .asciz "Unknown interrupt or fault at: %p %p %p\n"

  13:  

  14: fault_msg:

  15: /* fault info: */

  16:     .ascii "BUG: Int %d: CR2 %p\n"

  17: /* pusha regs: */

  18:     .ascii "     EDI %p  ESI %p  EBP %p  ESP %p\n"

  19:     .ascii "     EBX %p  EDX %p  ECX %p  EAX %p\n"

  20: /* fault frame: */

  21:     .ascii "     err %p  EIP %p   CS %p  flg %p\n"

  22:     .ascii "Stack: %p %p %p %p %p %p %p %p\n"

  23:     .ascii "       %p %p %p %p %p %p %p %p\n"

  24:     .asciz "       %p %p %p %p %p %p %p %p\n"

  25:  

  26: #include "../../x86/xen/xen-head.S"