u-boot或者uefi启动Linux的时候,会把device tree 所在的地址通过x0寄存器传给Linux。 kernel在boot阶段会使用到x0寄存器,所以在primary_entry -> preserve_boot_args 会把device tree的地址保存到x21寄存器:
SYM_CODE_START_LOCAL(preserve_boot_args)
mov x21, x0 // x21=FDT
1:1 mapping阶段
在init_idmap_pg_dir阶段,会把x21中保存的FDT物理地址mapping到kernel结尾之后2M对齐的地方,这个地方其实不是1:1的映射。 参考:primary_entry -> create_idmap:
/* Remap the FDT after the kernel image */
adrp x1, _text
adrp x22, _end + SWAPPER_BLOCK_SIZE
bic x2, x22, #SWAPPER_BLOCK_SIZE - 1
bfi x22, x21, #0, #SWAPPER_BLOCK_SHIFT // remapped FDT address
add x3, x2, #MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE
bic x4, x21, #SWAPPER_BLOCK_SIZE - 1
mov_q x5, SWAPPER_RW_MMUFLAGS
mov x6, #SWAPPER_BLOCK_SHIFT
bl remap_region
可以发现,x22寄存器对应的就是fdt在使用init_idmap_pg_dir页表阶段的VA,在enable了init_idmap_pg_dir 之后,我们只能使用这个VA来访问fdt的内容。 这也就是为什么在__primary_switch中,调用__pi_kaslr_early_init 之前要把x22寄存器赋给x0:kaslr_early_init的第一个参数对应的就是fdt的地址。
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
#ifdef CONFIG_RELOCATABLE
adrp x23, KERNEL_START
and x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
mov x0, x22
adrp x1, init_pg_end
mov sp, x1
mov x29, xzr
bl __pi_kaslr_early_init
and x24, x0, #SZ_2M - 1 // capture memstart offset seed
bic x0, x0, #SZ_2M - 1
orr x23, x23, x0 // record kernel offset
#endif
这个时候还没有设置好栈,所以不能直接调用C代码。这里为了能够调用下面的C代码,先把sp指向init_pg_end,临时使用这个页表区域作为栈,就可以调用如下C 代码:
asmlinkage u64 kaslr_early_init(void *fdt)
{
u64 seed;
if (is_kaslr_disabled_cmdline(fdt))
return 0;
init_task 阶段
在这个阶段,x21还是fdt的PA,而x22是对应的VA。在函数__primary_switched 中,首先把x21中的PA存到__fdt_pointer 这个变量中,之后再由setup_arch->setup_machine_fdt 把这个PA映射成0xffffxxxxxxxxxxxx的地址。
SYM_FUNC_START_LOCAL(__primary_switched)
adr_l x4, init_task
init_cpu_task x4, x5, x6
adr_l x8, vectors // load VBAR_EL1 with virtual
msr vbar_el1, x8 // vector table address
isb
stp x29, x30, [sp, #-16]!
mov x29, sp
str_l x21, __fdt_pointer, x5 // Save FDT pointer
在__primary_switched,我们已经把task 切换到init_task,这个时候对应的mm是init_mm:
struct task_struct init_task __aligned(L1_CACHE_BYTES) = {
//...
.mm = NULL,
.active_mm = &init_mm,
struct mm_struct init_mm = {
//...
INIT_MM_CONTEXT(init_mm)
};
#define INIT_MM_CONTEXT(name) \
.pgd = init_pg_dir,
可以看到这个task的pgd被初始化成了init_pg_dir,这也就是在调用early_fdt_map 的时候,没有给MMU的页表的基地址的原因。 但是init_pg_dir这个区域的大小是根据kernel image的大小已经确定好了的,如果想要在early_fdt_map 函数里面增加fdt的mapping,那么就需要额外分配页表。 在进行fdt的mapping的时候,根据VA的不同,这里就需要用到中间或者最后一级的页表,而这个时候memblock又没有ready,所以这个时候只能是静态分配的,参考 arch/arm64/mm/fixmap.c:
static pte_t bm_pte[NR_BM_PTE_TABLES][PTRS_PER_PTE] __page_aligned_bss;
static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss __maybe_unused;
static pud_t bm_pud[PTRS_PER_PUD] __page_aligned_bss __maybe_unused;
在函数early_fixmap_init 建立FDT页表的时候,都是使用的上面几个静态的页表。调用完 __primary_switched-> early_fdt_map-> early_fixmap_init();之后,可以看到FIXADDR并没有和PA链接起来,页表如下:
>mmu print EL1N_S1_TTBR1_EL1 TTBR1_EL1=0x00000000865DA000
Input Address | Type | Next Level | Output Address | Properties
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0xFFFF000000000000 | TTBR1_EL1 | NP:0x00000000865DA000 | | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
- 0xFFFF000000000000 | Invalid (x256) | | |
+ 0xFFFF800000000000 | Level 0 Table | NP:0x00000000865DB000 | | APTable=0x0, UXNTable=0, PXNTable=0
+ 0xFFFF800000000000 | Level 1 Table | NP:0x00000000865DC000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFF800000000000 | Invalid (x64) | | |
- 0xFFFF800008000000 | Level 2 Block | | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008200000 | Level 2 Block | | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008400000 | Level 2 Block | | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008600000 | Level 2 Block | | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008800000 | Level 2 Block | | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008A00000 | Level 2 Block | | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008C00000 | Level 2 Block | | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008E00000 | Level 2 Block | | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009000000 | Level 2 Block | | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009200000 | Level 2 Block | | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009400000 | Level 2 Block | | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009600000 | Level 2 Block | | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009800000 | Level 2 Block | | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009A00000 | Level 2 Block | | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009C00000 | Level 2 Block | | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009E00000 | Level 2 Block | | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A000000 | Level 2 Block | | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A200000 | Level 2 Block | | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A400000 | Level 2 Block | | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A600000 | Invalid (x429) | | |
- 0xFFFF800040000000 | Invalid (x511) | | |
- 0xFFFF808000000000 | Invalid (x246) | | |
+ 0xFFFFFB8000000000 | Level 0 Table | NP:0x0000000086542000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFFFB8000000000 | Invalid (x511) | | |
+ 0xFFFFFBFFC0000000 | Level 1 Table | NP:0x0000000086543000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFFFBFFC0000000 | Invalid (x493) | | |
+ 0xFFFFFBFFFDA00000 | Level 2 Table | NP:0x0000000086544000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFFFBFFFDA00000 | Invalid (x512) | | |
- 0xFFFFFBFFFDC00000 | Invalid (x18) | | |
- 0xFFFFFC0000000000 | Invalid (x8) | | |
0x0000000086542000 0x0000000086543000 等地址就是bm_pud, bm_pmd 的地址。 在__primary_switched-> early_fdt_map->fixmap_remap_fdt 调用完成之后,fdt的PA就和VA给对应起来了,页表如下:
>mmu print EL1N_S1_TTBR1_EL1 TTBR1_EL1=0x00000000865DA000
Input Address | Type | Next Level | Output Address | Properties
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0xFFFF000000000000 | TTBR1_EL1 | NP:0x00000000865DA000 | | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
- 0xFFFF000000000000 | Invalid (x256) | | |
+ 0xFFFF800000000000 | Level 0 Table | NP:0x00000000865DB000 | | APTable=0x0, UXNTable=0, PXNTable=0
+ 0xFFFF800000000000 | Level 1 Table | NP:0x00000000865DC000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFF800000000000 | Invalid (x64) | | |
- 0xFFFF800008000000 | Level 2 Block | | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008200000 | Level 2 Block | | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008400000 | Level 2 Block | | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008600000 | Level 2 Block | | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008800000 | Level 2 Block | | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008A00000 | Level 2 Block | | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008C00000 | Level 2 Block | | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008E00000 | Level 2 Block | | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009000000 | Level 2 Block | | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009200000 | Level 2 Block | | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009400000 | Level 2 Block | | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009600000 | Level 2 Block | | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009800000 | Level 2 Block | | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009A00000 | Level 2 Block | | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009C00000 | Level 2 Block | | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009E00000 | Level 2 Block | | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A000000 | Level 2 Block | | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A200000 | Level 2 Block | | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A400000 | Level 2 Block | | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A600000 | Invalid (x429) | | |
- 0xFFFF800040000000 | Invalid (x511) | | |
- 0xFFFF808000000000 | Invalid (x246) | | |
+ 0xFFFFFB8000000000 | Level 0 Table | NP:0x0000000086542000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFFFB8000000000 | Invalid (x511) | | |
+ 0xFFFFFBFFC0000000 | Level 1 Table | NP:0x0000000086543000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFFFBFFC0000000 | Invalid (x493) | | |
+ 0xFFFFFBFFFDA00000 | Level 2 Table | NP:0x0000000086544000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFFFBFFFDA00000 | Invalid (x512) | | |
- 0xFFFFFBFFFDC00000 | Level 2 Block | | NP:0x0000000082000000 | UXN=1, PXN=1, Contiguous=0, DBM=1, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFFFBFFFDE00000 | Invalid (x17) | | |
- 0xFFFFFC0000000000 | Invalid (x8) | | |
0x0000000082000000 就是之前保存在x21、再通过x0传给early_fdt_map的FDT物理地址。 代码如下:
SYM_FUNC_START_LOCAL(__primary_switched)
//...
mov x0, x21 // pass FDT address in x0
bl early_fdt_map // Try mapping the FDT early
mov x0, x20 // pass the full boot status
bl init_feature_override // Parse cpu feature overrides
这个时候映射的FDT的页表,只给函数init_feature_override使用。 这些FDT(也就是fixed map)的页表用完之后也不会释放,会被复制到最终的页表(swapper_pg_dir)里面: setup_arch->paging_init->map_kernel->fixmap_copy:
/*
 * Hook the already-built fixmap page tables into the page directory @pgdir.
 *
 * @pgdir: top-level page directory that should gain the fixmap mapping.
 *
 * Three cases are handled:
 *  - the pgd entry of @pgdir covering FIXADDR_TOT_START is still empty:
 *    copy the entry found through pgd_offset_k() for the same address;
 *  - the entry is already populated and CONFIG_PGTABLE_LEVELS > 3:
 *    install bm_pmd at the pud level instead;
 *  - anything else is treated as fatal (BUG()).
 */
void __init fixmap_copy(pgd_t *pgdir)
{
/* A zero pgd value means nothing else occupies this top-level slot yet. */
if (!READ_ONCE(pgd_val(*pgd_offset_pgd(pgdir, FIXADDR_TOT_START)))) {
/*
 * The fixmap falls in a separate pgd to the kernel, and doesn't
 * live in the carveout for the swapper_pg_dir. We can simply
 * re-use the existing dir for the fixmap.
 */
set_pgd(pgd_offset_pgd(pgdir, FIXADDR_TOT_START),
READ_ONCE(*pgd_offset_k(FIXADDR_TOT_START)));
} else if (CONFIG_PGTABLE_LEVELS > 3) {
pgd_t *bm_pgdp;
p4d_t *bm_p4dp;
pud_t *bm_pudp;
/*
 * The fixmap shares its top level pgd entry with the kernel
 * mapping. This can really only occur when we are running
 * with 16k/4 levels, so we can simply reuse the pud level
 * entry instead.
 */
BUG_ON(!IS_ENABLED(CONFIG_ARM64_16K_PAGES));
/* Walk pgd -> p4d -> pud for FIXADDR_TOT_START inside @pgdir. */
bm_pgdp = pgd_offset_pgd(pgdir, FIXADDR_TOT_START);
bm_p4dp = p4d_offset(bm_pgdp, FIXADDR_TOT_START);
bm_pudp = pud_set_fixmap_offset(bm_p4dp, FIXADDR_TOT_START);
/* Point the pud entry at bm_pmd (passed through lm_alias()). */
pud_populate(&init_mm, bm_pudp, lm_alias(bm_pmd));
/* Undo the temporary mapping set up by pud_set_fixmap_offset(). */
pud_clear_fixmap();
} else {
/* Populated pgd entry with 3 or fewer levels: not expected. */
BUG();
}
}
setup_arch阶段
函数setup_arch-> early_fixmap_init其实是可以不用调用的,因为在__primary_switched-> early_fdt_map -> early_fixmap_init 已经调用过这个函数,并且这个时候的页表并没有什么变化。
在setup_arch 阶段,这里会最终设置FDT的页表,就是在setup_arch-> setup_machine_fdt -> fixmap_remap_fdt ,但是这个fixmap_remap_fdt 其实不是必须的,因为在上面的流程里面,已经把FDT 的VA->PA给mapping上了。 最终的页表也不会有任何变化。
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
const char *name;
最终,在函数 setup_machine_fdt-> early_init_dt_scan-> early_init_dt_verify 中,FDT的VA地址被赋给了变量 initial_boot_params,后面就用这个变量来解析FDT了。
static void __init setup_machine_fdt(phys_addr_t dt_phys)
{
int size;
void *dt_virt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL);
const char *name;
if (dt_virt)
memblock_reserve(dt_phys, size);
if (!dt_virt || !early_init_dt_scan(dt_virt)) {
//..
}
/* Early fixups are done, map the FDT as read-only now */
fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL_RO);
接下来就是从init_pg_dir转换到swapper_pg_dir这个页表,前面讲过会把FDT的页表接上swapper_pg_dir,不需要重新映射,调用过程: setup_arch->paging_init->map_kernel->fixmap_copy。 这样FDT就完成了在正式的页表里面的映射。
Comments !