u-boot或者uefi启动kernel的时候,默认MMU是关闭的,所以这个时候kernel实际运行的地址就是物理地址。 为了前期尽早打开MMU和cache,kernel需要建立MMU的页表,这个时候的页表是1:1映射,即VA = PA。
分配
这个页表是在Link 脚本中进行分配,参考代码:
/*
 * Reserve INIT_IDMAP_DIR_SIZE bytes for the initial identity-map (VA == PA)
 * page tables; init_idmap_pg_dir / init_idmap_pg_end mark the start and end
 * of that region.
 */
init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
init_idmap_pg_end = .;
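至于它的大小 INIT_IDMAP_DIR_SIZE,为什么在 VA 小于 48 位的时候需要 +2 个 page:+2 表示在基础页数(INIT_IDMAP_DIR_PAGES)之上再预留两个页面的空间。内核源码里对此的注释是 "the initial ID map may need two extra pages if it needs to be extended",也就是说当 VA_BITS < 48 时,idmap 可能需要被扩展(例如 kernel 被加载到的物理地址超出了 VA_BITS 所能覆盖的范围,这时 idmap 要临时多配置一级页表),扩展时会额外占用页表页,所以预先多留两个 page,以确保有足够的空间。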
/*
 * Size in bytes of the region reserved for the initial identity-map page
 * tables. With VA_BITS < 48, two pages are reserved on top of
 * INIT_IDMAP_DIR_PAGES; otherwise exactly INIT_IDMAP_DIR_PAGES pages are used.
 */
#if VA_BITS < 48
#define INIT_IDMAP_DIR_SIZE ((INIT_IDMAP_DIR_PAGES + 2) * PAGE_SIZE)
#else
#define INIT_IDMAP_DIR_SIZE (INIT_IDMAP_DIR_PAGES * PAGE_SIZE)
#endif
创建
在函数 create_idmap 中会创建从 _text 到 _end + MAX_FDT_SIZE + SWAPPER_BLOCK_SIZE 的 1:1 映射。 从启动到正式的页表建立之前,kernel 不会写任何数据,除了写 device tree 和正式的页表。 所以默认把整个区域映射成 SWAPPER_RX_MMUFLAGS,然后再把 FDT 和 init_pg_dir 的 region 设置成 SWAPPER_RW_MMUFLAGS。
建立完成之后,在 __enable_mmu 中会去使能 MMU,同时设置 TTBR0 = init_idmap_pg_dir,TTBR1 = reserved_pg_dir。
MMU的页表如下:
>mmu print EL1N_S1_TTBR0_EL1 TTBR0_EL1=init_idmap_pg_dir
Input Address | Type | Next Level | Output Address | Properties
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0x00000000 | TTBR0_EL1 | NP:0x0000000085A90000 | | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
+ 0x00000000 | Level 0 Table | NP:0x0000000085A91000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0x00000000 | Invalid | | |
- 0x40000000 | Invalid | | |
+ 0x80000000 | Level 1 Table | NP:0x0000000085A92000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0x80000000 | Invalid (x32) | | |
- 0x84000000 | Level 2 Block | | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x84200000 | Level 2 Block | | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x84400000 | Level 2 Block | | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x84600000 | Level 2 Block | | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x84800000 | Level 2 Block | | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x84A00000 | Level 2 Block | | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x84C00000 | Level 2 Block | | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x84E00000 | Level 2 Block | | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85000000 | Level 2 Block | | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85200000 | Level 2 Block | | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85400000 | Level 2 Block | | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85600000 | Level 2 Block | | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85800000 | Level 2 Block | | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85A00000 | Level 2 Block | | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85C00000 | Level 2 Block | | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x85E00000 | Level 2 Block | | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x86000000 | Level 2 Block | | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x86200000 | Level 2 Block | | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x86400000 | Level 2 Block | | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0x86600000 | Level 2 Block | | NP:0x0000000082000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0x86800000 | Level 2 Block | | NP:0x0000000082200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0x86A00000 | Invalid (x459) | | |
- 0xC0000000 | Invalid (x509) | | |
- 0x8000000000 | Invalid (x511) | | |
设置TTBR1
这个时候TTBR1还是指向reserved_pg_dir,为了建立正式的VA到PA的映射,在函数:create_kernel_mapping 中会把整个kernel image 建立VA 到PA的映射:
/*
 * Build the mapping of the kernel image in init_pg_dir.
 *
 * The virtual range starts at KIMAGE_VADDR (plus the KASLR displacement held
 * in x23 when CONFIG_RELOCATABLE is set) and spans (_end - _text) bytes. All
 * of it is mapped with SWAPPER_RW_MMUFLAGS. A store barrier is issued before
 * returning so the page table walker sees the new entries.
 *
 * NOTE(review): the roles of the individual map_memory arguments are defined
 * by that macro, which is not visible here; confirm against its definition.
 */
SYM_FUNC_START_LOCAL(create_kernel_mapping)
// x0 = address of init_pg_dir, the table being populated
adrp x0, init_pg_dir
mov_q x5, KIMAGE_VADDR // compile time __va(_text)
#ifdef CONFIG_RELOCATABLE
add x5, x5, x23 // add KASLR displacement
#endif
adrp x6, _end // runtime __pa(_end)
adrp x3, _text // runtime __pa(_text)
sub x6, x6, x3 // _end - _text
add x6, x6, x5 // runtime __va(_end)
// x7 = attributes applied to every entry of this mapping
mov_q x7, SWAPPER_RW_MMUFLAGS
map_memory x0, x1, x5, x6, x7, x3, (VA_BITS - PGDIR_SHIFT), x10, x11, x12, x13, x14
dsb ishst // sync with page table walker
ret
SYM_FUNC_END(create_kernel_mapping)
因为这个也不是最终的页表,所以我们看到属性全都设置成了 SWAPPER_RW_MMUFLAGS。 后面这个页表会被 swapper_pg_dir 给替换掉。 TTBR1 的 MMU 页表如下:
>mmu print EL1N_S1_TTBR1_EL1 TTBR1_EL1=init_pg_dir
Input Address | Type | Next Level | Output Address | Properties
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0xFFFF000000000000 | TTBR1_EL1 | NP:0x00000000865DA000 | | TBI1=1, TBI0=1, AS=1, IPS=256TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
- 0xFFFF000000000000 | Invalid (x256) | | |
+ 0xFFFF800000000000 | Level 0 Table | NP:0x00000000865DB000 | | APTable=0x0, UXNTable=0, PXNTable=0
+ 0xFFFF800000000000 | Level 1 Table | NP:0x00000000865DC000 | | APTable=0x0, UXNTable=0, PXNTable=0
- 0xFFFF800000000000 | Invalid (x64) | | |
- 0xFFFF800008000000 | Level 2 Block | | NP:0x0000000084000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008200000 | Level 2 Block | | NP:0x0000000084200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008400000 | Level 2 Block | | NP:0x0000000084400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008600000 | Level 2 Block | | NP:0x0000000084600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008800000 | Level 2 Block | | NP:0x0000000084800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008A00000 | Level 2 Block | | NP:0x0000000084A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008C00000 | Level 2 Block | | NP:0x0000000084C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800008E00000 | Level 2 Block | | NP:0x0000000084E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009000000 | Level 2 Block | | NP:0x0000000085000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009200000 | Level 2 Block | | NP:0x0000000085200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009400000 | Level 2 Block | | NP:0x0000000085400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009600000 | Level 2 Block | | NP:0x0000000085600000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009800000 | Level 2 Block | | NP:0x0000000085800000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009A00000 | Level 2 Block | | NP:0x0000000085A00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009C00000 | Level 2 Block | | NP:0x0000000085C00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF800009E00000 | Level 2 Block | | NP:0x0000000085E00000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A000000 | Level 2 Block | | NP:0x0000000086000000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A200000 | Level 2 Block | | NP:0x0000000086200000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A400000 | Level 2 Block | | NP:0x0000000086400000 | UXN=0, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0xFFFF80000A600000 | Invalid (x429) | | |
- 0xFFFF800040000000 | Invalid (x511) | | |
- 0xFFFF808000000000 | Invalid (x255) | | |
卸载init_idmap_pg_dir
在函数__primary_switch,kernel会从1:1 的VA 切换到正式的真正的VA:
/*
 * Switch from running on the identity map to running at the kernel's real
 * virtual addresses.
 *
 * Sequence visible here:
 *   1. call __enable_mmu with x1 = reserved_pg_dir and x2 = init_idmap_pg_dir;
 *   2. with CONFIG_RELOCATABLE, compute the kernel offset into x23 (including
 *      the value returned by __pi_kaslr_early_init under CONFIG_RANDOMIZE_BASE);
 *   3. clear the page tables and build the kernel mapping in init_pg_dir;
 *   4. install init_pg_dir through load_ttbr1;
 *   5. with CONFIG_RELOCATABLE, call __relocate_kernel;
 *   6. branch to __primary_switched with x0 = __pa(KERNEL_START).
 */
SYM_FUNC_START_LOCAL(__primary_switch)
adrp x1, reserved_pg_dir
adrp x2, init_idmap_pg_dir
bl __enable_mmu
#ifdef CONFIG_RELOCATABLE
// x23 = bits of the KERNEL_START address below MIN_KIMG_ALIGN
adrp x23, KERNEL_START
and x23, x23, MIN_KIMG_ALIGN - 1
#ifdef CONFIG_RANDOMIZE_BASE
// x0 = x22, passed as the argument to __pi_kaslr_early_init
// NOTE(review): what x22 holds is set up outside this snippet; confirm.
mov x0, x22
// use the address of init_pg_end as the stack pointer for the call below
adrp x1, init_pg_end
mov sp, x1
// x29 = 0
mov x29, xzr
bl __pi_kaslr_early_init
and x24, x0, #SZ_2M - 1 // capture memstart offset seed
// clear the low bits (below 2M) of the returned value before merging into x23
bic x0, x0, #SZ_2M - 1
orr x23, x23, x0 // record kernel offset
#endif
#endif
bl clear_page_tables
bl create_kernel_mapping
// make init_pg_dir the TTBR1 page table
adrp x1, init_pg_dir
load_ttbr1 x1, x1, x2
#ifdef CONFIG_RELOCATABLE
bl __relocate_kernel
#endif
// load the address of __primary_switched from the literal pool and jump to it
ldr x8, =__primary_switched
adrp x0, KERNEL_START // __pa(KERNEL_START)
br x8
SYM_FUNC_END(__primary_switch)
这个时候1:1 的mapping就基本不用了,就在setup_arch函数调用cpu_uninstall_idmap把TTBR0设置成reserved_pg_dir
/*
 * Stop using the identity map on the current CPU.
 *
 * Calls, in order: cpu_set_reserved_ttbr0() (per the surrounding text this
 * points TTBR0 at reserved_pg_dir), local_flush_tlb_all() and
 * cpu_set_default_tcr_t0sz(). If the task's active mm is not init_mm and
 * system_uses_ttbr0_pan() is false, it then switches to that mm's page
 * tables via cpu_switch_mm().
 */
static inline void cpu_uninstall_idmap(void)
{
/* mm that was active before the identity map was installed */
struct mm_struct *mm = current->active_mm;
cpu_set_reserved_ttbr0();
local_flush_tlb_all();
cpu_set_default_tcr_t0sz();
/* only reinstall a page table for a non-init mm, and only without TTBR0 PAN */
if (mm != &init_mm && !system_uses_ttbr0_pan())
cpu_switch_mm(mm->pgd, mm);
}
接下来还会在 paging_init 里面使用 init_idmap_pg_dir,因为我们要切换 MMU 页表,不能直接使用 TTBR1 映射的函数,需要使用 idmap 映射的函数才能切换 TTBR1 所对应的页表。 参考函数:
/*
 * Replace the TTBR1 page table with @pgdp.
 *
 * The switch is done by idmap_cpu_replace_ttbr1, called through its physical
 * address while the identity map @idmap is installed. The identity map is
 * uninstalled again before returning.
 *
 * @pgdp:  new top-level page table; converted to a TTBR value via
 *         virt_to_phys() and phys_to_ttbr().
 * @idmap: identity-map page table passed to __cpu_install_idmap().
 * @cnp:   when true, TTBR_CNP_BIT is ORed into the TTBR1 value.
 */
static inline void __cpu_replace_ttbr1(pgd_t *pgdp, pgd_t *idmap, bool cnp)
{
typedef void (ttbr_replace_func)(phys_addr_t);
extern ttbr_replace_func idmap_cpu_replace_ttbr1;
ttbr_replace_func *replace_phys;
unsigned long daif;
/* phys_to_ttbr() zeros lower 2 bits of ttbr with 52-bit PA */
phys_addr_t ttbr1 = phys_to_ttbr(virt_to_phys(pgdp));
if (cnp)
ttbr1 |= TTBR_CNP_BIT;
/* function pointer holding the physical address of idmap_cpu_replace_ttbr1 */
replace_phys = (void *)__pa_symbol(idmap_cpu_replace_ttbr1);
/* install the identity map before calling through replace_phys */
__cpu_install_idmap(idmap);
/*
 * We really don't want to take *any* exceptions while TTBR1 is
 * in the process of being replaced so mask everything.
 */
daif = local_daif_save();
replace_phys(ttbr1);
local_daif_restore(daif);
cpu_uninstall_idmap();
}
回收init_idmap_pg_dir
init_idmap_pg_dir 这个页表使用完之后并不是立即回收利用。这部分内存是 __init_begin 到 __init_end 区域的一部分,所以会在系统初始化结束之后统一进行回收,回收发生在 kernel_init -> free_initmem 中:
/*
 * Layout excerpt: the init_idmap_pg_dir region is placed between __init_begin
 * and __init_end, so it belongs to the range that free_initmem() releases.
 */
__init_begin = .;
//...
init_idmap_pg_dir = .;
. += INIT_IDMAP_DIR_SIZE;
init_idmap_pg_end = .;
//.....
. = ALIGN(SEGMENT_ALIGN);
__initdata_end = .;
__init_end = .;
/*
 * Release the memory between __init_begin and __init_end.
 *
 * The range is passed to free_reserved_area() through its lm_alias()
 * addresses, with POISON_FREE_INITMEM and the label "unused kernel". The
 * range between __init_begin and __init_end is then passed to vunmap_range().
 */
void free_initmem(void)
{
free_reserved_area(lm_alias(__init_begin),
lm_alias(__init_end),
POISON_FREE_INITMEM, "unused kernel");
/*
 * Unmap the __init region but leave the VM area in place. This
 * prevents the region from being reused for kernel modules, which
 * is not supported by kallsyms.
 */
vunmap_range((u64)__init_begin, (u64)__init_end);
}
参考:
Comments !