当主CPU启动完成之后,已经把页表设置成了swapper_pg_dir, 到这里为止,kernel一般情况下是不再需要idmap这个页表的,但是还是有些特殊情况下需要idmap:
- 第二个CPU启动
- 当第二个CPU启动的时候,MMU是关闭的。要设置TTBR1 到swapper_pg_dir 之前,需要把TTBR0 设置成idmap,否则系统在使能MMU的时候找不到下一条指令的地址。参考函数:secondary_startup
- CPU resume时
- 当CPU从sleep状态回到正常状态的时候,这个时候也需要重新设置TTBR1 到swapper_pg_dir。同样需要把TTBR0 设置成idmap。参考函数:cpu_resume
- 更改TTBR1 的Common not Private bit时
- 在第一次安装页表swapper_pg_dir时,CPU feature的框架还没有准备好;等到CPU feature框架启动之后,如果检测到CPU支持Common not Private,就需要重新设置TTBR1的Common not Private bit。这个时候也需要用到idmap。 参考函数:cpu_enable_swapper_cnp
- //TODO kasan时
其实这个时候也不是整个kernel都需要做idmap,只是一些跟MMU相关的代码,才需要做idmap,下面会详细提到。
idmap_pg_dir页表的分配
顶级页表是在Linux link 脚本中分配:
idmap_pg_dir = .;
. += PAGE_SIZE;
其他级别的页表都是通过memblock的函数:early_pgtable_alloc 来进行分配。
early_pgtable_alloc分配出来的是物理地址,这个时候不可以直接对它进行访问,需要用到fixmap里面的 FIX_PGD,FIX_PUD,FIX_PMD以及FIX_PTE,把这个分配出的物理地址在页表swapper_pg_dir 中进行映射。这也就是FIX_XXX的作用。
/*
 * Excerpt: zero a page-table page and return its physical address.
 * The part that obtains phys (and declares ptr) is elided by the
 * "//......" line below, so it is not shown here.
 */
static phys_addr_t __init early_pgtable_alloc(int shift)
{
//......
/* Temporarily map phys through the FIX_PTE fixmap slot so it can be written. */
ptr = pte_set_fixmap(phys);
/* Clear one full page through that temporary mapping. */
memset(ptr, 0, PAGE_SIZE);
/*
* Implicit barriers also ensure the zeroed page is visible to the page
* table walker
*/
pte_clear_fixmap();
/* The caller receives the physical address, not the temporary pointer. */
return phys;
}
下面这些宏的主要作用就是映射这些页表以及地址之间的转换:
/*
 * PTE level.
 * pte_offset_phys: physical address of the PTE for addr, computed as the
 *   table address taken from the PMD entry *dir plus pte_index(addr) entries.
 * pte_set_fixmap: map a physical address through the FIX_PTE slot and
 *   return the result as a pte_t pointer.
 * pte_set_fixmap_offset: combine the two above for a given PMD entry.
 * pte_clear_fixmap: clear the FIX_PTE slot.
 * NOTE(review): set_fixmap_offset() is presumed to return a usable virtual
 * address for addr — confirm against its definition.
 */
#define pte_offset_phys(dir,addr) (pmd_page_paddr(READ_ONCE(*(dir))) + pte_index(addr) * sizeof(pte_t))
#define pte_set_fixmap(addr) ((pte_t *)set_fixmap_offset(FIX_PTE, addr))
#define pte_set_fixmap_offset(pmd, addr) pte_set_fixmap(pte_offset_phys(pmd, addr))
#define pte_clear_fixmap() clear_fixmap(FIX_PTE)
/* PMD level: same pattern, table address read from the PUD entry, slot FIX_PMD. */
#define pmd_offset_phys(dir, addr) (pud_page_paddr(READ_ONCE(*(dir))) + pmd_index(addr) * sizeof(pmd_t))
#define pmd_set_fixmap(addr) ((pmd_t *)set_fixmap_offset(FIX_PMD, addr))
#define pmd_set_fixmap_offset(pud, addr) pmd_set_fixmap(pmd_offset_phys(pud, addr))
#define pmd_clear_fixmap() clear_fixmap(FIX_PMD)
/* PUD level: same pattern, table address read from the P4D entry, slot FIX_PUD. */
#define pud_offset_phys(dir, addr) (p4d_page_paddr(READ_ONCE(*(dir))) + pud_index(addr) * sizeof(pud_t))
#define pud_set_fixmap(addr) ((pud_t *)set_fixmap_offset(FIX_PUD, addr))
#define pud_set_fixmap_offset(p4d, addr) pud_set_fixmap(pud_offset_phys(p4d, addr))
#define pud_clear_fixmap() clear_fixmap(FIX_PUD)
/* PGD level: only map/unmap helpers through the FIX_PGD slot are defined here. */
#define pgd_set_fixmap(addr) ((pgd_t *)set_fixmap_offset(FIX_PGD, addr))
#define pgd_clear_fixmap() clear_fixmap(FIX_PGD)
映射页表的函数会使用如上的宏才能读写页表,如:
static void init_pmd(pud_t *pudp, unsigned long addr, unsigned long end,
phys_addr_t phys, pgprot_t prot,
phys_addr_t (*pgtable_alloc)(int), int flags)
{
unsigned long next;
pmd_t *pmdp;
pmdp = pmd_set_fixmap_offset(pudp, addr);
do {
pmd_t old_pmd = READ_ONCE(*pmdp);
idmap_pg_dir映射的范围
- 通过函数create_idmap,可以发现IDMAP_TEXT(idmap.text)定义的这个section都会被idmap做一一映射。 基本上可以发现,只要是跟设置TTBR1 相关的代码,都应该要放在这个section中,如函数:
- __enable_mmu
- __cpu_setup
- cpu_resume
- cpu_soft_restart
当然还有更多函数,这里就不一一列举。
idmap_pg_dir页表的创建
当主CPU 启动到 setup_arch->paging_init -> create_idmap时,swapper_pg_dir 已经生效。这也就是前面提到的,读写那些页表时要用到FIX_PGD,FIX_PUD,FIX_PMD这些映射的原因。
当运行完第一个 __create_pgd_mapping 之后,即
static void __init create_idmap(void)
{
u64 start = __pa_symbol(__idmap_text_start);
u64 size = __pa_symbol(__idmap_text_end) - start;
pgd_t *pgd = idmap_pg_dir;
u64 pgd_phys;
/* check if we need an additional level of translation */
if (VA_BITS < 48 && idmap_t0sz < (64 - VA_BITS_MIN)) {
pgd_phys = early_pgtable_alloc(PAGE_SHIFT);
set_pgd(&idmap_pg_dir[start >> VA_BITS],
__pgd(pgd_phys | P4D_TYPE_TABLE));
pgd = __va(pgd_phys);
}
__create_pgd_mapping(pgd, start, start, size, PAGE_KERNEL_ROX,
early_pgtable_alloc, 0);
idmap_pg_dir 就变成如下:
>mmu print EL2N_S1_TTBR0_EL2 TTBR0_EL2=0x0000000081D76000
Input Address | Type | Next Level | Output Address | Properties
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0x00000000 | TTBR0_EL2 | NP:0x0000000081D76000 | | TBI1=1, TBI0=1, AS=1, IPS=1TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
+ 0x00000000 | Level 0 Table | NP:0x00000008FF7FE000 | | APTable=0x0, UXNTable=1, PXNTable=0
- 0x00000000 | Invalid | | |
- 0x40000000 | Invalid | | |
+ 0x80000000 | Level 1 Table | NP:0x00000008FF7FD000 | | APTable=0x0, UXNTable=1, PXNTable=0
- 0x80000000 | Invalid (x14) | | |
+ 0x81C00000 | Level 2 Table | NP:0x00000008FF7FC000 | | APTable=0x0, UXNTable=1, PXNTable=0
- 0x81C00000 | Invalid (x373) | | |
- 0x81D75000 | Level 3 Page | | NP:0x0000000081D75000 | UXN=1, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x81D76000 | Invalid (x138) | | |
- 0x81E00000 | Invalid (x497) | | |
- 0xC0000000 | Invalid (x509) | | |
- 0x8000000000 | Invalid (x511) | | |
当把__idmap_kpti_flag加进去之后,即
/*
 * Excerpt of create_idmap(): only the KPTI-related tail is shown; the
 * earlier part of the function (including the definition of pgd) is
 * elided by the "//...." line.
 */
static void __init create_idmap(void)
{
//....
/* This mapping is only added when CONFIG_UNMAP_KERNEL_AT_EL0 is enabled. */
if (IS_ENABLED(CONFIG_UNMAP_KERNEL_AT_EL0)) {
extern u32 __idmap_kpti_flag;
/* Physical address of the flag; passed below for both address arguments. */
u64 pa = __pa_symbol(&__idmap_kpti_flag);
/*
* The KPTI G-to-nG conversion code needs a read-write mapping
* of its synchronization flag in the ID map.
*/
/* Map sizeof(u32) bytes at pa onto pa with PAGE_KERNEL attributes. */
__create_pgd_mapping(pgd, pa, pa, sizeof(u32), PAGE_KERNEL,
early_pgtable_alloc, 0);
}
}
idmap_pg_dir 就变成如下:
>mmu print EL2N_S1_TTBR0_EL2 TTBR0_EL2=0x0000000081D76000
Input Address | Type | Next Level | Output Address | Properties
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+ 0x00000000 | TTBR0_EL2 | NP:0x0000000081D76000 | | TBI1=1, TBI0=1, AS=1, IPS=1TB, TG1=4KB, SH1=0x3, ORGN1=0x1, IRGN1=0x1, EPD1=0, A1=1, T1SZ=16, TG0=4KB, SH0=0x3, ORGN0=0x1, IRGN0=0x1, EPD0=0, T0SZ=16, HPD1=0, HPD0=0, HD=0, HA=1, CnP=0, ASID=0
+ 0x00000000 | Level 0 Table | NP:0x00000008FF7FE000 | | APTable=0x0, UXNTable=1, PXNTable=0
- 0x00000000 | Invalid | | |
- 0x40000000 | Invalid | | |
+ 0x80000000 | Level 1 Table | NP:0x00000008FF7FD000 | | APTable=0x0, UXNTable=1, PXNTable=0
- 0x80000000 | Invalid (x14) | | |
+ 0x81C00000 | Level 2 Table | NP:0x00000008FF7FC000 | | APTable=0x0, UXNTable=1, PXNTable=0
- 0x81C00000 | Invalid (x373) | | |
- 0x81D75000 | Level 3 Page | | NP:0x0000000081D75000 | UXN=1, PXN=0, Contiguous=0, DBM=0, GP=0, nG=0, AF=1, SH=0x3, AP=0x2, AttrIndx=0x0
- 0x81D76000 | Invalid (x138) | | |
- 0x81E00000 | Invalid (x4) | | |
+ 0x82600000 | Level 2 Table | NP:0x00000008FF7FB000 | | APTable=0x0, UXNTable=1, PXNTable=0
- 0x82600000 | Invalid (x311) | | |
- 0x82737000 | Level 3 Page | | NP:0x0000000082737000 | UXN=1, PXN=1, Contiguous=0, DBM=1, GP=0, nG=0, AF=1, SH=0x3, AP=0x0, AttrIndx=0x0
- 0x82738000 | Invalid (x200) | | |
- 0x82800000 | Invalid (x492) | | |
- 0xC0000000 | Invalid (x509) | | |
- 0x8000000000 | Invalid (x511) | | |
idmap_pg_dir的安装
当主CPU启动好,可以detect CPU feature 时,如果发现CPU可以支持Common not Private bit,这时会临时安装这个页表来重新设置TTBR1,即:
/*
 * Call __cpu_replace_ttbr1() with the lm_alias() of swapper_pg_dir as the
 * table to install and idmap_pg_dir as the ID map.
 * NOTE(review): the trailing `true` presumably requests the Common not
 * Private (CnP) bit — confirm against __cpu_replace_ttbr1().
 */
static inline void cpu_enable_swapper_cnp(void)
{
__cpu_replace_ttbr1(lm_alias(swapper_pg_dir), idmap_pg_dir, true);
}
当第二个CPU启动的时候,系统不再走主CPU启动的流程,而是直接安装这个页表,参考函数: secondary_startup。 这个页表会在 secondary_start_kernel-> cpu_uninstall_idmap 中卸载。
// Secondary CPU entry: run __cpu_setup, call __enable_mmu with the
// swapper_pg_dir and idmap_pg_dir addresses, then jump to
// __secondary_switched.
SYM_FUNC_START_LOCAL(secondary_startup)
/*
* Common entry point for secondary CPUs.
*/
mov x20, x0 // preserve boot mode
bl __cpu_secondary_check52bitva
#if VA_BITS > 48
ldr_l x0, vabits_actual // x0 = vabits_actual (only built when VA_BITS > 48)
#endif
bl __cpu_setup // initialise processor
adrp x1, swapper_pg_dir // x1 = page address of swapper_pg_dir
adrp x2, idmap_pg_dir // x2 = page address of idmap_pg_dir
bl __enable_mmu // NOTE(review): presumably takes x1/x2 as the TTBR1/TTBR0 tables — confirm
ldr x8, =__secondary_switched // load the absolute address of __secondary_switched
br x8 // continue at that address
SYM_FUNC_END(secondary_startup)
当CPU 从sleep状态恢复的时候,也会去使用这个idmap来安装mmu 页表,如下:
// Resume entry: call init_kernel_el and __cpu_setup, call __enable_mmu with
// the swapper_pg_dir and idmap_pg_dir addresses, then jump to _cpu_resume.
SYM_CODE_START(cpu_resume)
mov x0, xzr // x0 = 0 as the argument to init_kernel_el
bl init_kernel_el
mov x19, x0 // preserve boot mode
#if VA_BITS > 48
ldr_l x0, vabits_actual // x0 = vabits_actual (only built when VA_BITS > 48)
#endif
bl __cpu_setup
/* enable the MMU early - so we can access sleep_save_stash by va */
adrp x1, swapper_pg_dir // x1 = page address of swapper_pg_dir
adrp x2, idmap_pg_dir // x2 = page address of idmap_pg_dir
bl __enable_mmu // NOTE(review): presumably takes x1/x2 as the TTBR1/TTBR0 tables — confirm
ldr x8, =_cpu_resume // load the absolute address of _cpu_resume
br x8 // continue at that address
SYM_CODE_END(cpu_resume)
Comments !