ARM寄存器是如何传参的
/*
 * Busy-wait delay.
 * Spins in an empty loop, post-decrementing t once per pass, and stops on
 * the pass where the value of t before the decrement was zero.
 */
void delay(signed int t)
{
    for (;;) {
        if (t-- == 0) {
            break;
        }
    }
}
/*
 * Returns a + b + c + d + e + f, accumulated left to right.
 * Six parameters are used on purpose: the listing that follows shows the
 * first four arriving in r0-r3 and the last two on the stack.
 */
int sum(int a, int b, int c, int d, int e, int f)
{
    int total = a + b;

    total += c;
    total += d;
    total += e;
    total += f;
    return total;
}
/*
 * Demo driver: keeps six counters, bumps each of them once per iteration,
 * passes all six to sum() and uses the result as the delay() count.
 * The loop never terminates, so the trailing return is not reached.
 */
int main()
{
    int tv = 0;
    int a = 1, b = 2, c = 3;
    int d = 4, e = 5, f = 6;

    for (;;) {
        ++a;
        ++b;
        ++c;
        ++d;
        ++e;
        ++f;
        tv = sum(a, b, c, d, e, f);
        delay(tv);
    }
    return 0;
}
程序很简单,用MDK(µVision 5)针对ARM9芯片、以O1优化级别编译之后,主要的汇编代码如下:
000001a4 <__semihosting_library_function>: //delay函数
1a4: e2500001 subs r0, r0, #1
1a8: 2afffffd bcs 1a4 <__semihosting_library_function>
1ac: e12fff1e bx lr
//int sum(int a,int b,int c,int d,int e,int f)
000001b0 <main>:
1b0: e24dd008 sub sp, sp, #8
1b4: e3a06001 mov r6, #1 //a
1b8: e3a07002 mov r7, #2 //b
1bc: e3a08003 mov r8, #3 //c
1c0: e3a09004 mov r9, #4 //d
1c4: e3a04005 mov r4, #5 //e
1c8: e3a05006 mov r5, #6 //f
1cc: e2866001 add r6, r6, #1 //a++;
1d0: e2877001 add r7, r7, #1 //b++;
1d4: e2888001 add r8, r8, #1 //c++;
1d8: e2899001 add r9, r9, #1 //d++;
1dc: e2844001 add r4, r4, #1 //e++;
1e0: e2855001 add r5, r5, #1 //f++;
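1e4: e88d0030 stm sp, {r4, r5} //stm不带后缀时默认是IA类型(Increment After,等价于STMEA),而且sp后面没有"!",所以sp不回写(栈空间已由前面的sub sp, sp, #8预留)。寄存器列表中编号小的寄存器存入低地址:r4(参数e)存入[sp],r5(参数f)存入[sp+4],即r5在高地址,r4在低地址,效果上等同于参数从右到左入栈。STM是批量保存,将寄存器保存到内存中,寄存器列表作为STM的源,方向与STR单次保存的源的方向相反,STM的源方向在右边。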
1e8: e1a03009 mov r3, r9
1ec: e1a02008 mov r2, r8
1f0: e1a01007 mov r1, r7
1f4: e1a00006 mov r0, r6//从参数列表的左到右,分别是r0,r1,r2,r3
1f8: eb000001 bl 204 <sum>
1fc: ebffffe8 bl 1a4 <__semihosting_library_function>
200: eafffff1 b 1cc <main+0x1c>
00000204 <sum>:
204: e52d4004 push {r4} ; (str r4, [sp, #-4]!) sum函数中用到了r4,所以先保存
208: e59d4008 ldr r4, [sp, #8] //执行本句之前,[sp]对应刚才入栈的r4,[sp+4]对应栈中main要传给sum的第五个参数e,[sp+8]对应第六个参数f,所以本句执行之后参数f传给寄存器r4
20c: e59dc004 ldr ip, [sp, #4] //ip寄存器 https://blog.csdn.net/ssdsafsdsd/article/details/8686229
//APCS规范中:r11栈帧指针叫fp,r12内部过程调用寄存器叫 ip,r13栈指针叫sp,r14连接寄存器叫lr,r15程序计数器叫pc
//ldr ip, [sp, #4] 加载寄存器,从内存中加载数据到寄存器,单次加载,源即内存地址在右边,[sp+4]对应参数e,本句之后 ip 寄存器即r12中的值是参数e
210: e0800001 add r0, r0, r1 //a+b
214: e0800002 add r0, r0, r2 //a+b+c
218: e0800003 add r0, r0, r3 //a+b+c+d
21c: e080000c add r0, r0, ip //a+b+c+d+e
220: e0800004 add r0, r0, r4 //a+b+c+d+e+f
224: e49d4004 pop {r4} ; (ldr r4, [sp], #4) 弹出入栈保存的r4,从而恢复进入sum函数时的堆栈
228: e12fff1e bx lr//返回
总结起来:
参数个数<=4个,则参数列表中从左到右分别是寄存器r0,r1,r2,r3;当参数超过4个时,左边的4个还是用r0-r3传值,但其它的则用堆栈传参,只不过先入栈的是参数列表中的最右边的参数,即除了左边的4个参数外,其它参数从右到左入栈。
如果是用arm-none-eabi-gcc/as/ld工具链做实验:
t.ld脚本文件:
ENTRY(start) /* define start as the entry address; script taken from: https://www.5axxw.com/questions/content/h1m1i2 */
SECTIONS
{
. = 0x10000; /* loading address, required by QEMU */
.text : { *(.text) }
.data : { *(.data) }
.bss : { *(.bss) }
. =ALIGN(8); /* align the location counter to 8 bytes after .bss */
. =. + 0x1000; /* advance by 0x1000 bytes; this gap serves as the stack area */
stack_top =.; /* top-of-stack symbol; the start-up code in ts.s loads it into sp */
}
t.c文件:
int g = 100; // initialized global (value 100); it shows up in the .data section of the disassembly
//extern int sum(int a,int b,int c,int d,int e,int f); note: this declaration is intentionally commented out
/*
 * Adds its six arguments, left to right, and returns the total.
 * Six parameters are used so that the last two cannot travel in r0-r3.
 */
int sum(int a, int b, int c, int d, int e, int f)
{
    return ((((a + b) + c) + d) + e) + f;
}
// Test driver for the calling-convention experiment: calls sum() with six
// arguments and stores the result in the global g. The code is kept exactly
// as written because the -O0 disassembly that follows is explained
// statement by statement.
int main()
{
int a,b,c,d,e,f; //local variables
a = b = c = d = e =1; // chained assignment: a, b, c, d and e all become 1
f = 3;
g = sum(a,b,c,d,e,f); // on ARM, r0 holds a and r1 holds b
}
ts.s汇编文件:
// Assembly file to define sum(...)
.global start //,sum  (sum was removed from the export list, so only start is global)
start:
//The stack_top label comes from the linker script t.ld: after the .bss section the
//location counter is aligned with ALIGN(8) and advanced by 0x1000, and stack_top marks
//that address, i.e. sp starts at the highest address of that region
ldr sp,=stack_top //Author's open question: why the "="? What happens with a plain "ldr sp,stack_top"?
//startup2.s does contain "ldr r2,str1"; how do the two forms differ?
//NOTE(review): "=" selects the LDR pseudo-instruction (loads the label's address); check the actual effect in QEMU
//LDR pseudo-instruction versus LDR instruction in ARM assembly:
//https://blog.csdn.net/new_eggs/article/details/8468008
//https://blog.csdn.net/ce123_zhouwei/article/details/7182756
bl main
stop: b stop
// sum(a, b, c, d, e, f) -- hand-written routine for studying ARM argument passing.
// In:  r0-r3 = a, b, c, d; e and f are on the caller's stack (e at the lower address).
// Out: r0 = a + b + c + d + e + f. r3 is reused as scratch once d has been added.
sum:
stmfd sp!,{fp,lr} //push lr and fp, see https://www.cnblogs.com/w-smile/p/14019942.html
//stmfd = store multiple: registers are saved to memory and the register list on the
//right is the source (the opposite side from a single STR)
//fd means full-descending stack, the stack type normally used on ARM
//"sp!" means the updated address is written back to sp after the store
add fp,sp,#4 //ATPCS (ARM procedure call standard) frame set-up: fp = new sp + 4.
//sp starts at the highest address and the stack is full-descending, so sp always
//points at the most recently stored word; after the stmfd above that word is the
//saved fp, and the word at sp+4 is the saved lr.
//Diagrams of fp and lr on the stack: https://blog.csdn.net/qq_34430371/article/details/121795096
//That page also notes that fp is mainly used for stack unwinding (backtrace), i.e.
//recovering the chain of callers.
//According to http://blog.itpub.net/70005277/viewspace-2870926/ and
//https://blog.csdn.net/qq_38131812/article/details/124572650, each frame holds two
//words that form a singly linked list at the base of the frame: the lower address
//holds the caller's frame base fp (like a list "prev" pointer) and the higher address
//holds the return address, i.e. the value of lr on entry.
//So when "add fp,sp,#4" executes, [sp] is the caller's fp and [sp+4] is lr.
add r0,r0,r1 // r0 = a + b; how ARM passes arguments in registers and on the stack
add r0, r0, r2 // r0 = a + b + c
add r0, r0, r3 // r0 = a + b + c + d
ldr r3, [fp, #4] // r3 = e (first stack argument, just above the saved lr)
add r0, r0, r3 // r0 = a + b + c + d + e
ldr r3, [fp, #8] // r3 = f (second stack argument)
add r0, r0, r3 // r0 = a + b + c + d + e + f
// return
sub sp, fp, #4 // point stack pointer to saved fp
ldmfd sp!, {fp, pc} // return to caller
//参考:https://blog.csdn.net/weixin_52288941/article/details/120676938
//https://blog.csdn.net/fivedoumi/article/details/50446493 此帖有C和汇编代码例子
//可以看到实验的结论:参数值传递按顺序:参数个数<=4时,从左到右分别是R0,R1,R2,R3
//如果超过4个,其它参数将保存在栈上,且参数顺序按从右到左入栈(http://lionwq.spaces.eepw.com.cn/articles/article/item/17475/)
//https://blog.csdn.net/fivedoumi/article/details/50446493
//stmfd sp {r4,r5} //将r4,r5入栈,但sp不变
//ldr r4, //sp此时刻是主程序中对应压入的r5值//注意 原代码中的主程序入栈:stmfd sp!,{r4,r5}这个stmfd其实是通过堆栈传参,然后再 bl add_six
//stmfd指令中 st表示store存储,将一个值放入堆栈指针所指的栈中,m表示多次,
//fd表示满递减类型,即满递减类型堆栈
//!表示每次sp操作之后都要将新值写入sp,{fp,lr}是先压入lr,再压入fp,即从右到左
//伪动作:
//sp -= 4
//[sp]=lr
//sp -= 4
//[sp]=fp
//如果是stmed sp,{fp,lr} 入栈 多次 空递减类型 不回写sp
//伪动作:
//[sp] = lr
//[sp - 4] = fp
//如果是stmed sp!,{fp,lr}
//伪动作:
//[sp] = lr
//sp -= 4
//[sp] = fp
//sp -= 4
//如果用代码来检测ARM堆栈的类型可参考https://blog.csdn.net/GRT609/article/details/124206662
//add fp,sp,#4 //fp -> saved lr on stack
//add r0,r0,r1//sum(a,b,c,d,e,f),所以r0=r0+r1即r0=a+b
//add r0,r0,r2 //r0=r0+r2 即r0=a+b+c
//add r0,r0,r3 //r0=a+b+c+d
//ldr r3, //r1=e https://blog.csdn.net/ZXG0808/article/details/125330428
//ldr{条件} 目的寄存器 <存储器地址>
//将<存储器地址>所指地址处连续的4个字节(1个字)的数据传送到目的寄存器中
//ldr指令有多种形式:
//ldr Rn,label1 将label1标号所代表的存储器中数据传输到寄存器Rn中
//ldr R0,[R1] 将R1寄存器值所代表的存储器地址中的数据值加载到寄存器R0
//有一个问题:[R1]是什么类型的寻址方式,ARM有哪几种寻址方式???
//ldr R0,=0x00000040 将立即数装入寄存器R0中,如果立即数的有效位不超过8位,
//该指令(其实是ldr伪指令)最终被替换成mov R0,#0x40,如果立即数的有效位超过
//8位,比如:ldr R0,=0xf0000000,那么该伪指令被编译器拆分为ldr R0,[pc, #offset]
//和 .word 0xf0000000两条指令,即先用.word指令存储数据值到一个地址处(可能是
//在本指令所在的section节或其它节中,根据《ARM汇编语言官方手册.pdf》所说,一般放在本节的末尾),编译器计算数据所在的存储地址与当前
//正在执行指令PC指针的偏差offset。当然,如果.word指令最终将数据放的地址与本指令
//地址相差超过太多(4K),应该要采用ldr的寄存器间接寻址方式而不应该用ldr直接立即数寻址即我们常见的"标号寻址"
//比如:
//将存储器地址为R1值的字数据加载到寄存器R0中
//ldr R0,[R1]
//将存储器地址为R1+R2值的字数据加载到寄存器R0中
//ldr R0,[R1,R2]
//将存储器地址为R1+8的字数据加载到寄存器R0中
//ldr R0,[R1,#8] 注意立即数范围
//将存储器地址为R1的字数据加载到寄存器R0,并将R1+R2的值存入R1
//再次寄存器相加时的规律是相加之后值放入左边寄存器
//ldr R0,[R1],R2
//将存储器地址为R1的字数据加载到寄存器R0,并将R1+8的值存入R1
//规律也是相加之后放入左边寄存器
//ldr R0,[R1],#8 注意立即数范围。注意:ldr Rd,[Rn],#8这种形式的指令,目的寄存器Rd不允许是R15 (r13是堆栈sp,r14是链接寄存器lr(存储子程序调用时的返回地址),r15是PC)
//将存储器地址为R1+R2的字数据加载到寄存器R0,并将R1+R2的值存入R1
//ldr R0,[R1,R2]!
//单次加载源在右边,存储器地址为R1*8,然后将数据加载到寄存器R0
//ldr R0,https://blog.csdn.net/ZXG0808/article/details/125330428
//ldr单次加载源在右边,存储器地址为R1+R2*4,然后将数据加载到寄存器R0
//ldr R0,[R1,R2,LSL #2]
//ldr单次加载源在右边,存储器地址为R1+R2*4,然后将数据加载到R0,最后还要将R1+R2*4保存到R1中
//ldr R0,[R1,R2,LSL #2]!
//ldr单次加载源在右边,存储器地址为R1,数据加载到R0,最后也是要将R1+R2*4保存到R1中
//ldr R0,[R1],R2,LSL #2
//ldr R0,label
//关于ARM汇编指令集及8种寻址方式等知识参考:https://blog.csdn.net/weixin_44705391/article/details/120994974
//8种寻址方式如下:
//1)寄存器寻址
// mov R1,R2 即R1=R2
//2)立即数寻址
// mov R1,#0x08 即R1=8
//3)寄存器移位寻址
// mov R0,R1,LSL #3 即R1左移3位,然后再将新值赋给R0即R0 = R1 << 3
//4)寄存器间接寻址
// ldr R1,[R2] 即寄存器R2值所对应的内存地址处的字数据赋值给R1,即R2是内存地址指针
//5)基址变址寻址
// ldr R1,[R2,#4] 基址是R2值,变址是立即数#4,将内存地址R2+4所对应的数据赋给R1
// 基址变址应该还有:ldr R1,???
//6)多寄存器寻址 (下面LDMIA指令执行完之后R1值是多少?如果指令中R1没有!结果如何?)
// LDMIA R1!,{R2-R7,LR} LD表示加载,M多次,因为是普通数据传输不是堆栈传输,
// 所以不会有FA/FD/EA/ED(堆栈只有FA满递增,FD满递减,EA空递增,ED空递减),
// 只有IA(Increment After 传输之后地址增加)/IB(Increment Before 传输之前地址增加)
// /DA(Decrement After 传输之后地址减少)/DB(Decrement Before 传输之前地址减少)
// R1!表示地址回写,(另外,LDM指令与LDR指令或伪指令运行的方向不同,LDM是从左到右,LDR伪指令或LDR指令是从右到左,MOV等常用指令也是从右到左)
// LDMIA R1!,{R2-R7,LR}表示数据传输方向是从R1到{。。。},可以理解为:
// LDMIA [R1]!,{R2-R7,LR}即将R1寄存器值所代表的存储器地址中的值加载到R2中,
// 然后从R1+4对应的存储器地址中的值加载到R3中,一直到R7,最后一次是加载到LR中,
// 也就是说R1值相当于是一个数组的首地址,也就是数组名
//7)堆栈寻址
// STMFD SP!,{R2-R7,LR} (感觉就象LDM与LDR的指令运行方向不同一样,STM与STR方向也不同吗??)
// STMFD传输方向是从{R2-R7,LR}到SP所指向的栈内存,STR R0,[R1]方向是R0到[R1]
.end
t.elf: file format elf32-littlearm
Disassembly of section .text:
00010000 <start>:
10000: e59fd030 ldr sp, [pc, #48] ; 10038 <sum+0x2c>
10004: eb000022 bl 10094 <main>
00010008 <stop>:
10008: eafffffe b 10008 <stop>
0001000c <sum>:
1000c: e92d4800 push {fp, lr}
10010: e28db004 add fp, sp, #4
10014: e0800001 add r0, r0, r1
10018: e0800002 add r0, r0, r2
1001c: e0800003 add r0, r0, r3
10020: e59b3004 ldr r3, [fp, #4]
10024: e0800003 add r0, r0, r3
10028: e59b3008 ldr r3, [fp, #8]
1002c: e0800003 add r0, r0, r3
10030: e24bd004 sub sp, fp, #4
10034: e8bd8800 pop {fp, pc}
10038: 00011120 andeq r1, r1, r0, lsr #2
0001003c <sum>:
1003c: e52db004 push {fp} ; (str fp, [sp, #-4]!)
10040: e28db000 add fp, sp, #0
10044: e24dd014 sub sp, sp, #20
10048: e50b0008 str r0, [fp, #-8]
1004c: e50b100c str r1, [fp, #-12]
10050: e50b2010 str r2, [fp, #-16]
10054: e50b3014 str r3, [fp, #-20] ; 0xffffffec
10058: e51b2008 ldr r2, [fp, #-8]
1005c: e51b300c ldr r3, [fp, #-12]
10060: e0822003 add r2, r2, r3
10064: e51b3010 ldr r3, [fp, #-16]
10068: e0822003 add r2, r2, r3
1006c: e51b3014 ldr r3, [fp, #-20] ; 0xffffffec
10070: e0822003 add r2, r2, r3
10074: e59b3004 ldr r3, [fp, #4]
10078: e0822003 add r2, r2, r3
1007c: e59b3008 ldr r3, [fp, #8]
10080: e0823003 add r3, r2, r3
10084: e1a00003 mov r0, r3
10088: e24bd000 sub sp, fp, #0
1008c: e49db004 pop {fp} ; (ldr fp, [sp], #4)
10090: e12fff1e bx lr
00010094 <main>:
10094: e92d4800 push {fp, lr}
10098: e28db004 add fp, sp, #4
1009c: e24dd020 sub sp, sp, #32
100a0: e3a03001 mov r3, #1
100a4: e50b3008 str r3, [fp, #-8]
100a8: e51b3008 ldr r3, [fp, #-8]
100ac: e50b300c str r3, [fp, #-12]
100b0: e51b300c ldr r3, [fp, #-12]
100b4: e50b3010 str r3, [fp, #-16]
100b8: e51b3010 ldr r3, [fp, #-16]
100bc: e50b3014 str r3, [fp, #-20] ; 0xffffffec
100c0: e51b3014 ldr r3, [fp, #-20] ; 0xffffffec
100c4: e50b3018 str r3, [fp, #-24] ; 0xffffffe8
100c8: e3a03003 mov r3, #3
100cc: e50b301c str r3, [fp, #-28] ; 0xffffffe4
100d0: e51b301c ldr r3, [fp, #-28] ; 0xffffffe4
100d4: e58d3004 str r3, [sp, #4]
100d8: e51b3008 ldr r3, [fp, #-8]
100dc: e58d3000 str r3, [sp]
100e0: e51b300c ldr r3, [fp, #-12]
100e4: e51b2010 ldr r2, [fp, #-16]
100e8: e51b1014 ldr r1, [fp, #-20] ; 0xffffffec
100ec: e51b0018 ldr r0, [fp, #-24] ; 0xffffffe8
100f0: ebffffd1 bl 1003c <sum>
100f4: e1a02000 mov r2, r0
100f8: e59f3014 ldr r3, [pc, #20] ; 10114 <main+0x80>
100fc: e5832000 str r2, [r3]
10100: e3a03000 mov r3, #0
10104: e1a00003 mov r0, r3
10108: e24bd004 sub sp, fp, #4
1010c: e8bd4800 pop {fp, lr}
10110: e12fff1e bx lr
10114: 00010118 andeq r0, r1, r8, lsl r1
Disassembly of section .data:
00010118 <g>:
10118: 00000064 andeq r0, r0, r4, rrx
Disassembly of section .ARM.attributes:
00000000 <.ARM.attributes>:
0: 00002b41 andeq r2, r0, r1, asr #22
4: 61656100 cmnvs r5, r0, lsl #2
8: 01006962 tsteq r0, r2, ror #18
c: 00000021 andeq r0, r0, r1, lsr #32
10: 4d524105 ldfmie f4, ; 0xffffffec
14: 4d445437 cfstrdmi mvd5, ; 0xffffff24
18: 02060049 andeq r0, r6, #73 ; 0x49
1c: 01090108 tsteq r9, r8, lsl #2
20: 01140412 tsteq r4, r2, lsl r4
24: 03170115 tsteq r7, #1073741829 ; 0x40000005
28: 011a0118 tsteq sl, r8, lsl r1
Disassembly of section .comment:
00000000 <.comment>:
0: 3a434347 bcc 10d0d24 <stack_top+0x10bfc04>
4: 4e472820 cdpmi 8, 4, cr2, cr7, cr0, {1}
8: 6f542055 svcvs 0x00542055
c: 20736c6f rsbscs r6, r3, pc, ror #24
10: 20726f66 rsbscs r6, r2, r6, ror #30
14: 204d5241 subcs r5, sp, r1, asr #4
18: 65626d45 strbvs r6, ! ; 0xfffff2bb
1c: 64656464 strbtvs r6, , #-1124 ; 0xfffffb9c
20: 6f725020 svcvs 0x00725020
24: 73736563 cmnvc r3, #415236096 ; 0x18c00000
28: 2973726f ldmdbcs r3!, {r0, r1, r2, r3, r5, r6, r9, ip, sp, lr}^
2c: 332e3520 ; <UNDEFINED> instruction: 0x332e3520
30: 3220312e eorcc r3, r0, #-2147483637 ; 0x8000000b
34: 30363130 eorscc r3, r6, r0, lsr r1
38: 20373033 eorscs r3, r7, r3, lsr r0
3c: 6c657228 sfmvs f7, 2, , #-160 ; 0xffffff60
40: 65736165 ldrbvs r6, ! ; 0xfffffe9b
44: 415b2029 cmpmi fp, r9, lsr #32
48: 652f4d52 strvs r4, ! ; fffff2fe <stack_top+0xfffee1de>
4c: 6465626d strbtvs r6, , #-621 ; 0xfffffd93
50: 2d646564 cfstr64cs mvdx6, ! ; 0xfffffe70
54: 72622d35 rsbvc r2, r2, #3392 ; 0xd40
58: 68636e61 stmdavs r3!, {r0, r5, r6, r9, sl, fp, sp, lr}^
5c: 76657220 strbtvc r7, , -r0, lsr #4
60: 6f697369 svcvs 0x00697369
64: 3332206e teqcc r2, #110 ; 0x6e
68: 39383534 ldmdbcc r8!, {r2, r4, r5, r8, sl, ip, sp}
6c: Address 0x0000006c is out of bounds.
Disassembly of section .debug_line:
00000000 <.debug_line>:
0: 00000040 andeq r0, r0, r0, asr #32
4: 001b0002 andseq r0, fp, r2
8: 01020000 mrseq r0, (UNDEF: 2)
c: 000d0efb strdeq r0, , -fp
10: 01010101 tsteq r1, r1, lsl #2
14: 01000000 mrseq r0, (UNDEF: 0)
18: 00010000 andeq r0, r1, r0
1c: 732e7374 ; <UNDEFINED> instruction: 0x732e7374
20: 00000000 andeq r0, r0, r0
24: 02050000 andeq r0, r5, #0
28: 00010000 andeq r0, r1, r0
2c: 312f3517 ; <UNDEFINED> instruction: 0x312f3517
30: 2e090332 mcrcs 3, 0, r0, cr9, cr2, {1}
34: 2f2f2f30 svccs 0x002f2f30
38: 2f302f2f svccs 0x00302f2f
3c: 022e5e03 eoreq r5, lr, #3, 28 ; 0x30
40: 01010002 tsteq r1, r2
44: 00000037 andeq r0, r0, r7, lsr r0
48: 001a0002 andseq r0, sl, r2
4c: 01020000 mrseq r0, (UNDEF: 2)
50: 000d0efb strdeq r0, , -fp
54: 01010101 tsteq r1, r1, lsl #2
58: 01000000 mrseq r0, (UNDEF: 0)
5c: 00010000 andeq r0, r1, r0
60: 00632e74 rsbeq r2, r3, r4, ror lr
64: 00000000 andeq r0, r0, r0
68: 3c020500 cfstr32cc mvfx0, , {-0}
6c: 15000100 strne r0, ; 0xffffff00
70: 845908d7 ldrbhi r0, , #-2263 ; 0xfffff729
74: 4b3d0868 blmi f4221c <stack_top+0xf310fc>
78: 0a029108 beq a44a0 <stack_top+0x93380>
7c: Address 0x0000007c is out of bounds.
Disassembly of section .debug_info:
00000000 <.debug_info>:
0: 00000047 andeq r0, r0, r7, asr #32
4: 00000002 andeq r0, r0, r2
8: 01040000 mrseq r0, (UNDEF: 4)
c: 00000000 andeq r0, r0, r0
10: 00010000 andeq r0, r1, r0
14: 0001003c andeq r0, r1, ip, lsr r0
18: 732e7374 ; <UNDEFINED> instruction: 0x732e7374
1c: 5c3a4800 ldcpl 8, cr4, , #-0
20: 5c504d54 mrrcpl 13, 5, r4, r0, cr4
24: 73707466 cmnvc r0, #1711276032 ; 0x66000000
28: 65767265 ldrbvs r7, ! ; 0xfffffd9b
2c: 6f6f7272 svcvs 0x006f7272
30: 72615c74 rsbvc r5, r1, #116, 24 ; 0x7400
34: 6162396d cmnvs r2, sp, ror #18
38: 47006572 smlsdxmi r0, r2, r5, r6
3c: 4120554e ; <UNDEFINED> instruction: 0x4120554e
40: 2e322053 mrccs 0, 1, r2, cr2, cr3, {2}
44: 302e3632 eorcc r3, lr, r2, lsr r6
48: fc800100 stc2 1, cr0, , {0}
4c: 04000000 streq r0, , #-0
50: 00001400 andeq r1, r0, r0, lsl #8
54: 00010400 andeq r0, r1, r0, lsl #8
58: 0c000000 stceq 0, cr0, , {-0}
5c: 00632e74 rsbeq r2, r3, r4, ror lr
60: 00000051 andeq r0, r0, r1, asr r0
64: 0001003c andeq r0, r1, ip, lsr r0
68: 000000dc ldrdeq r0, , -ip
6c: 00000044 andeq r0, r0, r4, asr #32
70: 6d757302 ldclvs 3, cr7, !
74: 87030100 strhi r0,
78: 3c000000 stccc 0, cr0, , {-0}
7c: 58000100 stmdapl r0, {r8}
80: 01000000 mrseq r0, (UNDEF: 0)
84: 0000879c muleq r0, ip, r7
88: 00610300 rsbeq r0, r1, r0, lsl #6
8c: 00870301 addeq r0, r7, r1, lsl #6
90: 91020000 mrsls r0, (UNDEF: 2)
94: 00620374 rsbeq r0, r2, r4, ror r3
98: 00870301 addeq r0, r7, r1, lsl #6
9c: 91020000 mrsls r0, (UNDEF: 2)
a0: 00630370 rsbeq r0, r3, r0, ror r3
a4: 00870301 addeq r0, r7, r1, lsl #6
a8: 91020000 mrsls r0, (UNDEF: 2)
ac: 0064036c rsbeq r0, r4, ip, ror #6
b0: 00870301 addeq r0, r7, r1, lsl #6
b4: 91020000 mrsls r0, (UNDEF: 2)
b8: 00650368 rsbeq r0, r5, r8, ror #6
bc: 00870301 addeq r0, r7, r1, lsl #6
c0: 91020000 mrsls r0, (UNDEF: 2)
c4: 00660300 rsbeq r0, r6, r0, lsl #6
c8: 00870301 addeq r0, r7, r1, lsl #6
cc: 91020000 mrsls r0, (UNDEF: 2)
d0: 04040004 streq r0, , #-4
d4: 746e6905 strbtvc r6, , #-2309 ; 0xfffff6fb
d8: 004c0500 subeq r0, ip, r0, lsl #10
dc: 07010000 streq r0,
e0: 00000087 andeq r0, r0, r7, lsl #1
e4: 00010094 muleq r1, r4, r0
e8: 00000084 andeq r0, r0, r4, lsl #1
ec: 00f09c01 rscseq r9, r0, r1, lsl #24
f0: 61060000 mrsvs r0, (UNDEF: 6)
f4: 87090100 strhi r0,
f8: 02000000 andeq r0, r0, #0
fc: 62066491 andvs r6, r6, #-1862270976 ; 0x91000000
100: 87090100 strhi r0,
104: 02000000 andeq r0, r0, #0
108: 63066891 movwvs r6, #26769 ; 0x6891
10c: 87090100 strhi r0,
110: 02000000 andeq r0, r0, #0
114: 64066c91 strvs r6, , #-3217 ; 0xfffff36f
118: 87090100 strhi r0,
11c: 02000000 andeq r0, r0, #0
120: 65067091 strvs r7, ; 0xffffff6f
124: 87090100 strhi r0,
128: 02000000 andeq r0, r0, #0
12c: 66067491 ; <UNDEFINED> instruction: 0x66067491
130: 87090100 strhi r0,
134: 02000000 andeq r0, r0, #0
138: 07006091 ; <UNDEFINED> instruction: 0x07006091
13c: 01010067 tsteq r1, r7, rrx
140: 00000087 andeq r0, r0, r7, lsl #1
144: 01180305 tsteq r8, r5, lsl #6
148: Address 0x00000148 is out of bounds.
Disassembly of section .debug_abbrev:
00000000 <.debug_abbrev>:
0: 10001101 andne r1, r0, r1, lsl #2
4: 12011106 andne r1, r1, #-2147483647 ; 0x80000001
8: 1b080301 blne 200c14 <stack_top+0x1efaf4>
c: 13082508 movwne r2, #34056 ; 0x8508
10: 00000005 andeq r0, r0, r5
14: 25011101 strcs r1, ; 0xfffffeff
18: 030b130e movweq r1, #45838 ; 0xb30e
1c: 110e1b08 tstne lr, r8, lsl #22
20: 10061201 andne r1, r6, r1, lsl #4
24: 02000017 andeq r0, r0, #23
28: 193f012e ldmdbne pc!, {r1, r2, r3, r5, r8} ; <UNPREDICTABLE>
2c: 0b3a0803 bleq e82040 <stack_top+0xe70f20>
30: 19270b3b stmdbne r7!, {r0, r1, r3, r4, r5, r8, r9, fp}
34: 01111349 tsteq r1, r9, asr #6
38: 18400612 stmdane r0, {r1, r4, r9, sl}^
3c: 01194297 ; <UNDEFINED> instruction: 0x01194297
40: 03000013 movweq r0, #19
44: 08030005 stmdaeq r3, {r0, r2}
48: 0b3b0b3a bleq ec2d38 <stack_top+0xeb1c18>
4c: 18021349 stmdane r2, {r0, r3, r6, r8, r9, ip}
50: 24040000 strcs r0, , #-0
54: 3e0b0b00 vmlacc.f64 d0, d11, d0
58: 0008030b andeq r0, r8, fp, lsl #6
5c: 012e0500 ; <UNDEFINED> instruction: 0x012e0500
60: 0e03193f mcreq 9, 0, r1, cr3, cr15, {1}
64: 0b3b0b3a bleq ec2d54 <stack_top+0xeb1c34>
68: 01111349 tsteq r1, r9, asr #6
6c: 18400612 stmdane r0, {r1, r4, r9, sl}^
70: 01194296 ; <UNDEFINED> instruction: 0x01194296
74: 06000013 ; <UNDEFINED> instruction: 0x06000013
78: 08030034 stmdaeq r3, {r2, r4, r5}
7c: 0b3b0b3a bleq ec2d6c <stack_top+0xeb1c4c>
80: 18021349 stmdane r2, {r0, r3, r6, r8, r9, ip}
84: 34070000 strcc r0, , #-0
88: 3a080300 bcc 200c90 <stack_top+0x1efb70>
8c: 490b3b0b stmdbmi fp, {r0, r1, r3, r8, r9, fp, ip, sp}
90: 02193f13 andseq r3, r9, #19, 30 ; 0x4c
94: 00000018 andeq r0, r0, r8, lsl r0
Disassembly of section .debug_aranges:
00000000 <.debug_aranges>:
0: 0000001c andeq r0, r0, ip, lsl r0
4: 00000002 andeq r0, r0, r2
8: 00040000 andeq r0, r4, r0
c: 00000000 andeq r0, r0, r0
10: 00010000 andeq r0, r1, r0
14: 0000003c andeq r0, r0, ip, lsr r0
...
20: 0000001c andeq r0, r0, ip, lsl r0
24: 004b0002 subeq r0, fp, r2
28: 00040000 andeq r0, r4, r0
2c: 00000000 andeq r0, r0, r0
30: 0001003c andeq r0, r1, ip, lsr r0
34: 000000dc ldrdeq r0, , -ip
...
Disassembly of section .debug_str:
00000000 <.debug_str>:
0: 20554e47 subscs r4, r5, r7, asr #28
4: 20313143 eorscs r3, r1, r3, asr #2
8: 2e332e35 mrccs 14, 1, r2, cr3, cr5, {1}
c: 30322031 eorscc r2, r2, r1, lsr r0
10: 33303631 teqcc r0, #51380224 ; 0x3100000
14: 28203730 stmdacs r0!, {r4, r5, r8, r9, sl, ip, sp}
18: 656c6572 strbvs r6, ! ; 0xfffffa8e
1c: 29657361 stmdbcs r5!, {r0, r5, r6, r8, r9, ip, sp, lr}^
20: 52415b20 subpl r5, r1, #32, 22 ; 0x8000
24: 6d652f4d stclvs 15, cr2, ! ; 0xfffffecc
28: 64646562 strbtvs r6, , #-1378 ; 0xfffffa9e
2c: 352d6465 strcc r6, ! ; 0xfffffb9b
30: 6172622d cmnvs r2, sp, lsr #4
34: 2068636e rsbcs r6, r8, lr, ror #6
38: 69766572 ldmdbvs r6!, {r1, r4, r5, r6, r8, sl, sp, lr}^
3c: 6e6f6973 mcrvs 9, 3, r6, cr15, cr3, {3}
40: 34333220 ldrtcc r3, , #-544 ; 0xfffffde0
44: 5d393835 ldcpl 8, cr3, ! ; 0xffffff2c
48: 00672d20 rsbeq r2, r7, r0, lsr #26
4c: 6e69616d powvsez f6, f1, #5.0
50: 5c3a4800 ldcpl 8, cr4, , #-0
54: 5c504d54 mrrcpl 13, 5, r4, r0, cr4
58: 73707466 cmnvc r0, #1711276032 ; 0x66000000
5c: 65767265 ldrbvs r7, ! ; 0xfffffd9b
60: 6f6f7272 svcvs 0x006f7272
64: 72615c74 rsbvc r5, r1, #116, 24 ; 0x7400
68: 6162396d cmnvs r2, sp, ror #18
6c: Address 0x0000006c is out of bounds.
Disassembly of section .debug_frame:
00000000 <.debug_frame>:
0: 0000000c andeq r0, r0, ip
4: ffffffff ; <UNDEFINED> instruction: 0xffffffff
8: 7c020001 stcvc 0, cr0, , {1}
c: 000d0c0e andeq r0, sp, lr, lsl #24
10: 0000001c andeq r0, r0, ip, lsl r0
14: 00000000 andeq r0, r0, r0
18: 0001003c andeq r0, r1, ip, lsr r0
1c: 00000058 andeq r0, r0, r8, asr r0
20: 8b040e42 blhi 103930 <stack_top+0xf2810>
24: 0b0d4201 bleq 350830 <stack_top+0x33f710>
28: 420d0d64 andmi r0, sp, #100, 26 ; 0x1900
2c: 00000ecb andeq r0, r0, fp, asr #29
30: 00000020 andeq r0, r0, r0, lsr #32
34: 00000000 andeq r0, r0, r0
38: 00010094 muleq r1, r4, r0
3c: 00000084 andeq r0, r0, r4, lsl #1
40: 8b080e42 blhi 203950 <stack_top+0x1f2830>
44: 42018e02 andmi r8, r1, #2, 28
48: 78040b0c stmdavc r4, {r2, r3, r8, r9, fp}
4c: 42080d0c andmi r0, r8, #12, 26 ; 0x300
50: 000ecbce andeq ip, lr, lr, asr #23
对于上面的汇编代码,重点代码注释如下:
0001003c <sum>: //将sum单独做一个.s文件再与main.c混合编程实验看看是不是符合规范及正确的???
1003c: e52db004 push {fp} ; (str fp, [sp, #-4]!) str指令是单次存储,源方向在左,将fp寄存器的值存入sp-4地址中,然后sp值减4,表示sp是栈顶
10040: e28db000 add fp, sp, #0 //上句执行之后,堆栈的栈顶保存的就是调用者的fp值,本句sp+0其实就是取栈顶地址,赋给fp作为本函数的栈帧指针
10044: e24dd014 sub sp, sp, #20 //局部变量空间
10048: e50b0008 str r0, [fp, #-8] //在进入sum之前,栈中:高地址f参数 + 低地址e参数 (也就是超4个参数,最右边参数先入栈),所以fp-8对应的地址将第一个参数即a
//存入,但感觉跳过了fp-4,为什么要跳过,作用???
1004c: e50b100c str r1, [fp, #-12] //参数b
10050: e50b2010 str r2, [fp, #-16] //参数c
10054: e50b3014 str r3, [fp, #-20] ; 0xffffffec //参数d
10058: e51b2008 ldr r2, [fp, #-8] //r2值为a
1005c: e51b300c ldr r3, [fp, #-12] //r3=b
10060: e0822003 add r2, r2, r3 //r2=a+b
10064: e51b3010 ldr r3, [fp, #-16] //r3=c
10068: e0822003 add r2, r2, r3 //r2=r2+c=a+b+c
1006c: e51b3014 ldr r3, [fp, #-20] ; 0xffffffec r3=d
10070: e0822003 add r2, r2, r3 //r2=a+b+c+d
10074: e59b3004 ldr r3, [fp, #4] //fp+4就是main中入栈的参数e所在的地址(再次提醒:超过4个参数,从参数列表最右边开始入栈,从右到左依次入栈,
//所以此句取值时栈地址越高越表示是参数列表的最右边的参数)
10078: e0822003 add r2, r2, r3 //r2=a+b+c+d+e
1007c: e59b3008 ldr r3, [fp, #8] //fp+8比fp+4的地址更高,所以是参数列表的更右边的参数,即参数f(本例中是值3)
10080: e0823003 add r3, r2, r3 //r3=a+b+c+d+e+f
10084: e1a00003 mov r0, r3 //r0根据规范是sum函数的返回结果值
10088: e24bd000 sub sp, fp, #0 //利用栈帧fp,需要将本函数中使用到的堆栈全部恢复,以便退出函数返回到调用者
1008c: e49db004 pop {fp} ; (ldr fp, [sp], #4) //https://blog.csdn.net/ZXG0808/article/details/125330428 ldr单次加载,源内存地址在右边,
//ldr指令寻址方式由两部分组成,一部分为一个基址寄存器即Rn,可以为任一通用寄存器;
//另一部分为一个地址偏移量,有三种格式:
//第一种格式:立即数
//第二种格式:寄存器
//第三种格式:寄存器及一个移位常量
//寻址方式虽然只有三种格式,但寻址方式的地址计算方法有三种
//第一种计算方法:偏移量方法,Rn基址寄存器中的值和地址偏移量作加减运算,生成操作数的地址
//第二种计算方法:事先更新方法,Rn基址寄存器中的值和地址偏移量作加减运算,生成操作数的地址,指令执行后,这个生成的操作数地址被写入基址寄存器
//第三种计算方法:事后更新方法,指令将Rn基址寄存器的值作为操作数的地址执行内存访问,然后再将基址寄存器Rn的值和地址偏移量做加减运算生成一个新的操作数地址,最后将这个新的操作数地址写入Rn基址寄存器
//ldr fp,[sp],#4 地址偏移量为第一种格式即立即数,寻址方式的地址计算方法是第三种,事后更新法。先以sp的值为地址(即堆栈顶部)取一个字数据,
//然后将这个字数据存入fp(fp为栈帧指针),最后 sp += 4,所以本ldr fp,[sp],#4指令其实就是pop {fp}指令
10090: e12fff1e bx lr //返回调用者
00010094 <main>:
10094: e92d4800 push {fp, lr} //https://www.cnblogs.com/w-smile/p/14019942.html中所说,lr先入栈在高地址中,fp后入栈在低地址中;最后sp指向fp
10098: e28db004 add fp, sp, #4 //sp+4指向栈中lr,所以本句之后fp指向栈中lr所在的地址即当前sp指针+4地址值;如果每次调用函数时,进入函数第一句就是先push {fp,lr},
//第二句就是add fp,sp,#4 那么fp始终就是指向调用者的返回地址,也就是说fp是栈帧指针,目前新版的ARM APCS中就有这样的规范要求。
//注意:我在armcc工具产生的汇编代码中(前面MDK U5 ARM9)就没有看fp相关的汇编代码
1009c: e24dd020 sub sp, sp, #32 //局部变量空间
100a0: e3a03001 mov r3, #1 //初值1,下面从[fp-8]到[fp-24]分别对应变量e,d,c,b,a(连续赋值a=b=c=d=e=1是从最右边的e开始赋值的),[fp-28]对应变量f
100a4: e50b3008 str r3, [fp, #-8] //fp-8就是原来堆栈中(高地址lr+低地址fp)的更低地址XXX处:高地址lr+低地址fp+更低地址XXX; str r3,[fp, #-8]表示在“更低地址XXX"中存入r3值即存入1
100a8: e51b3008 ldr r3, [fp, #-8] //再将这个1取出并赋给r3 估计是没有优化的原因
100ac: e50b300c str r3, [fp, #-12] //高地址lr+低地址fp+更低地址XXX+地址YYY 的“地址YYY”中存入1
100b0: e51b300c ldr r3, [fp, #-12]
100b4: e50b3010 str r3, [fp, #-16]
100b8: e51b3010 ldr r3, [fp, #-16]
100bc: e50b3014 str r3, [fp, #-20] ; 0xffffffec
100c0: e51b3014 ldr r3, [fp, #-20] ; 0xffffffec
100c4: e50b3018 str r3, [fp, #-24] ; 0xffffffe8
100c8: e3a03003 mov r3, #3//最后一个参数f值
100cc: e50b301c str r3, [fp, #-28] ; 0xffffffe4
100d0: e51b301c ldr r3, [fp, #-28] ; 0xffffffe4
100d4: e58d3004 str r3, [sp, #4] //本句之前,堆栈中:高地址lr+低地址fp+更低地址XXX(即fp-8)......+最低地址(fp-36);fp等于push之后的sp加4,之后sp又减了32,所以SP的值指向“最低地址(fp-36)“,
//sp+4指向(fp-32),这是传给sum的第六个参数f的出参位置,与局部变量f所在的(fp-28)不是同一个地址,所以本句并不是重复前面的str r3,[fp, #-28],而是通过堆栈传参
100d8: e51b3008 ldr r3, [fp, #-8] //第五个参数e的值([fp-8]处保存的是变量e)
100dc: e58d3000 str r3, [sp] ////堆栈中栈顶(fp-36)的值置为1,这是第五个参数e的出参位置;也就是说堆栈中从 更低地址XXX(即fp-8)+......+低地址(fp-24)分别对应局部变量e,d,c,b,a,而低地址(fp-28)对应局部变量f
//(fp-36)即SP值,本句执行后,栈顶将会被赋值为1
//可以看到在堆栈中,针对sum函数的最右边二个参数e,f,在调用sum之前,堆栈中高地址中保存的是f,低地址中保存的是e,这就相当于超过4个参数的从参数列表的最
//右边开始最先入栈,然后右边第二入栈,依次类推。
100e0: e51b300c ldr r3, [fp, #-12] //第四个参数d
100e4: e51b2010 ldr r2, [fp, #-16]
100e8: e51b1014 ldr r1, [fp, #-20] ; 0xffffffec
100ec: e51b0018 ldr r0, [fp, #-24] ; 0xffffffe8
100f0: ebffffd1 bl 1003c <sum> //bl执行之后,lr寄存器中保存的是返回地址值,即本句的下一句代码的地址
100f4: e1a02000 mov r2, r0
100f8: e59f3014 ldr r3, [pc, #20] ; 10114 <main+0x80>
100fc: e5832000 str r2, [r3]
10100: e3a03000 mov r3, #0
10104: e1a00003 mov r0, r3
10108: e24bd004 sub sp, fp, #4
1010c: e8bd4800 pop {fp, lr}
10110: e12fff1e bx lr
10114: 00010118 andeq r0, r1, r8, lsl r1
通过上面做的实验可知:Armcc工具链对于ARM9并没有创建与之对应的fp栈帧框架,而gnu的arm-none-eabi-gcc工具则创建了栈帧框架 再做一个实验:
gnulinuxt:
arm-none-linux-gnueabihf-gcc -c -o gnulinuxts.o ts.s
arm-none-linux-gnueabihf-gcc -c -o gnulinuxt.o t.c
arm-none-linux-gnueabihf-ld -T t.ld -o gnulinuxt.elf gnulinuxts.o gnulinuxt.o
arm-none-linux-gnueabihf-objdump -D gnulinuxt.o > gnulinuxt_o.dis
arm-none-linux-gnueabihf-objdump -D gnulinuxt.elf > gnulinuxt_elf.dis
注意:我用的工具与armcc及arm-none-eabi-xxx是不同的,结果如下:
gnulinuxt.elf: file format elf32-littlearm
Disassembly of section .text:
00010000 <start>:
10000: e59fd030 ldr sp, [pc, #48] ; 10038 <sum+0x2c>
10004: fa000018 blx 1006c <main>
00010008 <stop>:
10008: eafffffe b 10008 <stop>
0001000c <sum>:
1000c: e92d4800 push {fp, lr}
10010: e28db004 add fp, sp, #4
10014: e0800001 add r0, r0, r1
10018: e0800002 add r0, r0, r2
1001c: e0800003 add r0, r0, r3
10020: e59b3004 ldr r3, [fp, #4]
10024: e0800003 add r0, r0, r3
10028: e59b3008 ldr r3, [fp, #8]
1002c: e0800003 add r0, r0, r3
10030: e24bd004 sub sp, fp, #4
10034: e8bd8800 pop {fp, pc}
10038: 000110b8 strheq r1, , -r8
0001003c <sum>:
1003c: b480 push {r7}
1003e: b085 sub sp, #20
10040: af00 add r7, sp, #0
10042: 60f8 str r0, [r7, #12]
10044: 60b9 str r1, [r7, #8]
10046: 607a str r2, [r7, #4]
10048: 603b str r3, [r7, #0]
1004a: 68fa ldr r2, [r7, #12]
1004c: 68bb ldr r3, [r7, #8]
1004e: 441a add r2, r3
10050: 687b ldr r3, [r7, #4]
10052: 441a add r2, r3
10054: 683b ldr r3, [r7, #0]
10056: 441a add r2, r3
10058: 69bb ldr r3, [r7, #24]
1005a: 441a add r2, r3
1005c: 69fb ldr r3, [r7, #28]
1005e: 4413 add r3, r2
10060: 4618 mov r0, r3
10062: 3714 adds r7, #20
10064: 46bd mov sp, r7
10066: f85d 7b04 ldr.w r7, [sp], #4
1006a: 4770 bx lr
0001006c <main>:
1006c: b580 push {r7, lr}
1006e: b088 sub sp, #32
10070: af02 add r7, sp, #8
10072: 2301 movs r3, #1
10074: 617b str r3, [r7, #20]
10076: 697b ldr r3, [r7, #20]
10078: 613b str r3, [r7, #16]
1007a: 693b ldr r3, [r7, #16]
1007c: 60fb str r3, [r7, #12]
1007e: 68fb ldr r3, [r7, #12]
10080: 60bb str r3, [r7, #8]
10082: 68bb ldr r3, [r7, #8]
10084: 607b str r3, [r7, #4]
10086: 2303 movs r3, #3
10088: 603b str r3, [r7, #0]
1008a: 683b ldr r3, [r7, #0]
1008c: 9301 str r3, [sp, #4]
1008e: 697b ldr r3, [r7, #20]
10090: 9300 str r3, [sp, #0]
10092: 693b ldr r3, [r7, #16]
10094: 68fa ldr r2, [r7, #12]
10096: 68b9 ldr r1, [r7, #8]
10098: 6878 ldr r0, [r7, #4]
1009a: f7ff ffcf bl 1003c <sum>
1009e: 4602 mov r2, r0
100a0: f240 03b4 movw r3, #180 ; 0xb4
100a4: f2c0 0301 movt r3, #1
100a8: 601a str r2, [r3, #0]
100aa: 2300 movs r3, #0
100ac: 4618 mov r0, r3
100ae: 3718 adds r7, #24
100b0: 46bd mov sp, r7
100b2: bd80 pop {r7, pc}
Disassembly of section .data:
000100b4 <g>:
100b4: 00000064 andeq r0, r0, r4, rrx
Disassembly of section .ARM.attributes:
00000000 <.ARM.attributes>:
0: 00003041 andeq r3, r0, r1, asr #32
4: 61656100 cmnvs r5, r0, lsl #2
8: 01006962 tsteq r0, r2, ror #18
c: 00000026 andeq r0, r0, r6, lsr #32
10: 412d3705 ; <UNDEFINED> instruction: 0x412d3705
14: 070a0600 streq r0,
18: 09010841 stmdbeq r1, {r0, r6, fp}
1c: 0c030a02 ; <UNDEFINED> instruction: 0x0c030a02
20: 14041201 strne r1, , #-513 ; 0xfffffdff
24: 17011501 strne r1,
28: 1a011803 bne 4603c <stack_top+0x34f84>
2c: 22011c02 andcs r1, r1, #512 ; 0x200
30: Address 0x0000000000000030 is out of bounds.
Disassembly of section .comment:
00000000 <.comment>:
0: 3a434347 bcc 10d0d24 <stack_top+0x10bfc6c>
4: 4e472820 cdpmi 8, 4, cr2, cr7, cr0, {1}
8: 6f542055 svcvs 0x00542055
c: 68636c6f stmdavs r3!, {r0, r1, r2, r3, r5, r6, sl, fp, sp, lr}^
10: 206e6961 rsbcs r6, lr, r1, ror #18
14: 20726f66 rsbscs r6, r2, r6, ror #30
18: 20656874 rsbcs r6, r5, r4, ror r8
1c: 72702d41 rsbsvc r2, r0, #4160 ; 0x1040
20: 6c69666f stclvs 6, cr6, , #-444 ; 0xfffffe44
24: 72412065 subvc r2, r1, #101 ; 0x65
28: 74696863 strbtvc r6, , #-2147 ; 0xfffff79d
2c: 75746365 ldrbvc r6, ! ; 0xfffffc9b
30: 31206572 ; <UNDEFINED> instruction: 0x31206572
34: 2d332e30 ldccs 14, cr2, ! ; 0xffffff40
38: 31323032 teqcc r2, r2, lsr r0
3c: 2037302e eorscs r3, r7, lr, lsr #32
40: 6d726128 ldfvse f6, ! ; 0xffffff60
44: 2e30312d rsfcssp f3, f0, #5.0
48: 29293932 stmdbcs r9!, {r1, r4, r5, r8, fp, ip, sp}
4c: 2e303120 rsfcssp f3, f0, f0
50: 20312e33 eorscs r2, r1, r3, lsr lr
54: 31323032 teqcc r2, r2, lsr r0
58: 31323630 teqcc r2, r0, lsr r6
...
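说明:arm-none-linux-gnueabihf-xxx(猜想arm-none-linux-gnueabi-xxx相同效果)编译t.c得到的是Thumb指令,没有使用 fp(r11) 栈帧框架,而是用r7作为帧指针(push {r7} / add r7, sp, #0);结合前面的实验,只有arm-none-eabi-gcc生成的ARM模式代码使用了 fp 栈帧框架,ARMCC并没有用到栈帧框架。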
页:
[1]