Topics
X86.1.ppt
Arithmetic operations
IA32 Processors
Totally Dominate Computer Market
Evolutionary Design
Starting in 1978 with 8086 (really 1971 with 4004). Added more features as time goes on. Still supports old features, although obsolete.
Hard to match performance of Reduced Instruction Set Computers (RISC). But Intel has done just that!
CS 105
Date 1971
Transistors 2.3K
8008
1972
3.3K
8080
1974
6K
Compatible at source level with 8008. Processor in first kit computers. Pricing caused it to beat similar processors with better programming model.
Motorola 6800 MOS Technologies (MOSTEK) 6502
CS 105
Date 1978
Transistors 29K
16-bit processor. Basis for IBM PC & DOS. Limited to 1MB address space. DOS only gives you 640K.
80286
1982
134K
Added elaborate, but not very useful, addressing scheme. Basis for IBM PC-AT and Windows.
386
1985
275K
Extended to 32 bits. Added flat addressing. Capable of running Unix. By default, Linux/gcc use no instructions introduced in later models.
CS 105
Added special collection of instructions for operating on 64-bit vectors of 1-, 2-, or 4-byte integer data
PentiumPro
1995
6.5M
CS 105
Date 1999
Transistors 8.2M
Added streaming SIMD instructions for operating on 128-bit vectors of 1-, 2-, or 4-byte integer or floating point data
Pentium 4
2001
42M
Added 8-byte formats and 144 new instructions for streaming SIMD mode
CS 105
Itanium
2001
10M
Extends to IA64, a 64-bit architecture. Radically new instruction set designed for high performance. Able to run existing IA32 programs.
On-board x86 engine
Itanium 2
7
2002
221M
Historically
AMD has followed just behind Intel
A little bit slower, a lot cheaper
Recently
Recruited top circuit designers from Digital Equipment Corp. Exploited the fact that Intel was distracted by IA64. Now a close competitor to Intel.
CS 105
Addresses
Registers EIP Data Condition Codes Instructions Object Code Program Data OS Data
Stack
Programmer-Visible State
Register File
Heavily used program data
Memory
Byte addressable array
Code, user data, (most) OS
Condition Codes
Store status information about
gcc -O
text
text
binary
binary
10
Generated Assembly
_sum:
    pushl %ebp
    movl %esp,%ebp
    movl 12(%ebp),%eax
    addl 8(%ebp),%eax
    movl %ebp,%esp
    popl %ebp
    ret
11
CS 105
Assembly Characteristics
Minimal data types
Primitive operations
Perform arithmetic function on register or memory data. Transfer data between memory and register:
Load data from memory into register. Store register data into memory.
Transfer control
Unconditional jumps to/from procedures. Conditional branches.
12
CS 105
Object Code
Code for sum
Assembler
0x401040 <sum>: 0x55 0x89 0xe5 0x8b 0x45 0x0c 0x03 0x45 0x08 0x89 0xec 0x5d 0xc3
Total of 13 bytes. Each instruction 1, 2, or 3 bytes. Starts at address 0x401040.
Binary encoding of each instruction. Nearly-complete image of executable code. Missing linkages between code in different files.
Linker: Resolves references between files. Combines with static run-time libraries, e.g., code for malloc, printf.
Translates .s into .o
execution
13 CS 105
C Code
Assembly
addl 8(%ebp),%eax
Similar to expression y += x
signed or unsigned
Operands:
y: Register %eax
x: Memory M[%ebp+8]
t: Register %eax (return function value in %eax)
0x401046:
03 45 08
Object Code
14
0x0(%esi),%esi
Disassembler
objdump -d p
15
Useful tool for examining object code. Analyzes bit pattern of series of instructions. Produces approximate rendition of assembly code. Can be run on either a.out (complete executable) or .o file.
CS 105
Alternate Disassembly
Object
0x401040: 0x55 0x89 0xe5 0x8b 0x45 0x0c 0x03 0x45 0x08 0x89 0xec 0x5d 0xc3 0x401040 0x401041 0x401043 0x401046 0x401049 0x40104b 0x40104c 0x40104d
Disassembled
<sum>:    push %ebp
<sum+1>:  mov %esp,%ebp
<sum+3>:  mov 0xc(%ebp),%eax
<sum+6>:  add 0x8(%ebp),%eax
<sum+9>:  mov %ebp,%esp
<sum+11>: pop %ebp
<sum+12>: ret
<sum+13>: lea 0x0(%esi),%esi
16
CS 105
Anything that can be interpreted as executable code. Disassembler examines bytes and reconstructs assembly source.
CS 105
17
Moving Data
Moving Data
movl Source,Dest:
Operand Types
18
Reg Mem
Reg Mem
movl $0x4,%eax
movl $-147,(%eax)
movl %eax,%edx
movl %eax,(%edx)
movl (%eax),%edx
movl
Reg
Mem
Reg
19
(R)
Mem[Reg[R]]
Displacement
D(R)
Mem[Reg[R]+D]
Register R specifies start of memory region. Constant displacement D specifies offset. Example: movl 8(%ebp),%edx
20
CS 105
void swap(int *xp, int *yp) { int t0 = *xp; int t1 = *yp; *xp = t1; *yp = t0; }
Body
Finish
21
CS 105
Understanding Swap
void swap(int *xp, int *yp) { int t0 = *xp; int t1 = *yp; *xp = t1; *yp = t0; } Offset 12 8 4 yp xp Rtn adr %ebp
Stack
0 Old %ebp
Register / Variable: %ecx = yp, %edx = xp, %eax = t1, %ebx = t0
-4 Old %ebx
movl 12(%ebp),%ecx   # ecx = yp
movl 8(%ebp),%edx    # edx = xp
movl (%ecx),%eax     # eax = *yp (t1)
movl (%edx),%ebx     # ebx = *xp (t0)
movl %eax,(%edx)     # *xp = eax
movl %ebx,(%ecx)     # *yp = ebx
22
Address
Understanding Swap
%eax Offset yp xp 12 8 4 %ebp
123
456
0x124
0x120 0x11c 0x118 0x114
%edx
%ecx %ebx
0x110 0x10c
%esi
%edi %esp %ebp 0x104 movl movl movl movl movl movl
0x108
0x104 0x100
0
-4
# # # # # #
= = = = = =
23
Address
Understanding Swap
%eax Offset 0x120 yp xp 12 8 4 %ebp
123
456
0x124
0x120 0x11c 0x118 0x114
%edx
%ecx %ebx
0x110 0x10c
%esi
%edi %esp %ebp 0x104 movl movl movl movl movl movl
0x108
0x104 0x100
0
-4
# # # # # #
= = = = = =
24
Address
Understanding Swap
%eax Offset yp xp 12 8 4 %ebp
123
456
0x124
0x120 0x11c 0x118
%edx
%ecx %ebx
0x124
0x120
%esi
%edi %esp %ebp 0x104 movl movl movl movl movl movl
0x108
0x104 0x100
0
-4
# # # # # #
= = = = = =
25
Address
Understanding Swap
%eax 456 Offset yp xp 12 8 4 %ebp
123
456
0x124
0x120 0x11c 0x118 0x114
%edx
%ecx %ebx
0x124
0x120
0x110 0x10c
%esi
%edi %esp %ebp 0x104 movl movl movl movl movl movl
0x108
0x104 0x100
0
-4
# # # # # #
= = = = = =
26
Address
Understanding Swap
%eax 456 Offset yp xp 12 8 4 %ebp
123
456
0x124
0x120 0x11c 0x118 0x114
%edx
%ecx %ebx
0x124
0x120 123
0x110 0x10c
%esi
%edi %esp %ebp 0x104 movl movl movl movl movl movl
0x108
0x104 0x100
0
-4
# # # # # #
= = = = = =
27
Address
Understanding Swap
%eax 456 Offset yp xp 12 8 4 %ebp
456
456
0x124
0x120 0x11c 0x118 0x114
%edx
%ecx %ebx
0x124
0x120 123
0x110 0x10c
%esi
%edi %esp %ebp 0x104 movl movl movl movl movl movl
0x108
0x104 0x100
0
-4
# # # # # #
= = = = = =
28
Address
Understanding Swap
%eax 456 Offset yp xp 12 8 4 %ebp
456
123
0x124
0x120 0x11c 0x118 0x114
%edx
%ecx %ebx
0x124
0x120 123
0x110 0x10c
%esi
%edi %esp %ebp 0x104 movl movl movl movl movl movl
0x108
0x104 0x100
0
-4
# # # # # #
= = = = = =
29
Mem[Reg[Rb]+S*Reg[Ri]+ D]
D: Constant displacement, 1, 2, or 4 bytes
Rb: Base register: any of 8 integer registers
Ri: Index register: any, except for %esp
Unlikely you'd use %ebp, either
S:
Scale: 1, 2, 4, or 8
Special Cases
(Rb,Ri): Mem[Reg[Rb]+Reg[Ri]]
D(Rb,Ri): Mem[Reg[Rb]+Reg[Ri]+D]
(Rb,Ri,S)
30
Mem[Reg[Rb]+S*Reg[Ri]]
CS 105
%ecx
0x100
31
CS 105
Uses
32
CS 105
Computation
Dest = Dest + Src
Dest = Dest - Src
Dest = Dest * Src
Dest = Dest << k
Dest = Dest >> k
Dest = Dest >> k
k is an immediate value or contents of %cl
xorl Src,Dest: Dest = Dest ^ Src
andl Src,Dest: Dest = Dest & Src
orl Src,Dest: Dest = Dest | Src
33 CS 105
Computation
Dest = Dest + 1
Dest = Dest - 1
Dest = -Dest
Dest = ~Dest
34
CS 105
movl 8(%ebp),%eax
movl 12(%ebp),%edx
leal (%edx,%eax),%ecx
leal (%edx,%edx,2),%edx
sall $4,%edx
addl 16(%ebp),%ecx
leal 4(%edx,%eax),%eax
imull %ecx,%eax
movl %ebp,%esp
popl %ebp
ret
Body
Finish
35
CS 105
Understanding arith
int arith (int x, int y, int z) { int t1 = x+y; int t2 = z+t1; int t3 = x+4; int t4 = y * 48; int t5 = t3 + t4; int rval = t2 * t5; return rval; } movl 8(%ebp),%eax movl 12(%ebp),%edx leal (%edx,%eax),%ecx leal (%edx,%edx,2),%edx sall $4,%edx addl 16(%ebp),%ecx leal 4(%edx,%eax),%eax imull %ecx,%eax # # # # # # # # eax edx ecx edx edx ecx eax eax Offset 16 12 8 4 z y x Rtn adr %ebp
Stack
0 Old %ebp = = = = = = = = x y x+y (t1) 3*y 48*y (t4) z+t1 (t2) 4+t4+x (t5) t5*t2 (rval)
36
CS 105
Understanding arith
int arith (int x, int y, int z)
{
  int t1 = x+y;
  int t2 = z+t1;
  int t3 = x+4;
  int t4 = y * 48;
  int t5 = t3 + t4;
  int rval = t2 * t5;
  return rval;
}
movl 8(%ebp),%eax        # eax = x
movl 12(%ebp),%edx       # edx = y
leal (%edx,%eax),%ecx    # ecx = x+y (t1)
leal (%edx,%edx,2),%edx  # edx = 3*y
sall $4,%edx             # edx = 48*y (t4)
addl 16(%ebp),%ecx       # ecx = z+t1 (t2)
leal 4(%edx,%eax),%eax   # eax = 4+t4+x (t5)
imull %ecx,%eax          # eax = t5*t2 (rval)
37
CS 105
Another Example
int logical(int x, int y)
{
  int t1 = x^y;
  int t2 = t1 >> 17;
  int mask = (1<<13) - 7;
  int rval = t2 & mask;
  return rval;
}
logical:
    pushl %ebp
    movl %esp,%ebp
Set Up
Finish
CS 105
CISC Properties
Instruction can reference different operand types
39
CS 105
Control
1) loops 2) conditionals 3) switch 4) Proc. call 5) Proc. return
C
mem proc
Assembly
mem Stack
40
regs
alu
1) byte 2) 2-byte word 3) 4-byte long word 4) contiguous byte allocation 5) address of initial byte
3) branch/jump 4) call 5) ret
CS 105
Announced in Feb. '95. Basis for Pentium II, Pentium III, and Celeron processors. Pentium 4: similar idea, but different details.
Features
41
CS 105
PentiumPro Operation
Translates instructions dynamically into Uops
Consequences
43
Indirect relationship between IA32 code & what actually gets executed. Tricky to predict / optimize performance at assembly level.
CS 105
Whose Assembler?
Intel/Microsoft Format
lea eax,[ecx+ecx*2]
sub esp,8
cmp dword ptr [ebp-8],0
mov eax,dword ptr [eax*4+100h]
GAS/Gnu Format
leal (%ecx,%ecx,2),%eax
subl $8,%esp
cmpl $0,-8(%ebp)
movl 0x100(,%eax,4),%eax
44