diff --git a/.gitignore b/.gitignore index 4411f23..734a4fe 100644 --- a/.gitignore +++ b/.gitignore @@ -2,9 +2,7 @@ *~ \#* .\#* -hello32 -hello64 -counted-hello32 -counted-hello64 -subroutine-hello32 -subroutine-hello64 +hello +strlen +subroutines +includes diff --git a/x86/Makefile b/x86/Makefile index 5b62d27..85f5099 100644 --- a/x86/Makefile +++ b/x86/Makefile @@ -23,6 +23,9 @@ strlen: strlen.o ## Build Lesson 2: Determine length programmatically subroutines: subroutines.o ## Build Lesson 3: Separate strlen() and puts() into subroutine. $(LD) -m $(LINK_32) -o $@ $< +includes: includes.o ## Build Lesson 4: Separate strlen() and puts() into their own files. + $(LD) -m $(LINK_32) -o $@ $< + help: run-help ## Print this helpful message (default) clean: diff --git a/x86/functions.asm b/x86/functions.asm new file mode 100644 index 0000000..a1e1c18 --- /dev/null +++ b/x86/functions.asm @@ -0,0 +1,68 @@ + ;; This is an includes file. It doesn't have its own compilation + ;; capabilities. There is no namespacing in assembly language; + ;; try not to use these names in your own code. + + ;; sys/unistd_32.h +%define SYS_write 4 +%define SYS_exit 1 + + ;; unistd.h +%define STDOUT 1 + + ;; strlen() function. Takes eax as an argument - the pointer to + ;; the initial string. Returns eax as the result - the length of + ;; the string. + +strlen: + push ebx ; We'll be borrowing this register, so we put its + ; current value on the stack. + mov ebx, eax + + ;; Note that even though these have underscores to indicate that + ;; outside users should not use them, they're still globally + ;; accessible in this program's namespace. A user could + ;; theoretically call _strlen_done from anywhere. Assembly gives + ;; you ALL the opportunities to shoot yourself in the foot! +_strlen_next: + cmp byte [eax], 0 + jz _strlen_done + inc eax + jmp _strlen_next + +_strlen_done: + sub eax, ebx + pop ebx ; Restore the register + ret + + ;; Puts() function - puts a string to the console.
Takes EAX as + ;; its only argument - the pointer to the beginning of the string. + +puts: + push edx + push ecx + push ebx + push eax + call strlen ; Uses EAX as the pointer to the beginning of the string. + ; Returns EAX as the length of the string + + mov edx, eax ; Move the length of the string into EDX, where WRITE expects + pop eax ; Restore EAX from the stack + + push eax ; Put the value BACK on the stack. + mov ecx, eax ; Put the pointer to the message into ECX, where WRITE expects + mov ebx, STDOUT + mov eax, SYS_write + int 80h + pop eax ; Restore registers in reverse order. + pop ebx + pop ecx + pop edx + ret + + ;; exit(). Straight from the original. + +exit_program: + mov ebx, 0 + mov eax, SYS_exit + int 80h + diff --git a/x86/hello.s b/x86/hello.s index 63aad58..e273a61 100755 --- a/x86/hello.s +++ b/x86/hello.s @@ -1,31 +1,46 @@ - ;; Hello World Program #1 + ;; Hello World Program #2 ;; Compile with: nasm -f elf hello.s ;; Link with: ld -m elf_i386 -o hello hello.o ;; Run with: ./hello + ;; The following includes are derived from: ;; sys/unistd_32.h %define SYS_write 4 %define SYS_exit 1 + ;; The following includes are derived from: ;; unistd.h %define STDOUT 1 +;;; 'global' is the directive that exports the '_start' symbol so the +;;; linker can find the first instruction to run. + +global _start + section .data msg db "Hello You Beautiful Human", 0Ah - len equ $-msg ; NASM-supplied macro + len equ $-msg ; The $ is a NASM helper that means + ; "The address of the current + ; instruction." 'equ' is a NASM + ; directive that performs the math and + ; places in the 'len' constant the + ; difference between the start of the + ; current instruction and the 'msg'.
section .text - global _start - _start: - mov edx, len - mov ecx, msg ; Address of the message (not the content) + mov edx, len ; Mov the value of 'len' (the message length) to register EDX + mov ecx, msg ; Mov the address of the message (not the content) mov ebx, STDOUT ; using STDOUT (see definition above) - mov eax, SYS_write ; Using WRITE in 32-bit mode? - int 80h ; Interrupt target. The 'h' means 'hexidecimal' + mov eax, SYS_write ; Access WRITE in 32-bit Linux + int 80h ; Call the kernel to run the WRITE command. - mov ebx, 0 - mov eax, SYS_exit - int 80h + ;; Note that it's always register AX that we use to tell the kernel + ;; what we want it to do. Depending on the kernel instruction, other + ;; registers may be used to fill out the command. + + mov ebx, 0 ; Mov '0' to be our exit code + mov eax, SYS_exit ; Access EXIT + int 80h ; Call the kernel to run the EXIT command. diff --git a/x86/includes.s b/x86/includes.s new file mode 100644 index 0000000..3e49364 --- /dev/null +++ b/x86/includes.s @@ -0,0 +1,15 @@ + ;; Hello World Program #5: Includes + +%include "functions.asm" + +section .data + msg db "Cool, we can now do something like structured programming.", 0Ah, 00h + +section .text + global _start + +_start: + mov eax, msg ; Put the address of our message into eax. + call puts ; Calls strlen internally! + call exit_program + diff --git a/x86/strlen.s b/x86/strlen.s index e01d116..f5e376e 100755 --- a/x86/strlen.s +++ b/x86/strlen.s @@ -10,11 +10,12 @@ ;; unistd.h %define STDOUT 1 +global _start + section .data msg db "Hello You Beautiful Human, You're Looking Fine Today!", 0Ah, 00h section .text - global _start _start: mov ebx, msg ; Move the address of the message into ebx @@ -28,16 +29,19 @@ nextchar: ;; sub does. cmp sets flags; does sub? This is why 'jz' works, ;; because if they're equal the result of subtraction is zero.
jz counted ; Jump if the zero flag set - inc eax - jmp nextchar + inc eax ; Increment the counter + jmp nextchar ; Jump to the beginning of the loop counted: - sub eax, ebx ; Subtract the end from the start, and the result goes into the start + sub eax, ebx ; Subtract the end from the start, and + ; the result goes into the start + + mov edx, eax ; syswrite needs that register for something + ; else! Man, picking registers is hard. - mov edx, eax ; syswrite needs that register for something else! Man, picking registers is hard. mov ecx, msg ; Address of the message (not the content) mov ebx, STDOUT ; using STDOUT (see definition above) - mov eax, SYS_write ; Using WRITE in 32-bit mode? + mov eax, SYS_write ; Using WRITE in 32-bit mode. int 80h ; Interrupt target. The 'h' means 'hexidecimal' mov ebx, 0 diff --git a/x86/subroutines.s b/x86/subroutines.s index a918e69..413a8d5 100644 --- a/x86/subroutines.s +++ b/x86/subroutines.s @@ -23,7 +23,6 @@ _start: call exit strlen: - ; will probably want its state restored correctly, right? mov edx, ecx strlen_next: @@ -33,7 +32,7 @@ strlen_next: jmp strlen_next strlen_done: - sub edx, ecx ; Straight from the counted-hello file + sub edx, ecx ; Straight from the strlen file ret ;; Takes EAX as the address of the message and EDX as the diff --git a/x86_64/Makefile b/x86_64/Makefile index e2444b5..688508d 100644 --- a/x86_64/Makefile +++ b/x86_64/Makefile @@ -23,6 +23,9 @@ strlen: strlen.o ## Build Lesson 2: Determine length programmatically subroutines: subroutines.o ## Build Lesson 3: Separate strlen() and puts() into subroutine. $(LD) -m $(LINK_64) -o $@ $< +includes: includes.o ## Build Lesson 4: Separate strlen() and puts() into their own files. 
+ $(LD) -m $(LINK_64) -o $@ $< + help: run-help ## Print this helpful message (default) clean: diff --git a/x86_64/functions.asm b/x86_64/functions.asm new file mode 100644 index 0000000..58478d9 --- /dev/null +++ b/x86_64/functions.asm @@ -0,0 +1,64 @@ + ;; This is an includes file. It doesn't have its own compilation + ;; capabilities. There is no namespacing in assembly language; + ;; try not to use these names in your own code. + + ;; sys/unistd_64.h +%define SYS_write 1 +%define SYS_exit 60 + + ;; unistd.h +%define STDOUT 1 + + ;; strlen() function. Takes rdx as an argument - the pointer to + ;; the initial string. Returns rdx as the result - the length of + ;; the string. + +strlen: + push rsi + mov rsi, rdx + +strlen_next: + cmp byte [rdx], 0 + jz strlen_done + inc rdx + jmp strlen_next + +strlen_done: + sub rdx, rsi + pop rsi + ret + + ;; puts() function - puts a string to the console. Takes RSI as + ;; its only argument - the pointer to the beginning of the string. + +puts: + push rdx ; RDX (data) is used as the length of the message + push rax ; RAX is the instruction to WRITE + push rdi ; RDI (destination) is used as an instruction to WRITE + + mov rdx, rsi + call strlen + + ;; Because I chose to use RDX as the target for strlen, I don't need to + ;; do the pop/push dance as I did in the 32-bit version. + + mov rdi, STDOUT ; using STDOUT (see definition above) + mov rax, SYS_write ; Using WRITE in 64-bit mode + syscall ; NOTE: SYSCALL clobbers RCX and R11 + + ;; Likewise, because I chose RSI as the Source (pointer) to my + ;; message, it remains unchanged during the lifetime of this + ;; routine (unless there are any bugs in strlen) and I don't + ;; have to restore it either. + + pop rdi + pop rax + pop rdx + ret + + ;; exit(). Straight from the original.
+ +exit_program: + mov rdi, 0 + mov rax, SYS_exit + syscall diff --git a/x86_64/hello.s b/x86_64/hello.s index 1b8ae3a..69515a6 100644 --- a/x86_64/hello.s +++ b/x86_64/hello.s @@ -1,30 +1,45 @@ - ;; Hello World Program #1 + ;; Hello World Program #2 ;; Compile with: nasm -f elf64 hello.s - ;; Link with: ld -o hello hello.o + ;; Link with: ld -m elf_x86_64 -o hello hello.o ;; Run with: ./hello + ;; The following includes are derived from: ;; sys/unistd_64.h %define SYS_write 1 %define SYS_exit 60 + ;; The following includes are derived from: ;; unistd.h %define STDOUT 1 +;;; 'global' is the directive that exports the '_start' symbol so the +;;; linker can find the first instruction to run. + +global _start + section .data msg db "Hello You Beautiful Human", 0Ah - len equ $-msg ; NASM-supplied macro + len equ $-msg ; The $ is a NASM helper that means + ; "The address of the current + ; instruction." 'equ' is a NASM + ; directive that performs the math and + ; places in the 'len' constant the + ; difference between the start of the + ; current instruction and the 'msg'. section .text - global _start - _start: - mov rdx, len ; Length of the message - mov rsi, msg ; Address of the message + mov rdx, len ; Mov the value of 'len' (the message length) to register RDX + mov rsi, msg ; Mov the address of the message to RSI (Source Index) mov rdi, STDOUT ; using STDOUT (see definition above) - mov rax, SYS_write ; Using WRITE in 32-bit mode? - syscall + mov rax, SYS_write ; Access WRITE in 64-bit linux + syscall ; Call the kernel to run the WRITE command. - mov rdi, 0 - mov rax, SYS_exit - syscall + ;; Note that it's always register AX that we use to tell the kernel + ;; what we want it to do. Depending on the kernel instruction, other + ;; registers may be used to fill out the command. + + mov rdi, 0 ; Mov '0' to be our exit code + mov rax, SYS_exit ; Access the exit command + syscall ; Call the kernel to run the EXIT command.
diff --git a/x86_64/includes.s b/x86_64/includes.s new file mode 100644 index 0000000..67518cb --- /dev/null +++ b/x86_64/includes.s @@ -0,0 +1,14 @@ + ;; Hello World Program #5: Includes + +%include "functions.asm" + +section .data + msg db "Cool, we can now do something like structured programming - 64 bit version.", 0Ah, 00h + +section .text + global _start + +_start: + mov rsi, msg ; Put the address of our message into rsi. + call puts ; Calls strlen internally! + call exit_program diff --git a/x86_64/strlen.s b/x86_64/strlen.s index 98f1daa..721ce07 100644 --- a/x86_64/strlen.s +++ b/x86_64/strlen.s @@ -1,7 +1,7 @@ - ;; Hello World Program #1 - ;; Compile with: nasm -f elf64 hello.s - ;; Link with: ld -o hello hello.o - ;; Run with: ./hello + ;; Hello World Program #3 + ;; Compile with: nasm -f elf64 strlen.s + ;; Link with: ld -o strlen strlen.o + ;; Run with: ./strlen ;; sys/unistd_64.h %define SYS_write 1 @@ -10,19 +10,20 @@ ;; unistd.h %define STDOUT 1 +global _start + section .data msg db "Hello You Beautiful Human, You're Looking Mighty Fine!", 0Ah, 00h -section .text - global _start +section .text _start: mov rsi, msg ; Move the address of the message into rsi mov rax, rsi ; Move the address of the message into rax ; (Register-to-register copying is faster that a constant!) nextchar: - cmp byte [rax], 0 ; Compare the byte pointed to by eax with zero + cmp byte [rax], 0 ; Compare the byte pointed to by rax with zero ;; Small detail: cmp and sub use the same internal architecture, @@ -30,16 +31,16 @@ nextchar: ;; sub does. cmp sets flags; does sub? This is why 'jz' works, ;; because if they're equal the result of subtraction is zero.
jz counted ; Jump if the zero flag set - inc rax - jmp nextchar + inc rax ; Increment rax by 1 + jmp nextchar ; Jump to the beginning of the loop counted: sub rax, rsi ; Substract source from endpointer, leaving counter mov rdx, rax ; Length of the message mov rsi, msg ; Address of the message mov rdi, STDOUT ; using STDOUT (see definition above) - mov rax, SYS_write ; Using WRITE in 32-bit mode? - syscall + mov rax, SYS_write ; Using WRITE in 64-bit mode. + syscall ; Call the kernel mov rdi, 0 mov rax, SYS_exit