From 33f3aa7752dedd1ae4bbf9ae6b30eebc5356caed Mon Sep 17 00:00:00 2001 From: "Elf M. Sternberg" Date: Mon, 30 Apr 2018 20:46:25 -0700 Subject: [PATCH] Lessons 7 & 8: puts with newline, and command line argument handling. --- README.md | 46 +++++++++++++++++++++++++++++++++++++++++++- x86/Makefile | 23 ++++++++++++++++------ x86/argv.s | 24 +++++++++++++++++++++++ x86/functions.asm | 16 ++++++++++++++- x86_64/Makefile | 23 ++++++++++++++++------ x86_64/argv.s | 21 ++++++++++++++++++++ x86_64/functions.asm | 16 +++++++++++++++ 7 files changed, 155 insertions(+), 14 deletions(-) create mode 100644 x86/argv.s create mode 100644 x86_64/argv.s diff --git a/README.md b/README.md index 3df4275..67bba33 100644 --- a/README.md +++ b/README.md @@ -157,7 +157,51 @@ one cycle (register copies are *cheap*, people) on my computer that (checks `lshw`) executes approximately 2,870,000 instructions per **second**. -More to come... I hope... +## Lesson 5: Includes + +Lesson 5 takes the functions we wrote in Lesson 4 and moves them into +their own file, so that they can be called multiple times. This means +that the "register abuse" I engaged in in Lesson 4 has to be backed out; +I have to be "good" and use the registers as recommended by the +textbooks, because now they'll have multiple users and the conventions +must be honored in that case. + +## Lessons 7 & 8: Print-with-linefeed and Argv + +Lesson 6 is virtually indistinguishable from Lessons 5 and 7; it's a +tiny jump to using null instead of LF as our terminator, and I was +already doing that. Lesson 7 creates a wrapper around `puts()` that +automatically appends a line-feed to the end of your null-terminated +string. + +This leads into lesson 8, in which the environment provides a new chunk +of memory containing the strings with which the program was initialized, +and pointers to those strings are placed on the stack. The first value +on the stack is the number of pointers. 
+ +With the "add a line feed" wrapper, the original text has you putting +your line-feed string data into the stack, but I cheaped out and made my +line-feed a two-byte (LF + NULL) constant and referred to it by address +instead. + +One thing I did learn here? When I ported it to X86_64, it broke +badly. It turns out that `syscall`, unlike `int 80h`, clobbers the +counter register `rcx`. And since that's what we were using in the +32-bit version as our argv counter, I preserved that semantic in the +64-bit version, which also means I had to modify `puts()` to push +`rcx` onto the stack and pop it off afterward. + +### Sidebar: A bug! + +Early on in Lesson 4, I spotted and fixed a bug where I had one too many +`pops` off the stack (see +[commit 89b58186](https://github.com/elfsternberg/asmtutorials/commit/89b58186fbc54508891c0077cc3e32b3fed8d7cb#diff-89abeb42c81885d8d2e202657820501bL58)), +but what perplexed me is how the system didn't crash with a stack +underflow. Now I know why: the stack had two values on it already: the +counter, and the pointer to the program name, which is always `argv[0]`. +Kinda cool to realize that now. + +More to come... maybe ## Authors diff --git a/x86/Makefile b/x86/Makefile index 85f5099..f38b5c6 100644 --- a/x86/Makefile +++ b/x86/Makefile @@ -9,26 +9,37 @@ LINK_32=elf_i386 default: help -all: hello strlen subroutines ## Build everything at once +PROGRAMS=hello strlen subroutines includes argv -%.o: %.s +all: $(PROGRAMS) ## Build everything at once + +%.o: %.s functions.asm $(NASM) -f $(COMPILE_32) $< +# Lesson 1 was skipped because it was just Lesson 2 without a proper +# exit handler... and who does that? 
+ hello: hello.o ## Build Lesson 2: Print string with known length, exit cleanly $(LD) -m $(LINK_32) -o $@ $< -strlen: strlen.o ## Build Lesson 2: Determine length programmatically +strlen: strlen.o ## Build Lesson 3: Determine length programmatically $(LD) -m $(LINK_32) -o $@ $< -subroutines: subroutines.o ## Build Lesson 3: Separate strlen() and puts() into subroutine. +subroutines: subroutines.o ## Build Lesson 4: Separate strlen() and puts() into subroutine. $(LD) -m $(LINK_32) -o $@ $< -includes: includes.o ## Build Lesson 4: Separate strlen() and puts() into their own files. +includes: includes.o ## Build Lesson 5: Separate strlen() and puts() into their own files. + $(LD) -m $(LINK_32) -o $@ $< + +# Lesson 6 is wrapped in Lesson 7: using null-terminated strings, but +# lesson 7 then adds a line feed, so the jump isn't too big. + +argv: argv.o ## Build Lesson 7 and 8: Line feeds and command line arguments $(LD) -m $(LINK_32) -o $@ $< help: run-help ## Print this helpful message (default) clean: - rm -f hello strlen subroutines *.o + rm -f $(PROGRAMS) *.o include ../makefiles/help.make diff --git a/x86/argv.s b/x86/argv.s new file mode 100644 index 0000000..5c81ce8 --- /dev/null +++ b/x86/argv.s @@ -0,0 +1,24 @@ + ;; Hello World Program #7 & 8: Println (in functions) and command line arguments + +%include "functions.asm" + +section .text + global _start + +;;; We'll show how to access any arguments passed in by the +;;; environment (usually the Shell). + +_start: + pop ecx ; The first object on the stack is ARGC: The number of arguments passed in. + +next_arg: + cmp ecx, 0h ; If there are no arguments, or no more arguments left, exit. + jz exit + pop eax ; The stack currently has pointer to the messages, null-terminated + ; So we pop one off, put it into the register used by putslf, and... + call putslf + dec ecx ; Drop the count by one, and try again. 
+ jmp next_arg + +exit: + call exit_program diff --git a/x86/functions.asm b/x86/functions.asm index a1e1c18..08a4604 100644 --- a/x86/functions.asm +++ b/x86/functions.asm @@ -9,10 +9,16 @@ ;; unistd.h %define STDOUT 1 + ;; A single null-terminated line feed. +section .data + _lf db 0ah, 00h + +section .text + ;; strlen() function. Takes eax as an argument - the pointer to ;; the initial string. Returns eax as the result - the length of ;; the string. - + strlen: push ebx ; We'll be borrowing this register, so we put its ; current value on the stack. @@ -59,6 +65,14 @@ puts: pop edx ret +putslf: + call puts ; Print the string + push eax ; Preserve this register + mov eax, _lf + call puts + pop eax + ret + ;; exit(). Straight from the original. exit_program: diff --git a/x86_64/Makefile b/x86_64/Makefile index 688508d..c3b9a74 100644 --- a/x86_64/Makefile +++ b/x86_64/Makefile @@ -9,27 +9,38 @@ LINK_64=elf_x86_64 default: help -all: hello strlen subroutines ## Build everything at once +PROGRAMS=hello strlen subroutines includes argv -%.o: %.s +all: $(PROGRAMS) ## Build everything at once + +%.o: %.s functions.asm $(NASM) -f $(COMPILE_64) $< +# Lesson 1 was skipped because it was just Lesson 2 without a proper +# exit handler... and who does that? + hello: hello.o ## Build Lesson 2: Print string with known length, exit cleanly $(LD) -m $(LINK_64) -o $@ $< -strlen: strlen.o ## Build Lesson 2: Determine length programmatically +strlen: strlen.o ## Build Lesson 3: Determine length programmatically $(LD) -m $(LINK_64) -o $@ $< -subroutines: subroutines.o ## Build Lesson 3: Separate strlen() and puts() into subroutine. +subroutines: subroutines.o ## Build Lesson 4: Separate strlen() and puts() into subroutine. $(LD) -m $(LINK_64) -o $@ $< -includes: includes.o ## Build Lesson 4: Separate strlen() and puts() into their own files. +includes: includes.o ## Build Lesson 5: Separate strlen() and puts() into their own files. 
+ $(LD) -m $(LINK_64) -o $@ $< + +# Lesson 6 is wrapped in Lesson 7: using null-terminated strings, but +# lesson 7 then adds a line feed, so the jump isn't too big. + +argv: argv.o ## Build Lesson 7 and 8: Line feeds and command line arguments $(LD) -m $(LINK_64) -o $@ $< help: run-help ## Print this helpful message (default) clean: - rm -f hello strlen subroutines *.o + rm -f $(PROGRAMS) *.o include ../makefiles/help.make diff --git a/x86_64/argv.s b/x86_64/argv.s new file mode 100644 index 0000000..bd0a569 --- /dev/null +++ b/x86_64/argv.s @@ -0,0 +1,21 @@ + ;; Hello World Program #7 & 8: Println (in functions) and command line arguments + +%include "functions.asm" + +section .text + global _start + +_start: + pop rcx ; Counter. The first object is ARGC, the count of arguments. + +next_arg: + cmp rcx, 0h ; If there are no arguments, or no more arguments left, exit. + jz exit + pop rsi ; The stack currently has pointer to the messages, null-terminated + ; So we pop one off, put it into the register used by putslf, and... + call putslf + dec rcx ; Drop the count by one, and try again. + jmp next_arg + +exit: + call exit_program diff --git a/x86_64/functions.asm b/x86_64/functions.asm index 58478d9..8cf4640 100644 --- a/x86_64/functions.asm +++ b/x86_64/functions.asm @@ -9,6 +9,12 @@ ;; unistd.h %define STDOUT 1 + ;; A single null-terminated line feed. +section .data + _lf db 0ah, 00h + +section .text + ;; strlen() function. Takes rdx as an argument - the pointer to ;; the initial string. Returns rdx as the result - the length of ;; the string. @@ -32,6 +38,7 @@ strlen_done: ;; its only argument - the pointer to the beginning of the string. 
puts: + push rcx ; syscall uses RCX itself push rdx ; RDX (data) is used as the length of the message push rax ; RAX is the instruction to WRITE push rdi ; RDI (destination) is used as an instruction to WRITE @@ -54,6 +61,15 @@ puts: pop rdi pop rax pop rdx + pop rcx + ret + +putslf: + call puts ; Print the string + push rsi ; Preserve this register + mov rsi, _lf + call puts + pop rsi ret ;; exit(). Straight from the original.