1. Strings

Introduction

; strlen (C calling convention)
;   Returns the number of bytes that precede the terminating null of s.
;   input:
;     s  : pointer to const ASCIIZ string (address at esp+4).
;   output:
;     eax : length of s, not counting the terminating null byte.
;   clobbers:
;     ecx, edx, flags.
;   algo:
;     Bytes are checked one at a time until the pointer is dword aligned,
;     then four at a time. For a dword x, (x - 0x01010101) & ~x has bit 7
;     set in the lowest byte of x that is zero, and in no byte below it;
;     adding 0xfefefeff is the same as subtracting 0x01010101 modulo 2^32.
;     This is a variation on the formula "(x-1) & ~x = 2^first_bit(x) - 1"
;     found in glibc code, used differently here.
;     The borrow produced by a null byte can also set bit 7 in the bytes
;     above it, so only the lowest flagged byte is meaningful: the result
;     must be scanned from the low byte upwards (little endian order).
;   note:
;     The dword loop reads whole dwords, so up to three bytes located
;     after the terminator may be read (they are never written).
strlen:
        mov eax,[esp+4]         ; eax = s, the running pointer
        ; Byte-wise prologue: at least one byte is always checked here,
        ; because the pointer is advanced before its alignment is tested.
.align: cmp byte [eax],0        ; terminator found before reaching alignment?
        jz .end                 ; yes: eax already points to the null byte
        inc eax
        test al,3               ; low two address bits clear = dword aligned
        jnz .align
        ; Dword loop: eax is 4-byte aligned from here on.
.loop:  mov edx,[eax]           ; edx = x, the next 4 bytes of the string
        lea ecx,[edx+0xfefefeff] ; ecx = x - 0x01010101 (flags untouched)
        not edx
        and edx,ecx             ; edx = ~x & (x - 0x01010101)
        add eax,byte 4          ; eax now points just past the dword read
        test edx,0x80808080     ; bit 7 set in some byte = a null byte in x
        jz .loop
        ; A null is in the dword just read. Move bit 7 of each byte, lowest
        ; byte first, into CF; eax is advanced once per byte examined.
.final: shr edx,8               ; CF = bit 7 of the current low byte
        inc eax                 ; inc leaves CF unchanged, so jnc sees shr's CF
        jnc .final
        sub eax, byte 5         ; undo the +4 above and the last inc:
                                ; eax now points to the null byte
.end:   sub eax,[esp+4]         ; length = address of null byte - s
        ret


; strcpy (C calling convention)
;   Copies the ASCIIZ string src, terminating null included, to dest.
;   input:
;     dest : pointer to destination ASCIIZ string (address at esp+4).
;     src  : pointer to source ASCIIZ string (address at esp+8).
;   output:
;     eax : dest.
;   clobbers:
;     ecx, edx, flags. ebx is used as scratch but saved and restored.
;     The stack slot of src (esp+8) is overwritten with the saved ebx
;     once the dword loop is entered.
;   algo:
;     Bytes are copied one at a time until the source pointer is dword
;     aligned, then four at a time. For a dword x read from src,
;     ~x & (x - 0x01010101) has bit 7 set in some byte exactly when x
;     contains a null byte (adding 0xfefefeff subtracts 0x01010101
;     modulo 2^32). The dword holding the terminator is then copied
;     byte by byte, lowest byte first, up to and including the null.
;   note:
;     Only the source pointer is aligned; dest is written at whatever
;     alignment results. The dword loop reads whole dwords from src, so
;     up to three bytes after the terminator may be read (not copied).
strcpy:
        mov ecx,[esp+8]                 ; ecx = src, the running read pointer
        mov edx,[esp+4]                 ; edx = dest, the running write pointer
        ; Byte-wise prologue: at least one byte is always copied here,
        ; because ecx is advanced before its alignment is tested.
.align: mov al,[ecx]
        inc ecx
        mov [edx],al                    ; copy one byte to align source ptr
        inc edx
        test al,al                      ; was that the terminator?
        jz .ret                         ; yes: done, ebx was never touched
        test cl,3                       ; low two bits clear = src dword aligned
        jnz .align
        mov [esp+8],ebx                 ; save ebx (the caller, e.g. gcc code, may
                                        ; be using it); src is already in ecx, so
                                        ; its stack slot is free to reuse
        jmp short .start                ; enter the loop at the load, not the store
.loop:  mov [edx],ebx                   ; previous dword had no null: store it whole
        add edx,byte 4
.start: mov eax,[ecx]                   ; get dword and search for a null byte
        lea ebx,[eax+0xfefefeff]        ; ebx = x - 0x01010101 (flags untouched)
        not eax
        and eax,ebx                     ; eax = ~x & (x - 0x01010101)
        sub ebx,0xfefefeff              ; restore ebx to dword read
        add ecx,byte 4
        test eax,0x80808080             ; bit 7 set in some byte = null byte in x
        jz .loop
        ; ebx holds the dword that contains the terminator.
.final: mov [edx],bl                    ; store the current low byte of ebx
        ror ebx,8                       ; next byte into bl; the byte just stored
                                        ; moves to bits 24-31 (slow but compact)
        inc edx
        test ebx,0xff000000             ; was the byte just stored non-null?
        jnz .final                      ; yes: keep copying; no: null was written
        mov ebx,[esp+8]                 ; restore ebx value
.ret:   mov eax,[esp+4]                 ; strcpy should return dest
        ret

Accessing Substrings

Establishing a Default Value

Exchanging Values Without Using Temporary Variables

Converting Between ASCII Characters and Values

Processing a String One Character at a Time

Reversing a String by Word or Character

Expanding and Compressing Tabs

Expanding Variables in User Input

Controlling Case

Interpolating Functions and Expressions Within Strings

Indenting Here Documents

Reformatting Paragraphs

Escaping Characters

Trimming Blanks from the Ends of a String

Parsing Comma-Separated Data

Soundex Matching

Program: fixstyle

Program: psgrep