;
;	Assembler versions of the darkness routines
;

[BITS 32]				; generate 32-bit code
[SECTION .text]				; everything below is placed in the code section

; Declare public symbols
;
; Every routine is exported under two names, with and without a leading
; underscore.  Both labels sit on the same entry point, so either name
; reaches the same code.
; NOTE(review): presumably this lets the object link under C toolchains
; using either symbol-naming convention - confirm against the build.

GLOBAL _darken_x86_32mmx
GLOBAL darken_x86_32mmx
GLOBAL _darken_x86_32smmx
GLOBAL darken_x86_32smmx
GLOBAL _darken_x86_16
GLOBAL darken_x86_16
GLOBAL _darken_x86_16s
GLOBAL darken_x86_16s
GLOBAL _darken_x86_8
GLOBAL darken_x86_8
GLOBAL _darken_x86_8s
GLOBAL darken_x86_8s
GLOBAL _darken_x86_blit8
GLOBAL darken_x86_blit8
GLOBAL _darken_x86_blit16
GLOBAL darken_x86_blit16
GLOBAL _darken_x86_blit32
GLOBAL darken_x86_blit32

;	MMX darkness code

;-----------------------------------------------------------------------
; darken_x86_32mmx - darken a run of 32-bit pixels with a per-pixel darkmap
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the 32-bit pixels, modified in place
;   [ebp+12]  Source - address of the darkmap, one byte per pixel
;   [ebp+16]  Len    - number of pixels to process
;
; For every pixel the darkmap byte is copied into all four byte lanes
; and subtracted from the pixel with unsigned saturation (PSUBUSB), so
; each byte of the pixel bottoms out at zero instead of wrapping.
;
; All general registers are preserved (pushad/popad).  EMMS is issued
; on entry and again on exit.  A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_32mmx:
 darken_x86_32mmx:
	push ebp
	mov ebp,esp

	pushad
	emms

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi -> darkmap bytes
	mov ecx,[ebp+16]		; ecx = pixels remaining

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	; Spread the darkmap byte over all four byte lanes:
	; v * 0x01010101 = v in every byte, for any v in 0..255.
	movzx eax,byte [esi]
	imul eax,eax,0x01010101

	movd mm1,eax			; mm1 = level in each lane
	movd mm0,[edi]			; mm0 = dest pixel
	psubusb mm0,mm1			; per-byte subtract, clamped at 0
	movd [edi],mm0			; write the darkened pixel back

	add edi,4			; next 32-bit pixel
	inc esi				; next darkmap byte
	dec ecx
	jnz .pixel

.done:
	emms
	popad
	pop ebp
	ret

; Single colour (not using a darkmap)

;-----------------------------------------------------------------------
; darken_x86_32smmx - darken a run of 32-bit pixels by one fixed level
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the 32-bit pixels, modified in place
;   [ebp+12]  Source - the darkening level itself (a value, not an
;                      address); only its low byte is used
;   [ebp+16]  Len    - number of pixels to process
;
; The level is copied into all four byte lanes once, then subtracted
; from every pixel with unsigned saturation (PSUBUSB).
;
; We use colour separation in the roof projector, and 0 is transparent.
; So a pixel that was non-zero but darkens to zero is written back as
; 0x01000000 instead, while a pixel that was already zero stays zero.
;
; All general registers are preserved (pushad/popad).  EMMS is issued
; on entry and again on exit.  A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_32smmx:
 darken_x86_32smmx:
	push ebp
	mov ebp,esp

	pushad
	emms

	mov edi,[ebp+8]			; edi -> dest pixels
	mov eax,[ebp+12]		; eax = darkening level
	mov ecx,[ebp+16]		; ecx = pixels remaining

	; The level never changes, so build the four-lane copy once:
	; v * 0x01010101 = v in every byte, for any v in 0..255.
	and eax,0x000000ff
	imul eax,eax,0x01010101
	movd mm1,eax			; mm1 = level in each lane

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	mov edx,[edi]			; edx = original pixel (kept for the test below)
	movd mm0,edx
	psubusb mm0,mm1			; per-byte subtract, clamped at 0
	movd eax,mm0			; eax = darkened pixel

	test eax,eax			; darkened all the way to zero?
	jnz .store
	test edx,edx			; was it transparent to begin with?
	jz .store			; yes: leave it as zero
	mov eax,0x01000000		; no: keep it visibly non-zero

.store:
	mov [edi],eax			; write output

	add edi,4			; next 32-bit pixel
	dec ecx
	jnz .pixel

.done:
	emms
	popad
	pop ebp
	ret

;	16bpp darkness code

;-----------------------------------------------------------------------
; darken_x86_16 - darken a run of 16-bit pixels with a per-pixel darkmap
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the 16-bit pixels, modified in place
;   [ebp+12]  Source - address of the darkmap, one byte per pixel
;   [ebp+16]  Len    - number of pixels to process
;   [ebp+20]  LUT    - address of a table of 16-bit entries
;
; Each pixel is replaced by
;       LUT[ (level >> 3) * 65536 + pixel ]
; where level is the darkmap byte.  Masking the level with 0xf8 and
; shifting left by 13 is the same as (level >> 3) << 16, i.e. the top
; five bits of the level select a 65536-entry row of the table.
;
; All general registers are preserved (pushad/popad).
; A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_16:
darken_x86_16:
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi -> darkmap bytes
	mov ecx,[ebp+16]		; ecx = pixels remaining
	mov ebx,[ebp+20]		; ebx -> lookup table

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	movzx eax,byte [esi]		; eax = light level
	and eax,0x000000f8		; keep the top five bits...
	shl eax,13			; ...as the row: (level >> 3) * 65536

	movzx edx,word [edi]		; edx = dest pixel, zero-extended
	add eax,edx			; eax = table index

	mov ax,[ebx+eax*2]		; entries are 16 bits wide
	mov [edi],ax			; write output

	add edi,2			; next 16-bit pixel
	inc esi				; next darkmap byte
	dec ecx
	jnz .pixel

.done:
	popad
	pop ebp
	ret

;   Single colour (not using a darkmap)

;-----------------------------------------------------------------------
; darken_x86_16s - darken a run of 16-bit pixels by one fixed level
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the 16-bit pixels, modified in place
;   [ebp+12]  Source - the light level itself (a value, not an address);
;                      only bits 3..7 are used
;   [ebp+16]  Len    - number of pixels to process
;   [ebp+20]  LUT    - address of a table of 16-bit entries
;
; Each pixel is replaced by
;       LUT[ (level >> 3) * 65536 + pixel ]
; Masking the level with 0xf8 and shifting left by 13 is the same as
; (level >> 3) << 16.  Because the level is fixed, the address of its
; 65536-entry row is worked out once before the loop.
;
; All general registers are preserved (pushad/popad).
; A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_16s:
darken_x86_16s:
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi = light level
	mov ecx,[ebp+16]		; ecx = pixels remaining
	mov ebx,[ebp+20]		; ebx -> lookup table

	; Select the table row for this level once, up front.
	and esi,0x000000f8		; keep the top five bits...
	shl esi,13			; ...as the row: (level >> 3) * 65536
	lea ebx,[ebx+esi*2]		; ebx -> first 16-bit entry of that row

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	movzx eax,word [edi]		; eax = dest pixel, zero-extended
	mov ax,[ebx+eax*2]		; look it up in the selected row
	mov [edi],ax			; write output

	add edi,2			; next 16-bit pixel
	dec ecx
	jnz .pixel

.done:
	popad
	pop ebp
	ret

;	8bpp darkness code

;-----------------------------------------------------------------------
; darken_x86_8 - darken a run of 8-bit pixels with a per-pixel darkmap
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the 8-bit pixels, modified in place
;   [ebp+12]  Source - address of the darkmap, one byte per pixel
;   [ebp+16]  Len    - number of pixels to process
;   [ebp+20]  LUT    - address of a table of byte entries
;
; Each pixel is replaced by
;       LUT[ level * 256 + pixel ]
; where level is the darkmap byte, so the index always lies in 0..65535.
;
; All general registers are preserved (pushad/popad).
; A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_8:
darken_x86_8:
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi -> darkmap bytes
	mov ecx,[ebp+16]		; ecx = pixels remaining
	mov ebx,[ebp+20]		; ebx -> lookup table

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	movzx eax,byte [esi]		; eax = light level
	shl eax,8			; level selects a 256-entry row
	mov al,[edi]			; low byte = dest pixel

	mov al,[ebx+eax]		; get colour from table
	mov [edi],al			; write output

	inc edi				; next 8-bit pixel
	inc esi				; next darkmap byte
	dec ecx
	jnz .pixel

.done:
	popad
	pop ebp
	ret

;-----------------------------------------------------------------------
; darken_x86_8s - darken a run of 8-bit pixels by one fixed level
;                 (single colour, not using a darkmap)
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the 8-bit pixels, modified in place
;   [ebp+12]  Source - the light level itself (a value, not an address);
;                      only its low byte is used
;   [ebp+16]  Len    - number of pixels to process
;   [ebp+20]  LUT    - address of a table of byte entries
;
; Each pixel is replaced by
;       LUT[ level * 256 + pixel ]
; Because the level is fixed, the address of its 256-entry row is
; worked out once before the loop.
;
; All general registers are preserved (pushad/popad).
; A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_8s:
darken_x86_8s:
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi = light level
	mov ecx,[ebp+16]		; ecx = pixels remaining
	mov ebx,[ebp+20]		; ebx -> lookup table

	; Select the table row for this level once, up front.
	and esi,0x000000ff		; only the low byte takes part
	shl esi,8			; level * 256
	add ebx,esi			; ebx -> first entry of that row

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	movzx eax,byte [edi]		; eax = dest pixel
	mov al,[ebx+eax]		; get colour from the selected row
	mov [edi],al			; write output

	inc edi				; next 8-bit pixel
	dec ecx
	jnz .pixel

.done:
	popad
	pop ebp
	ret

; Bitmap Combining

; 8bpp

;-----------------------------------------------------------------------
; darken_x86_blit8 - copy 8-bit pixels, treating zero as transparent
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the destination pixels
;   [ebp+12]  Source - address of the source pixels
;   [ebp+16]  Len    - number of pixels to process
;
; Every non-zero source pixel overwrites the destination pixel at the
; same position; where the source pixel is zero the destination is
; left as it was.
;
; The source is read with an explicit load and increment, so the
; routine always walks forwards whatever the direction flag holds.
; All general registers are preserved (pushad/popad).
; A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_blit8:
 darken_x86_blit8:
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi -> source pixels
	mov ecx,[ebp+16]		; ecx = pixels remaining

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	mov al,[esi]			; fetch source pixel
	inc esi
	test al,al			; zero means transparent
	jz .skip
	mov [edi],al			; opaque: overwrite dest
.skip:
	inc edi				; next 8-bit pixel
	dec ecx
	jnz .pixel

.done:
	popad
	pop ebp
	ret

; 16bpp

;-----------------------------------------------------------------------
; darken_x86_blit16 - copy 16-bit pixels, treating zero as transparent
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the destination pixels
;   [ebp+12]  Source - address of the source pixels
;   [ebp+16]  Len    - number of 16-bit pixels to process
;
; Every non-zero source pixel overwrites the destination pixel at the
; same position; where the source pixel is zero the destination is
; left as it was.
;
; The source is read with an explicit load and increment, so the
; routine always walks forwards whatever the direction flag holds.
; All general registers are preserved (pushad/popad).
; A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_blit16:
 darken_x86_blit16:
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi -> source pixels
	mov ecx,[ebp+16]		; ecx = pixels remaining

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	mov ax,[esi]			; fetch source pixel
	add esi,2
	test ax,ax			; zero means transparent
	jz .skip
	mov [edi],ax			; opaque: overwrite dest
.skip:
	add edi,2			; next 16-bit pixel
	dec ecx
	jnz .pixel

.done:
	popad
	pop ebp
	ret

; 32bpp

;-----------------------------------------------------------------------
; darken_x86_blit32 - copy 32-bit pixels, treating zero as transparent
;
; Stack arguments (addressed through EBP once the frame is set up):
;   [ebp+8]   Dest   - address of the destination pixels
;   [ebp+12]  Source - address of the source pixels
;   [ebp+16]  Len    - number of 32-bit pixels to process
;
; Every non-zero source pixel overwrites the destination pixel at the
; same position; where the source pixel is zero the destination is
; left as it was.
;
; The source is read with an explicit load and increment, so the
; routine always walks forwards whatever the direction flag holds.
; All general registers are preserved (pushad/popad).
; A Len of zero leaves Dest untouched.
;-----------------------------------------------------------------------
_darken_x86_blit32:
 darken_x86_blit32:
	push ebp
	mov ebp,esp

	pushad

	mov edi,[ebp+8]			; edi -> dest pixels
	mov esi,[ebp+12]		; esi -> source pixels
	mov ecx,[ebp+16]		; ecx = pixels remaining

	; An empty run must do nothing; without this check the counter
	; would wrap below zero and the loop would run through memory.
	test ecx,ecx
	jz .done

.pixel:
	mov eax,[esi]			; fetch source pixel
	add esi,4
	test eax,eax			; zero means transparent
	jz .skip
	mov [edi],eax			; opaque: overwrite dest
.skip:
	add edi,4			; next 32-bit pixel
	dec ecx
	jnz .pixel

.done:
	popad
	pop ebp
	ret

