/*
 * Written by Solar Designer <solar at openwall.com> in 1998-2010.
 * No copyright is claimed, and the software is hereby placed in the public
 * domain.  In case this attempt to disclaim copyright and place the software
 * in the public domain is deemed null and void, then the software is
 * Copyright (c) 1998-2010 Solar Designer and it is hereby released to the
 * general public under the following terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted.
 *
 * There's ABSOLUTELY NO WARRANTY, express or implied.
 *
 * See crypt_blowfish.c for more information.
 */

#ifdef __i386__

/*
 * Per-platform assembler quirks.
 *
 * UNDERSCORES	the exported symbol gets one more leading underscore
 *		(_BF_body_r is renamed to __BF_body_r).
 * ALIGN_LOG	DO_ALIGN passes its argument to .align unchanged instead of
 *		converting it to 1 << log.
 *
 * a.out OpenBSD and DJGPP want both; Cygwin and MinGW want only the
 * extra underscore.
 */
#if (defined(__OpenBSD__) && !defined(__ELF__)) || defined(__DJGPP__)
#define UNDERSCORES
#define ALIGN_LOG
#elif defined(__CYGWIN32__) || defined(__MINGW32__)
#define UNDERSCORES
#endif

#if defined(UNDERSCORES)
#define _BF_body_r			__BF_body_r
#endif

/*
 * DO_ALIGN(log): emit a .align directive for a 2^log boundary.  DUMBAS,
 * when defined by the build, selects a spelling without parentheses.
 */
#if defined(ALIGN_LOG)
#define DO_ALIGN(log)			.align (log)
#elif !defined(DUMBAS)
#define DO_ALIGN(log)			.align (1 << (log))
#else
#define DO_ALIGN(log)			.align 1 << log
#endif

/*
 * Frame layout used by _BF_body_r after it has switched stacks.
 *
 * ctx (%esp) points BF_FRAME bytes below the structure passed by the
 * caller, so the displacement BF_FRAME(ctx) addresses the first byte of
 * that structure.  _BF_body_r jumps to BF_die if that position would be
 * above the stack pointer it was entered with.
 */
#define BF_FRAME			0x200
#define ctx				%esp

/* The dword at the bottom of the frame: the current output pointer. */
#define BF_ptr				(ctx)

/*
 * S(N, r): the 32-bit word with index r in the table that starts N bytes
 * into the structure (BF_ROUND uses N = 0, 0x400, 0x800 and 0xC00).
 *
 * P(N): the 32-bit word with index N in the array that starts 0x1000 bytes
 * into the structure.  The DUMBAS spelling computes 4*N by repeated
 * addition instead of a multiplication.
 */
#define S(N, r)				N+BF_FRAME(ctx,r,4)
#ifdef DUMBAS
#define P(N)				0x1000+N+N+N+N+BF_FRAME(ctx)
#else
#define P(N)				0x1000+4*N+BF_FRAME(ctx)
#endif

/*
 * This version of the assembly code is optimized primarily for the original
 * Intel Pentium but is also careful to avoid partial register stalls on the
 * Pentium Pro family of processors (tested up to Pentium III Coppermine).
 *
 * It is possible to do 15% faster on the Pentium Pro family and probably on
 * many non-Intel x86 processors, but, unfortunately, that would make things
 * twice as slow for the original Pentium.
 *
 * An additional 2% speedup may be achieved with non-reentrant code.
 */

/*
 * Register allocation.
 *
 * L, R		the two 32-bit halves of the block being encrypted
 * tmp1..tmp5	scratch registers
 * *_lo, *_hi	the 8-bit sub-registers of tmp1..tmp4 that BF_ROUND uses to
 *		move single bytes around
 *
 * %ebx, %esi, %edi and %ebp are all in use; _BF_body_r pushes them on entry
 * and pops them before returning.
 */
#define L				%esi
#define R				%edi
#define tmp1				%eax
#define tmp1_lo				%al
#define tmp2				%ecx
#define tmp2_hi				%ch
#define tmp3				%edx
#define tmp3_lo				%dl
#define tmp4				%ebx
#define tmp4_hi				%bh
#define tmp5				%ebp

.text

/*
 * BF_ROUND(L, R, N): one Blowfish round.
 *
 * The parameters L and R shadow the register aliases of the same names, so
 * the caller decides which register plays which role in a given round.
 *
 * On entry:	tmp2 = P[N]; bits 8..31 of tmp3 are zero.
 * Effect:	L ^= P[N];
 *		R ^= ((S0[L >> 24] + S1[(L >> 16) & 0xFF])
 *		      ^ S2[(L >> 8) & 0xFF]) + S3[L & 0xFF];
 *		where S0..S3 are the tables at S(0), S(0x400), S(0x800) and
 *		S(0xC00), and L is the value after the XOR with P[N].
 * On exit:	tmp2 = P[N+1], ready for the next round.
 * Clobbers:	tmp1, tmp2, tmp4, tmp5, the low byte of tmp3, flags.
 *
 * The top byte and byte 1 of L are fetched with byte-to-byte moves from
 * tmp2_hi (after the 16-bit shift) and tmp4_hi.  tmp1 is cleared at the
 * start of every round before its low byte is written; tmp3 is not, which
 * is why its upper bits must already be zero.
 */
#define BF_ROUND(L, R, N) \
	xorl L,tmp2; \
	xorl tmp1,tmp1; \
	movl tmp2,L; \
	shrl $16,tmp2; \
	movl L,tmp4; \
	movb tmp2_hi,tmp1_lo; \
	andl $0xFF,tmp2; \
	movb tmp4_hi,tmp3_lo; \
	andl $0xFF,tmp4; \
	movl S(0,tmp1),tmp1; \
	movl S(0x400,tmp2),tmp5; \
	addl tmp5,tmp1; \
	movl S(0x800,tmp3),tmp5; \
	xorl tmp5,tmp1; \
	movl S(0xC00,tmp4),tmp5; \
	addl tmp1,tmp5; \
	movl 4+P(N),tmp2; \
	xorl tmp5,R

/*
 * BF_ENCRYPT_START: first part of encrypting one 64-bit block.
 *
 * On entry:	L, R = the input block; tmp2 = P[0]; bits 8..31 of tmp3 are
 *		zero (required by BF_ROUND).
 *
 * Runs rounds 0..15 with the roles of L and R swapped on every other round.
 * After round 15 tmp2 holds P[16].  The tail then
 *	- reloads tmp5 (clobbered by the rounds) from BF_ptr,
 *	- sets tmp2 = P[16] ^ L,
 *	- sets L = P[17].
 *
 * BF_ENCRYPT_END must follow to finish the block; the two halves are
 * separate macros so that a caller can place an instruction between them.
 */
#define BF_ENCRYPT_START \
	BF_ROUND(L, R, 0); \
	BF_ROUND(R, L, 1); \
	BF_ROUND(L, R, 2); \
	BF_ROUND(R, L, 3); \
	BF_ROUND(L, R, 4); \
	BF_ROUND(R, L, 5); \
	BF_ROUND(L, R, 6); \
	BF_ROUND(R, L, 7); \
	BF_ROUND(L, R, 8); \
	BF_ROUND(R, L, 9); \
	BF_ROUND(L, R, 10); \
	BF_ROUND(R, L, 11); \
	BF_ROUND(L, R, 12); \
	BF_ROUND(R, L, 13); \
	BF_ROUND(L, R, 14); \
	BF_ROUND(R, L, 15); \
	movl BF_ptr,tmp5; \
	xorl L,tmp2; \
	movl P(17),L

/*
 * BF_ENCRYPT_END: second part of encrypting one block; must directly follow
 * BF_ENCRYPT_START (apart from instructions that leave L, R and tmp2 alone).
 *
 * On entry:	L = P[17], R = the half last modified by round 15,
 *		tmp2 = P[16] ^ (the other half).
 * On exit:	L = P[17] ^ old R, R = old tmp2; this is the output block.
 */
#define BF_ENCRYPT_END \
	xorl R,L; \
	movl tmp2,R

/*
 * _BF_body_r: regenerate the P-array and the S-boxes in place.
 *
 * In:	4(%esp) = pointer to a structure laid out as this code addresses
 *	it: four tables of 256 32-bit words at byte offsets 0, 0x400, 0x800
 *	and 0xC00 (see S()), followed by 18 32-bit words at offset 0x1000
 *	(see P()).
 *
 * Effect:	starting from an all-zero block, repeatedly encrypts the
 *	previous output block with the current contents of the structure and
 *	writes each 8-byte result over the structure: first P[0..17], then
 *	the whole 0x1000-byte table area from offset 0 upwards.
 *
 * Registers:	%ebp, %ebx, %esi and %edi are saved and restored.  %eax, %ecx,
 *	%edx and the flags are clobbered.
 *
 * Stack:	%esp is temporarily pointed BF_FRAME bytes below the structure
 *	so that the tables can be addressed relative to %esp.  Within that
 *	frame (ctx) holds BF_ptr and 4(ctx) holds the original %esp.  If the
 *	relocated %esp would be above the current one, control goes to
 *	BF_die instead.
 */
DO_ALIGN(5)
.globl _BF_body_r
_BF_body_r:
/* Fetch the pointer argument before the pushes below move %esp. */
	movl 4(%esp),%eax
/* Saved here, restored in reverse order by the epilogue. */
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
/* %eax = prospective stack pointer, BF_FRAME - 8 bytes below the structure. */
	subl $BF_FRAME-8,%eax
/* L = 0 (R is cleared a few lines down): the first input block is zero. */
	xorl L,L
/* Unsigned check: the new stack pointer must not be above the current one. */
	cmpl %esp,%eax
	ja BF_die
/* Switch stacks; %eax now holds the original %esp. */
	xchgl %eax,%esp
	xorl R,R
/* Becomes 4(ctx) once the frame is complete; reloaded by the epilogue. */
	pushl %eax
/*
 * %esp is still 4 bytes above its final position, hence the -4:
 * %eax = address of P[0], tmp2 = value of P[0].
 */
	leal 0x1000+BF_FRAME-4(ctx),%eax
	movl 0x1000+BF_FRAME-4(ctx),tmp2
/* BF_ptr = address of P[0]; the frame is now complete. */
	pushl %eax
/* BF_ROUND only writes the low byte of tmp3, so clear the rest once here. */
	xorl tmp3,tmp3
/*
 * Overwrite P[0..17], one 8-byte block per iteration.  L and R carry over
 * from one iteration to the next, so each output is the next input.
 */
BF_loop_P:
	BF_ENCRYPT_START
/* tmp5 was reloaded from BF_ptr by the macro above; step past this block. */
	addl $8,tmp5
	BF_ENCRYPT_END
/* tmp1 = address just past P[17], the loop bound. */
	leal 0x1000+18*4+BF_FRAME(ctx),tmp1
	movl tmp5,BF_ptr
/*
 * The flags set here are consumed by the ja at the bottom of the loop; the
 * movl instructions in between do not modify flags.
 */
	cmpl tmp5,tmp1
	movl L,-8(tmp5)
	movl R,-4(tmp5)
/* Reload P[0] after the stores so that a freshly written P[0] is used. */
	movl P(0),tmp2
	ja BF_loop_P
/* Point the output at the start of the structure (offset 0). */
	leal BF_FRAME(ctx),tmp5
/*
 * NOTE(review): nothing since the previous clear writes more than the low
 * byte of tmp3, so this looks functionally redundant; presumably it is here
 * for partial-register-stall avoidance -- confirm.
 */
	xorl tmp3,tmp3
	movl tmp5,BF_ptr
/*
 * Overwrite the tables, four 8-byte blocks (32 bytes) per iteration.
 * BF_ptr is only advanced once per iteration; the first three blocks are
 * stored at fixed offsets 0, 8 and 16 from it.
 */
BF_loop_S:
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,(tmp5)
	movl R,4(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,8(tmp5)
	movl R,12(tmp5)
	BF_ENCRYPT_START
	BF_ENCRYPT_END
	movl P(0),tmp2
	movl L,16(tmp5)
	movl R,20(tmp5)
	BF_ENCRYPT_START
/* Advance to the next 32-byte group; the fourth block goes to -8(tmp5). */
	addl $32,tmp5
	BF_ENCRYPT_END
/* tmp1 = address just past the last table (offset 0x1000), the loop bound. */
	leal 0x1000+BF_FRAME(ctx),tmp1
	movl tmp5,BF_ptr
/* As in BF_loop_P, these flags stay live across the movl instructions. */
	cmpl tmp5,tmp1
	movl P(0),tmp2
	movl L,-8(tmp5)
	movl R,-4(tmp5)
	ja BF_loop_S
/* Back to the original stack (saved at 4(ctx)), then undo the pushes. */
	movl 4(%esp),%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret

/*
 * BF_die: target of the stack range check at the top of _BF_body_r, taken
 * when the frame below the structure would start above the current %esp.
 * Never returns: executes hlt and, should execution continue past it, jumps
 * back to do so again.
 * NOTE(review): hlt is a privileged instruction, so in user mode this
 * presumably raises a fault that terminates the process -- confirm for the
 * target OS.
 */
BF_die:
/* Oops, need to re-compile with a larger BF_FRAME. */
	hlt
	jmp BF_die

#endif

/*
 * On Linux ELF targets, emit an empty .note.GNU-stack section.  This is
 * deliberately placed after the end of the __i386__ conditional, so the
 * section is emitted even when the code above is compiled out.
 * NOTE(review): by GNU toolchain convention this marks the object as not
 * requiring an executable stack -- confirm against the linker documentation.
 */
#if defined(__ELF__) && defined(__linux__)
.section .note.GNU-stack,"",@progbits
#endif