// $Id: bonasse.h,v 1.2 2010-03-29 01:23:35 eiki Exp $

// Build-time SIMD selection.
// USE_SSE4 implies USE_SSE: if only USE_SSE4 is defined, USE_SSE is defined
// here as well, so the rest of the header only needs to test USE_SSE for the
// common macros and USE_SSE4 for the SSE4-only ones.
#if defined( USE_SSE4 ) && !defined( USE_SSE )
#define USE_SSE
#endif

// Include the intrinsics header for the selected level:
// <smmintrin.h> (SSE4.1) when USE_SSE4 is defined, otherwise
// <emmintrin.h> (SSE2). Nothing is included when USE_SSE is undefined.
#ifdef USE_SSE
#ifdef USE_SSE4
#include <smmintrin.h>
#else
#include <emmintrin.h>
#endif
#endif



#ifdef USE_SSE

 // NOTE: instruction-set requirements -- testz needs SSE4; insert needs SSE3.

// castbb(bb): view the storage of lvalue `bb` as an __m128i (takes its
//             address, so `bb` must be addressable).
//             NOTE(review): presumably `bb` must be 16 bytes wide and
//             16-byte aligned for this access -- confirm against bitboard_t.
// castmm(mm): the reverse view -- treat the storage of lvalue `mm` as a
//             bitboard_t.
// MMZERO:     128-bit vector with every bit clear.
// MMFF:       128-bit vector with every bit set.
#define castbb(bb)   (*(__m128i *)&(bb))
#define castmm(mm)   (*(bitboard_t *)&(mm))
#define MMZERO       (_mm_setzero_si128())
#define MMFF         (_mm_set1_epi32(-1))

// BBIniS(bb): assign the all-zero vector to `bb` (which must accept an
//             __m128i value).
#define BBIniS(bb)                (bb) = MMZERO

#ifdef USE_SSE4
// BBToUS(bb): 1 when any bit of `bb` is set, 0 when `bb` is all zero.
// _mm_testc_si128(a, b) yields 1 exactly when (~a & b) is all zero; with
// a == 0 that means "b is all zero", hence the negation.
#define BBToUS(bb)                (! _mm_testc_si128(MMZERO, castbb(bb)))

// Commented-out scalar forms; no SSE version of these is defined in this
// section.
//#define BBToUShift(bb)           ((bb).p[0]<<2 | (bb).p[1]<<1 | (bb).p[2])
//#define PopuCount(bb)            popu_count012( bb.p[0], bb.p[1], bb.p[2] )
//#define FirstOne(bb)             first_one012( bb.p[0], bb.p[1], bb.p[2] )
//#define LastOne(bb)              last_one210( bb.p[2], bb.p[1], bb.p[0] )

// BBCmpS(bb1,bb2): 1 when `bb1` and `bb2` differ in at least one bit, 0 when
// they are bit-for-bit identical (the XOR of equal operands is all zero).
// Uses _mm_xor_si128; the former `_mm_xor` is not an SSE intrinsic, so the
// macro could not be used.
#define BBCmpS(bb1,bb2) \
    (! _mm_testc_si128(MMZERO, _mm_xor_si128( castbb(bb1), castbb(bb2) )))
#endif

// Bitwise bitboard operations.
// In every macro `bb` is the destination: it is assigned an __m128i result.
// `bb1` / `bb2` are lvalue operands read through castbb().
// Each expansion is a fully parenthesized assignment with a parenthesized
// destination, so it still parses as intended with a non-trivial destination
// expression or when used inside a larger expression.

// bb = ~bb1   (_mm_andnot_si128(a, b) computes (~a) & b; b is all ones)
#define BBNotS(bb,bb1)           ((bb) = _mm_andnot_si128( castbb(bb1), MMFF ))

// bb = bb1 | bb2
#define BBOrS(bb,bb1,bb2)        ((bb) = _mm_or_si128( castbb(bb1), castbb(bb2) ))

// bb = bb1 & bb2
#define BBAndS(bb,bb1,bb2)       ((bb) = _mm_and_si128( castbb(bb1), castbb(bb2) ))

// bb = bb1 ^ bb2
#define BBXorS(bb,bb1,bb2)       ((bb) = _mm_xor_si128( castbb(bb1), castbb(bb2) ))

// bb |= bb1 & bb2   (`bb` is read directly as an __m128i, not through castbb)
#define BBAndOrS(bb,bb1,bb2)    \
    ((bb) = _mm_or_si128( (bb), _mm_and_si128( castbb(bb1), castbb(bb2) ) ))

// bb &= ~bb1        (`bb` is read directly as an __m128i, not through castbb)
#define BBNotAndS(bb,bb1)        ((bb) = _mm_andnot_si128( castbb(bb1), (bb) ))


// Commented-out scalar form; no SSE version of it is defined in this section.
//#define BBContractShift(bb1,bb2) ( ( (bb1).p[0] & (bb2).p[0] ) << 2
//                                     | ( (bb1).p[1] & (bb2).p[1] ) << 1
//                                     | ( (bb1).p[2] & (bb2).p[2] ) )

#ifdef USE_SSE4
// BBContractS(bb1,bb2): 1 when `bb1` and `bb2` have at least one set bit in
// common, 0 otherwise. _mm_testz_si128(a, b) returns 1 exactly when (a & b)
// is all zero, so the result is compared against 0.
#define BBContractS(bb1,bb2)    (_mm_testz_si128( castbb(bb1), castbb(bb2) ) == 0)
#endif

#ifdef USE_XOR_SSE
// In-place XOR updates of a bitboard, enabled only with USE_XOR_SSE.
// Each public macro expands to a single assignment through castbb(bb).
// NOTE(review): abb_mask, abb_mask_rl90, abb_mask_rr45 and abb_mask_rl45 are
// declared elsewhere; presumably they are the plain and rotated per-square
// mask tables -- confirm against their definitions.

// Local helper: XOR the __m128i value `mask` into the storage of `bb`.
#define BBXorIntoS(bb,mask) \
           castbb(bb) = _mm_xor_si128( castbb(bb), (mask) )

// Local helper: OR of entries i1 and i2 of mask table `tbl`, as an __m128i.
#define BBOrPairS(tbl,i1,i2) \
           _mm_or_si128( castbb(tbl[i1]), castbb(tbl[i2]) )

// Toggle in `bb` the bits of one table entry.
#define Xor(i,bb)        BBXorIntoS( bb, castbb(abb_mask[i]) )

#define XorFile(i,bb)    BBXorIntoS( bb, castbb(abb_mask_rl90[i]) )

#define XorDiag1(i,bb)   BBXorIntoS( bb, castbb(abb_mask_rr45[i]) )

#define XorDiag2(i,bb)   BBXorIntoS( bb, castbb(abb_mask_rl45[i]) )

// Toggle in `bb` the bits of `bb_set_clear`, a name that must be in scope at
// the point of use. It is used without castbb, so it is presumably already
// an __m128i -- confirm at the call sites.
#define SetClear(bb)     BBXorIntoS( bb, bb_set_clear )

// Toggle in `bb` the union of two table entries (i1 and i2) in one step.
#define SetClearFile(i1,i2,bb) \
           BBXorIntoS( bb, BBOrPairS( abb_mask_rl90, i1, i2 ) )

#define SetClearDiag1(i1,i2,bb) \
           BBXorIntoS( bb, BBOrPairS( abb_mask_rr45, i1, i2 ) )

#define SetClearDiag2(i1,i2,bb) \
           BBXorIntoS( bb, BBOrPairS( abb_mask_rl45, i1, i2 ) )
#endif


// __m128i views of the attack lookups defined elsewhere. AttackFile(i),
// AttackDiag1(i) and AttackDiag2(i) must expand to lvalues, because castbb
// takes their address.
#define AttackFileS(i)   castbb( AttackFile(i) )
#define AttackDiag1S(i)  castbb( AttackDiag1(i) )
#define AttackDiag2S(i)  castbb( AttackDiag2(i) )

// AttackRankS(i): build a vector holding the 32-bit value AttackRank(i) in
// the lane selected by aslide[i].ir0 (lane 0, 1 or 2 for ir0 == 0, 1 or 2;
// lane 3 for any other value); all other lanes are zero.
// _mm_setr_epi32 takes its arguments lowest lane first.
// aslide[i].ir0 may be evaluated up to three times; AttackRank(i) once.
#define AttackRankS(i) \
 ( aslide[i].ir0 == 0 ? _mm_setr_epi32( AttackRank(i), 0, 0, 0 ) \
 : aslide[i].ir0 == 1 ? _mm_setr_epi32( 0, AttackRank(i), 0, 0 ) \
 : aslide[i].ir0 == 2 ? _mm_setr_epi32( 0, 0, AttackRank(i), 0 ) \
 :                      _mm_setr_epi32( 0, 0, 0, AttackRank(i) ) )

// AttackBishopS(bb,i): bb = AttackDiag1(i) | AttackDiag2(i), via BBOrS.
#define AttackBishopS(bb,i)   BBOrS( bb, AttackDiag1(i), AttackDiag2(i) )
#endif


