/*
  Copyright (c) 2009 nosuke <sasugaanija@gmail.com>

  All rights reserved.

  Redistribution and use in source and binary forms, with or
  without modification, are permitted provided that the
  following conditions are met:

  1. Redistributions of source code must retain the above
     copyright notice, this list of conditions and the
     following disclaimer.
  2. Redistributions in binary form must reproduce the above
     copyright notice, this list of conditions and the
     following disclaimer in the documentation and/or other
     materials provided with the distribution.
  3. Neither the name of authors nor the names of its
     contributors may be used to endorse or promote products
     derived from this software without specific prior written
     permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/


#include <spu_intrinsics.h>
#include <stdio.h>

/*
 * Element-wise 32-bit multiply of two vec_int4 values, assembled from the
 * SPU 16-bit multiply intrinsics.
 *
 * Writing each word as (hi << 16) + lo, the low 32 bits of a * b are
 *
 *     a_lo * b_lo + ((a_hi * b_lo + b_hi * a_lo) << 16)
 *
 * Per the SPU language-extension documentation, spu_mulh yields one
 * (hi * lo) << 16 cross term per word and spu_mulo on unsigned halfwords
 * yields the unsigned product of the two low halfwords; the three partial
 * results are combined with spu_add.
 *
 * Returns the per-word sum of those three partial products.
 */
vec_int4
mul32(vec_int4 a, vec_int4 b)
{
    const vec_short8 a_halves = (vec_short8)a;
    const vec_short8 b_halves = (vec_short8)b;

    /* Cross terms, one per operand order. */
    vec_int4 cross_ab = spu_mulh(a_halves, b_halves);
    vec_int4 cross_ba = spu_mulh(b_halves, a_halves);

    /* Product of the low halfwords, taken as unsigned values. */
    vec_int4 low_product = (vec_int4)spu_mulo((vec_ushort8)b, (vec_ushort8)a);

    vec_int4 cross_sum = spu_add(cross_ab, cross_ba);

    return spu_add(low_product, cross_sum);
}

/*
 * Self-test for mul32(): multiplies two fixed vectors with mul32() and
 * compares every word against a scalar multiplication of the same
 * elements, printing "OK" or "Failed" followed by both values in hex.
 *
 * Always returns 0, regardless of the comparison results.
 */
int
main(void)
{
    int i;
    vec_int4 vi1, vi2, vi3;

    vi1 = (vec_int4){0x3456789a, 0x00000003, 0x89abcdef, 0x23456789};
    vi2 = (vec_int4){0x00000003, 0x456789ab, 0x12345678, 0x789abcde};

    /* Multiply all four words at once with the SIMD routine. */
    vi3 = mul32(vi1, vi2);

    /* Check each word against a scalar computation. */
    for (i = 0; i < 4; i++) {
        unsigned int got = (unsigned int)spu_extract(vi3, i);
        /*
         * The reference product is computed in unsigned arithmetic: these
         * operands overflow a signed int, which would be undefined
         * behaviour, whereas unsigned multiplication wraps modulo 2^32.
         */
        unsigned int want = (unsigned int)spu_extract(vi1, i)
                          * (unsigned int)spu_extract(vi2, i);

        if (want != got) {
            printf("word%d Failed\n", i);
        } else {
            printf("word%d OK\n", i);
        }
        /* %x requires an unsigned int argument. */
        printf("mul32:  0x%08x\n", got);
        printf("answer: 0x%08x\n", want);
    }

    return 0;
}
