/*
  Copyright (c) 2010 nosuke <sasugaanija@gmail.com>

  All rights reserved.

  Redistribution and use in source and binary forms, with or
  without modification, are permitted provided that the
  following conditions are met:

  1. Redistributions of source code must retain the above
     copyright notice, this list of conditions and the
     following disclaimer.
  2. Redistributions in binary form must reproduce the above
     copyright notice, this list of conditions and the
     following disclaimer in the documentation and/or other
     materials provided with the distribution.
  3. Neither the name of authors nor the names of its
     contributors may be used to endorse or promote products
     derived from this software without specific prior written
     permission.

  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
  MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <malloc.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <arpa/inet.h>
#include <pthread.h>
#include <spurs/spha/spha.h>

#define HOST_NUM 4 // number of host-side (pthread) worker threads
#define SPE_NUM 4  // number of SPE threads

static spha_session_t session;
// Semaphore guarding the shared counter, the shared memory object, and the
// two message ports used by barrier()
static spha_object_t csem, shmem, bmp1, bmp2;

// Read the SPE program image stored at `path` into a newly allocated buffer.
//
//   path    - file to read (opened in binary mode).
//   size    - out: exact size of the file in bytes.
//   buf     - out: 128-byte-aligned buffer obtained from memalign() that
//             holds the file contents; the caller releases it with free().
//   buf_len - out: allocated length, i.e. *size rounded up to the next
//             multiple of 128. Bytes beyond *size are zero-filled.
//
// The function has no error return, so every failure (file cannot be opened
// or stat'ed, size out of range, allocation failure, short read) is reported
// on stderr and terminates the process with EXIT_FAILURE.
void
load_spe_program(const char *path, int32_t *size, uint8_t **buf,
                 uint32_t *buf_len)
{
    enum {
        ALIGNMENT = 128,
        // Largest size that can still be rounded up without overflowing
        // the int32_t reported through `size`.
        MAX_SIZE = 0x7fffffff - (ALIGNMENT - 1)
    };
    struct stat prog_stat;
    uint32_t file_size, padded_size;
    uint8_t *image;
    FILE *fp;

    fp = fopen(path, "rb");
    if (fp == NULL) {
        perror(path);
        exit(EXIT_FAILURE);
    }

    if (stat(path, &prog_stat) != 0) {
        perror(path);
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    if (prog_stat.st_size < 0 || prog_stat.st_size > MAX_SIZE) {
        fprintf(stderr, "%s: unsupported file size\n", path);
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    file_size = (uint32_t)prog_stat.st_size;
    padded_size = (file_size + (ALIGNMENT - 1)) & ~(uint32_t)(ALIGNMENT - 1);

    image = memalign(ALIGNMENT, padded_size);
    if (image == NULL) {
        fprintf(stderr, "%s: cannot allocate %lu bytes\n", path,
                (unsigned long)padded_size);
        fclose(fp);
        exit(EXIT_FAILURE);
    }

    if (fread(image, 1, file_size, fp) != file_size) {
        fprintf(stderr, "%s: short read\n", path);
        free(image);
        fclose(fp);
        exit(EXIT_FAILURE);
    }
    fclose(fp);

    // The whole padded buffer may be handed on by the caller, so make sure
    // the tail does not carry uninitialised heap contents.
    memset(image + file_size, 0, padded_size - file_size);

    *size = (int32_t)file_size;
    *buf_len = padded_size;
    *buf = image;
}

// Barrier synchronisation built from the two message ports bmp1 and bmp2.
//
// Each caller passes through two symmetric phases, first on bmp1 and then on
// bmp2. In each phase the caller waits on the port, signals it, and compares
// the sequence number returned by that signal with SPE_NUM + HOST_NUM + 1;
// the caller that sees that value resets the current port to 0 and sets the
// other port to 1.
// NOTE(review): the description of what the SPHA calls do to the sequence
// numbers follows the original author's notes — confirm against the SPHA
// documentation.
void
barrier(void)
{
    uint32_t size, seq, dat;
    // Wait until the sequence number of message port 1 is at least 1
    size = sizeof(dat);
    spha_wait_message_port(session, bmp1, SP3_MPORT_WAIT_NEXT_SEQ, 0,
                           SP3_TIMEOUT_INDEFINITE, &seq, &dat, &size);

    // Write an arbitrary value to message port 1 to bump its sequence
    // number by one; the resulting number is returned in seq
    spha_signal_message_port(session, bmp1, SP3_MPORT_SIGNAL_ASYNC, 0,
                             &dat, sizeof(dat), &seq);

    if (seq == SPE_NUM + HOST_NUM + 1) {
        // The thread that bumped message port 1 last resets the sequence
        // number of message port 1 to 0 ...
        spha_signal_message_port(session, bmp1, SP3_MPORT_SIGNAL_UPDATE, 0,
                                 &dat, sizeof(dat), &seq);

        // ... and sets the sequence number of message port 2 to 1 so that
        // the waiters can leave the first phase
        spha_signal_message_port(session, bmp2, SP3_MPORT_SIGNAL_UPDATE, 1,
                                 &dat, sizeof(dat), &seq);
    }

    // Wait until the sequence number of message port 2 is at least 1
    size = sizeof(dat);
    spha_wait_message_port(session, bmp2, SP3_MPORT_WAIT_NEXT_SEQ, 0,
                           SP3_TIMEOUT_INDEFINITE, &seq, &dat, &size);

    // Write an arbitrary value to message port 2 to bump its sequence
    // number by one
    spha_signal_message_port(session, bmp2, SP3_MPORT_SIGNAL_ASYNC, 0, &dat,
                             sizeof(dat), &seq);

    if (seq == SPE_NUM + HOST_NUM + 1) {
        // The thread that bumped message port 2 last puts the sequence
        // number of message port 2 back to 0
        spha_signal_message_port(session, bmp2, SP3_MPORT_SIGNAL_UPDATE, 0,
                                 &dat, sizeof(dat), &seq);

        // Put the sequence number of message port 1 back to 1, which is the
        // value main() gives it initially, so the barrier can be used again
        spha_signal_message_port(session, bmp1, SP3_MPORT_SIGNAL_UPDATE, 1,
                                 &dat, sizeof(dat), &seq);
    }
}

// Body of each host-side worker thread (pthread start routine).
//
// Joins the start barrier, then 1000 times: acquires csem, reads the first
// BUFSIZE bytes of shmem, adds one to the leading 32-bit counter (stored in
// network byte order), writes the bytes back and releases csem. Finally it
// joins the end barrier.
//
//   arg - unused.
//
// Always returns NULL.
void *
host_thread_fn(void *arg)
{
    int i;
    uint8_t *buf;

    (void)arg;

    // Start-of-run barrier (per the original notes the SPE threads take
    // part in it as well)
    barrier();

#define BUFSIZE 128
    buf = memalign(128, BUFSIZE);
    if (buf == NULL) {
        // Without a transfer buffer no increments can be done, but this
        // thread must still reach the final barrier below: the other
        // participants wait there for every thread.
        fprintf(stderr, "host_thread_fn: cannot allocate transfer buffer\n");
    }

    for (i = 0; buf != NULL && i < 1000; i++) {
        uint32_t size, counter;

        // Acquire the semaphore
        spha_wait_semaphore(session, csem, SP3_TIMEOUT_INDEFINITE);

        // Read the shared memory region, increment the counter, write back
        size = BUFSIZE;
        spha_data_transfer_from(session, shmem, 0, buf, &size);

        // Go through memcpy instead of casting buf to uint32_t *, which
        // would access the byte buffer through an incompatible type
        memcpy(&counter, buf, sizeof(counter));
        counter = htonl(ntohl(counter) + 1);
        memcpy(buf, &counter, sizeof(counter));

        size = BUFSIZE;
        spha_data_transfer_to(session, shmem, 0, buf, &size);

        // Release the semaphore
        spha_signal_semaphore(session, csem);
    }
#undef BUFSIZE

    // End-of-run barrier (again shared with the SPE threads)
    barrier();

    free(buf);
    return NULL;
}

// Program entry point.
//
// Loads the SPE program "sync_spe", opens an SPHA session, creates the
// semaphore, the two barrier message ports and the shared memory, starts
// SPE_NUM SPE threads and HOST_NUM host threads, waits for all of them and
// prints the 32-bit counter found at the start of the shared memory.
//
// Returns 0 on normal completion, EXIT_FAILURE if the host staging buffer
// cannot be allocated.
// NOTE(review): return values of the spha_* calls are not checked; their
// error convention is not visible in this file.
int
main(void)
{
    int i;
    int32_t spe_prog_file_size;
    uint32_t  spe_prog_buf_size;
    uint8_t *spe_prog_buf;
    spha_object_t spe_prog_mem;
    uint32_t spe_program_ea, loader_args[3];
    // Zero-initialised: the whole array is handed to every SPE thread but
    // only elements 0 and 4 are filled in below
    uint32_t call_args[32] = {0};

    // One handle per SPE thread, since several SPEs are used
    spha_thread_t spha_thread[SPE_NUM];
    uint8_t *tmp;
    uint32_t size, dat, seq, cnt;

    // EA of the shared memory
    uint32_t shmem_ea;

    // Host-side threads competing for the shared counter
    pthread_t host_thread[HOST_NUM];

    // Message queues used to pass values to the SPE threads
    spha_object_t arg_mq[SPE_NUM];

    // Load the ELF image of the SPE program
    load_spe_program("sync_spe", &spe_prog_file_size, &spe_prog_buf,
                     &spe_prog_buf_size);

    // Create the session and connect to it
    spha_create_session(NULL, &session);
    spha_connect_session(session);

    // Create the semaphore (binary semaphore, per the original notes)
    spha_create_semaphore(session, 0, 1, 1, &csem);
    // Create and initialise the two message ports used for the barrier
    spha_create_message_port(session, 0, 16, &bmp1);
    spha_create_message_port(session, 0, 16, &bmp2);
    // Dummy payload
    dat = 0;
    // Initial sequence number of bmp1 is 1
    spha_signal_message_port(session, bmp1, SP3_MPORT_SIGNAL_UPDATE, 1, &dat,
                             sizeof(dat), &seq);
    // Initial sequence number of bmp2 is 0
    spha_signal_message_port(session, bmp2, SP3_MPORT_SIGNAL_UPDATE, 0, &dat,
                             sizeof(dat), &seq);

    // Create the shared memory
#define SHMEM_SIZE 4096
    spha_create_memory(session, 0, 0, 0, SHMEM_SIZE, 0, &shmem);
    // Prepare the initial contents in host memory
    tmp = memalign(128, SHMEM_SIZE);
    if (tmp == NULL) {
        fprintf(stderr, "main: cannot allocate host staging buffer\n");
        free(spe_prog_buf);
        spha_close_session(session);
        spha_delete_session(session);
        return EXIT_FAILURE;
    }
    memset(tmp, 0, SHMEM_SIZE);
    // Write the initial contents
    size = SHMEM_SIZE;
    spha_data_transfer_to(session, shmem, 0, tmp, &size);
    // Map it into the EA space
    spha_map_memory(session, shmem, 0, 0, 0, SHMEM_SIZE, 0, &shmem_ea);
#undef SHMEM_SIZE

    // Copy the SPE program ELF into SPHA memory and map it into the EA space
    spha_create_memory(session, 0, 0, 0, spe_prog_buf_size, 0, &spe_prog_mem);
    spha_data_transfer_to(session, spe_prog_mem, 0, spe_prog_buf,
                          &spe_prog_buf_size);
    free(spe_prog_buf);
    spha_map_memory(session, spe_prog_mem, 0, 0, 0, spe_prog_file_size, 0,
                    &spe_program_ea);

    // Loader arguments (EA and size are sent in network byte order)
    loader_args[0] = htonl(spe_program_ea);
    loader_args[1] = htonl(spe_prog_file_size);
    loader_args[2] = 0;

    // Create SPE_NUM SPE threads
    for (i = 0; i < SPE_NUM; i++) {
        // One message queue per SPE thread for talking to that thread
        spha_create_message_queue(session, 0, 8, 16, &arg_mq[i]);

        // Create the thread, pointing the loader at the SPE ELF
        spha_create_spe_thread(session, 0, NULL, SP3_SPE_LOADER_DEFAULT,
                               loader_args, sizeof(loader_args),
                               &spha_thread[i]);

        // Data for the SPE thread's private data area (arguments of its
        // main, per the original notes)
        call_args[0] = htonl(i);   // ID assigned to this SPE thread
        call_args[4] = arg_mq[i];  // its message queue
        // Start the SPE thread without blocking
        spha_resume_spe_thread(session, spha_thread[i], 0,
                               SP3_SPE_THREAD_RESUME_DEFAULT_ENTRY, call_args,
                               sizeof(call_args), NULL, NULL, NULL);
    }

    // Send the object references and the shared memory EA to every SPE
    // thread through its message queue
    for (i = 0; i < SPE_NUM; i++) {
        uint32_t arg;
        spha_signal_message_queue(session, arg_mq[i], &bmp1, sizeof(bmp1));
        spha_signal_message_queue(session, arg_mq[i], &bmp2, sizeof(bmp2));
        arg = htonl(shmem_ea);
        spha_signal_message_queue(session, arg_mq[i], &arg, sizeof(arg));
        spha_signal_message_queue(session, arg_mq[i], &csem, sizeof(csem));
    }

    // Start HOST_NUM host threads running host_thread_fn
    for (i = 0; i < HOST_NUM; i++)
        pthread_create(&host_thread[i], NULL, host_thread_fn, NULL);

    // Wait until the SPE threads have stopped or finished
    for (i = 0; i < SPE_NUM; i++)
        spha_wait_spe_thread(session, SP3_SPE_THREAD_NAME_ANY, NULL, NULL,
                             NULL);

    // Wait for the host threads to finish
    for (i = 0; i < HOST_NUM; i++)
        pthread_join(host_thread[i], NULL);

    // Read the shared memory back into the buffer and print the value held
    // in its first 4 bytes
    size = 128;
    spha_data_transfer_from(session, shmem, 0, tmp, &size);
    memcpy(&cnt, tmp, sizeof(cnt));
    // The counter is unsigned, so print it with %u rather than %d
    printf("cnt = %u\n", (unsigned)ntohl(cnt));

    // Release the buffer
    free(tmp);

    // Destroy the SPE threads
    for (i = 0; i < SPE_NUM; i++)
        spha_delete_spe_thread(session, spha_thread[i]);

    // Disconnect and destroy the session
    spha_close_session(session);
    spha_delete_session(session);
    return 0;
}

