/*
    ChibiOS/RT - Copyright (C) 2006,2007,2008,2009,2010,
                 2011 Giovanni Di Sirio.

    This file is part of ChibiOS/RT.

    ChibiOS/RT is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation; either version 3 of the License, or
    (at your option) any later version.

    ChibiOS/RT is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/

#include "ch.h"
#include "test.h"

/**
 * @page test_benchmarks Kernel Benchmarks
 *
 * File: @ref testbmk.c
 *
 * <h2>Description</h2>
 * This module implements a series of system benchmarks. The benchmarks are
 * useful as a stress test and as a reference when comparing ChibiOS/RT
 * with similar systems.
 *
 * <h2>Objective</h2>
 * Objective of the test module is to provide a performance index for the
 * most critical system subsystems. The performance numbers allow to
 * discover performance regressions between successive ChibiOS/RT releases.
 *
 * <h2>Preconditions</h2>
 * None.
 *
 * <h2>Test Cases</h2>
 * - @subpage test_benchmarks_001
 * - @subpage test_benchmarks_002
 * - @subpage test_benchmarks_003
 * - @subpage test_benchmarks_004
 * - @subpage test_benchmarks_005
 * - @subpage test_benchmarks_006
 * - @subpage test_benchmarks_007
 * - @subpage test_benchmarks_008
 * - @subpage test_benchmarks_009
 * - @subpage test_benchmarks_010
 * - @subpage test_benchmarks_011
 * - @subpage test_benchmarks_012
 * - @subpage test_benchmarks_013
 * .
 * @file testbmk.c Kernel Benchmarks
 * @brief Kernel Benchmarks source file
 * @file testbmk.h
 * @brief Kernel Benchmarks header file
 */

static Semaphore sem1;
#if CH_USE_MUTEXES || defined(__DOXYGEN__)
static Mutex mtx1;
#endif

static msg_t thread1(void *p) {
  Thread *tp;
  msg_t msg;

  (void)p;
  do {
    tp = chMsgWait();
    msg = chMsgGet(tp);
    chMsgRelease(tp, msg);
  } while (msg);
  return 0;
}

#ifdef __GNUC__
__attribute__((noinline))
#endif
static unsigned int msg_loop_test(Thread *tp) {

  uint32_t n = 0;
  test_wait_tick();
  test_start_timer(1000);
  do {
    (void)chMsgSend(tp, 1);
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  (void)chMsgSend(tp, 0);
  return n;
}

/**
 * @page test_benchmarks_001 Messages performance #1
 *
 * <h2>Description</h2>
 * A message server thread is created with a lower priority than the client
 * thread, the messages throughput per second is measured and the result
 * printed in the output log.
 */

static void bmk1_execute(void) {
  uint32_t n;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority()-1, thread1, NULL);
  n = msg_loop_test(threads[0]);
  test_wait_threads();
  test_print("--- Score : ");
  test_printn(n);
  test_print(" msgs/S, ");
  test_printn(n << 1);
  test_println(" ctxswc/S");
}

ROMCONST struct testcase testbmk1 = {
  "Benchmark, messages #1",
  NULL,
  NULL,
  bmk1_execute
};

/**
 * @page test_benchmarks_002 Messages performance #2
 *
 * <h2>Description</h2>
 * A message server thread is created with an higher priority than the client
 * thread, the messages throughput per second is measured and the result
 * printed in the output log.
 */

static void bmk2_execute(void) {
  uint32_t n;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority()+1, thread1, NULL);
  n = msg_loop_test(threads[0]);
  test_wait_threads();
  test_print("--- Score : ");
  test_printn(n);
  test_print(" msgs/S, ");
  test_printn(n << 1);
  test_println(" ctxswc/S");
}

ROMCONST struct testcase testbmk2 = {
  "Benchmark, messages #2",
  NULL,
  NULL,
  bmk2_execute
};

static msg_t thread2(void *p) {

  return (msg_t)p;
}

/**
 * @page test_benchmarks_003 Messages performance #3
 *
 * <h2>Description</h2>
 * A message server thread is created with an higher priority than the client
 * thread, four lower priority threads crowd the ready list, the messages
 * throughput per second is measured while the ready list and the result
 * printed in the output log.
 */

static void bmk3_execute(void) {
  uint32_t n;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority()+1, thread1, NULL);
  threads[1] = chThdCreateStatic(wa[1], WA_SIZE, chThdGetPriority()-2, thread2, NULL);
  threads[2] = chThdCreateStatic(wa[2], WA_SIZE, chThdGetPriority()-3, thread2, NULL);
  threads[3] = chThdCreateStatic(wa[3], WA_SIZE, chThdGetPriority()-4, thread2, NULL);
  threads[4] = chThdCreateStatic(wa[4], WA_SIZE, chThdGetPriority()-5, thread2, NULL);
  n = msg_loop_test(threads[0]);
  test_wait_threads();
  test_print("--- Score : ");
  test_printn(n);
  test_print(" msgs/S, ");
  test_printn(n << 1);
  test_println(" ctxswc/S");
}

ROMCONST struct testcase testbmk3 = {
  "Benchmark, messages #3",
  NULL,
  NULL,
  bmk3_execute
};

/**
 * @page test_benchmarks_004 Context Switch performance
 *
 * <h2>Description</h2>
 * A thread is created that just performs a @p chSchGoSleepS() into a loop,
 * the thread is awakened as fast is possible by the tester thread.<br>
 * The Context Switch performance is calculated by measuring the number of
 * iterations after a second of continuous operations.
 */

msg_t thread4(void *p) {
  msg_t msg;
  Thread *self = chThdSelf();

  (void)p;
  chSysLock();
  do {
    chSchGoSleepS(THD_STATE_SUSPENDED);
    msg = self->p_u.rdymsg;
  } while (msg == RDY_OK);
  chSysUnlock();
  return 0;
}

static void bmk4_execute(void) {
  Thread *tp;
  uint32_t n;

  tp = threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority()+1, thread4, NULL);
  n = 0;
  test_wait_tick();
  test_start_timer(1000);
  do {
    chSysLock();
    chSchWakeupS(tp, RDY_OK);
    chSchWakeupS(tp, RDY_OK);
    chSchWakeupS(tp, RDY_OK);
    chSchWakeupS(tp, RDY_OK);
    chSysUnlock();
    n += 4;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  chSysLock();
  chSchWakeupS(tp, RDY_TIMEOUT);
  chSysUnlock();

  test_wait_threads();
  test_print("--- Score : ");
  test_printn(n * 2);
  test_println(" ctxswc/S");
}

ROMCONST struct testcase testbmk4 = {
  "Benchmark, context switch",
  NULL,
  NULL,
  bmk4_execute
};

/**
 * @page test_benchmarks_005 Threads performance, full cycle
 *
 * <h2>Description</h2>
 * Threads are continuously created and terminated into a loop. A full
 * @p chThdCreateStatic() / @p chThdExit() / @p chThdWait() cycle is performed
 * in each iteration.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static void bmk5_execute(void) {

  uint32_t n = 0;
  void *wap = wa[0];
  tprio_t prio = chThdGetPriority() - 1;
  test_wait_tick();
  test_start_timer(1000);
  do {
    chThdWait(chThdCreateStatic(wap, WA_SIZE, prio, thread2, NULL));
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_print("--- Score : ");
  test_printn(n);
  test_println(" threads/S");
}

ROMCONST struct testcase testbmk5 = {
  "Benchmark, threads, full cycle",
  NULL,
  NULL,
  bmk5_execute
};

/**
 * @page test_benchmarks_006 Threads performance, create/exit only
 *
 * <h2>Description</h2>
 * Threads are continuously created and terminated into a loop. A partial
 * @p chThdCreateStatic() / @p chThdExit() cycle is performed in each
 * iteration, the @p chThdWait() is not necessary because the thread is
 * created at an higher priority so there is no need to wait for it to
 * terminate.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static void bmk6_execute(void) {

  uint32_t n = 0;
  void *wap = wa[0];
  tprio_t prio = chThdGetPriority() + 1;
  test_wait_tick();
  test_start_timer(1000);
  do {
    chThdCreateStatic(wap, WA_SIZE, prio, thread2, NULL);
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_print("--- Score : ");
  test_printn(n);
  test_println(" threads/S");
}

ROMCONST struct testcase testbmk6 = {
  "Benchmark, threads, create only",
  NULL,
  NULL,
  bmk6_execute
};

/**
 * @page test_benchmarks_007 Mass reschedule performance
 *
 * <h2>Description</h2>
 * Five threads are created and atomically rescheduled by resetting the
 * semaphore where they are waiting on. The operation is performed into a
 * continuous loop.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static msg_t thread3(void *p) {

  (void)p;
  while (!chThdShouldTerminate())
    chSemWait(&sem1);
  return 0;
}

static void bmk7_setup(void) {

  chSemInit(&sem1, 0);
}

static void bmk7_execute(void) {
  uint32_t n;

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority()+5, thread3, NULL);
  threads[1] = chThdCreateStatic(wa[1], WA_SIZE, chThdGetPriority()+4, thread3, NULL);
  threads[2] = chThdCreateStatic(wa[2], WA_SIZE, chThdGetPriority()+3, thread3, NULL);
  threads[3] = chThdCreateStatic(wa[3], WA_SIZE, chThdGetPriority()+2, thread3, NULL);
  threads[4] = chThdCreateStatic(wa[4], WA_SIZE, chThdGetPriority()+1, thread3, NULL);

  n = 0;
  test_wait_tick();
  test_start_timer(1000);
  do {
    chSemReset(&sem1, 0);
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_terminate_threads();
  chSemReset(&sem1, 0);
  test_wait_threads();

  test_print("--- Score : ");
  test_printn(n);
  test_print(" reschedules/S, ");
  test_printn(n * 6);
  test_println(" ctxswc/S");
}

ROMCONST struct testcase testbmk7 = {
  "Benchmark, mass reschedule, 5 threads",
  bmk7_setup,
  NULL,
  bmk7_execute
};

/**
 * @page test_benchmarks_008 I/O Round-Robin voluntary reschedule.
 *
 * <h2>Description</h2>
 * Five threads are created at equal priority, each thread just increases a
 * variable and yields.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static msg_t thread8(void *p) {

  do {
    chThdYield();
    chThdYield();
    chThdYield();
    chThdYield();
    (*(uint32_t *)p) += 4;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while(!chThdShouldTerminate());
  return 0;
}

static void bmk8_execute(void) {
  uint32_t n;

  n = 0;
  test_wait_tick();

  threads[0] = chThdCreateStatic(wa[0], WA_SIZE, chThdGetPriority()-1, thread8, (void *)&n);
  threads[1] = chThdCreateStatic(wa[1], WA_SIZE, chThdGetPriority()-1, thread8, (void *)&n);
  threads[2] = chThdCreateStatic(wa[2], WA_SIZE, chThdGetPriority()-1, thread8, (void *)&n);
  threads[3] = chThdCreateStatic(wa[3], WA_SIZE, chThdGetPriority()-1, thread8, (void *)&n);
  threads[4] = chThdCreateStatic(wa[4], WA_SIZE, chThdGetPriority()-1, thread8, (void *)&n);

  chThdSleepSeconds(1);
  test_terminate_threads();
  test_wait_threads();

  test_print("--- Score : ");
  test_printn(n);
  test_println(" ctxswc/S");
}

ROMCONST struct testcase testbmk8 = {
  "Benchmark, round robin context switching",
  NULL,
  NULL,
  bmk8_execute
};

/**
 * @page test_benchmarks_009 I/O Queues throughput
 *
 * <h2>Description</h2>
 * Four bytes are written and then read from an @p InputQueue into a continuous
 * loop.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static void bmk9_execute(void) {
  uint32_t n;
  static uint8_t ib[16];
  static InputQueue iq;

  chIQInit(&iq, ib, sizeof(ib), NULL);
  n = 0;
  test_wait_tick();
  test_start_timer(1000);
  do {
    chSysLock();
    chIQPutI(&iq, 0);
    chIQPutI(&iq, 1);
    chIQPutI(&iq, 2);
    chIQPutI(&iq, 3);
    chSysUnlock();
    (void)chIQGet(&iq);
    (void)chIQGet(&iq);
    (void)chIQGet(&iq);
    (void)chIQGet(&iq);
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_print("--- Score : ");
  test_printn(n * 4);
  test_println(" bytes/S");
}

ROMCONST struct testcase testbmk9 = {
  "Benchmark, I/O Queues throughput",
  NULL,
  NULL,
  bmk9_execute
};

/**
 * @page test_benchmarks_010 Virtual Timers set/reset performance
 *
 * <h2>Description</h2>
 * A virtual timer is set and immediately reset into a continuous loop.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static void tmo(void *param) {(void)param;}

static void bmk10_execute(void) {
  static VirtualTimer vt1, vt2;
  uint32_t n = 0;

  test_wait_tick();
  test_start_timer(1000);
  do {
    chSysLock();
    chVTSetI(&vt1, 1, tmo, NULL);
    chVTSetI(&vt2, 10000, tmo, NULL);
    chVTResetI(&vt1);
    chVTResetI(&vt2);
    chSysUnlock();
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_print("--- Score : ");
  test_printn(n * 2);
  test_println(" timers/S");
}

ROMCONST struct testcase testbmk10 = {
  "Benchmark, virtual timers set/reset",
  NULL,
  NULL,
  bmk10_execute
};

/**
 * @page test_benchmarks_011 Semaphores wait/signal performance
 *
 * <h2>Description</h2>
 * A counting semaphore is taken/released into a continuous loop, no Context
 * Switch happens because the counter is always non negative.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static void bmk11_setup(void) {

  chSemInit(&sem1, 1);
}

static void bmk11_execute(void) {
  uint32_t n = 0;

  test_wait_tick();
  test_start_timer(1000);
  do {
    chSemWait(&sem1);
    chSemSignal(&sem1);
    chSemWait(&sem1);
    chSemSignal(&sem1);
    chSemWait(&sem1);
    chSemSignal(&sem1);
    chSemWait(&sem1);
    chSemSignal(&sem1);
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_print("--- Score : ");
  test_printn(n * 4);
  test_println(" wait+signal/S");
}

ROMCONST struct testcase testbmk11 = {
  "Benchmark, semaphores wait/signal",
  bmk11_setup,
  NULL,
  bmk11_execute
};

#if CH_USE_MUTEXES || defined(__DOXYGEN__)
/**
 * @page test_benchmarks_012 Mutexes lock/unlock performance
 *
 * <h2>Description</h2>
 * A mutex is locked/unlocked into a continuous loop, no Context Switch happens
 * because there are no other threads asking for the mutex.<br>
 * The performance is calculated by measuring the number of iterations after
 * a second of continuous operations.
 */

static void bmk12_setup(void) {

  chMtxInit(&mtx1);
}

static void bmk12_execute(void) {
  uint32_t n = 0;

  test_wait_tick();
  test_start_timer(1000);
  do {
    chMtxLock(&mtx1);
    chMtxUnlock();
    chMtxLock(&mtx1);
    chMtxUnlock();
    chMtxLock(&mtx1);
    chMtxUnlock();
    chMtxLock(&mtx1);
    chMtxUnlock();
    n++;
#if defined(SIMULATOR)
    ChkIntSources();
#endif
  } while (!test_timer_done);
  test_print("--- Score : ");
  test_printn(n * 4);
  test_println(" lock+unlock/S");
}

ROMCONST struct testcase testbmk12 = {
  "Benchmark, mutexes lock/unlock",
  bmk12_setup,
  NULL,
  bmk12_execute
};
#endif

/**
 * @page test_benchmarks_013 RAM Footprint
 *
 * <h2>Description</h2>
 * The memory size of the various kernel objects is printed.
 */

static void bmk13_execute(void) {

  test_print("--- System: ");
  test_printn(sizeof(ReadyList) + sizeof(VTList) +
              PORT_IDLE_THREAD_STACK_SIZE +
              (sizeof(Thread) + sizeof(struct intctx) +
               sizeof(struct extctx) +
               PORT_INT_REQUIRED_STACK) * 2);
  test_println(" bytes");
  test_print("--- Thread: ");
  test_printn(sizeof(Thread));
  test_println(" bytes");
  test_print("--- Timer : ");
  test_printn(sizeof(VirtualTimer));
  test_println(" bytes");
  test_print("--- Semaph: ");
  test_printn(sizeof(Semaphore));
  test_println(" bytes");
#if CH_USE_EVENTS || defined(__DOXYGEN__)
  test_print("--- EventS: ");
  test_printn(sizeof(EventSource));
  test_println(" bytes");
  test_print("--- EventL: ");
  test_printn(sizeof(EventListener));
  test_println(" bytes");
#endif
#if CH_USE_MUTEXES || defined(__DOXYGEN__)
  test_print("--- Mutex : ");
  test_printn(sizeof(Mutex));
  test_println(" bytes");
#endif
#if CH_USE_CONDVARS || defined(__DOXYGEN__)
  test_print("--- CondV.: ");
  test_printn(sizeof(CondVar));
  test_println(" bytes");
#endif
#if CH_USE_QUEUES || defined(__DOXYGEN__)
  test_print("--- Queue : ");
  test_printn(sizeof(GenericQueue));
  test_println(" bytes");
#endif
#if CH_USE_MAILBOXES || defined(__DOXYGEN__)
  test_print("--- MailB.: ");
  test_printn(sizeof(Mailbox));
  test_println(" bytes");
#endif
}

ROMCONST struct testcase testbmk13 = {
  "Benchmark, RAM footprint",
  NULL,
  NULL,
  bmk13_execute
};

/**
 * @brief   Test sequence for benchmarks.
 */
ROMCONST struct testcase * ROMCONST patternbmk[] = {
#if !TEST_NO_BENCHMARKS
  &testbmk1,
  &testbmk2,
  &testbmk3,
  &testbmk4,
  &testbmk5,
  &testbmk6,
  &testbmk7,
  &testbmk8,
  &testbmk9,
  &testbmk10,
  &testbmk11,
#if CH_USE_MUTEXES || defined(__DOXYGEN__)
  &testbmk12,
#endif
  &testbmk13,
#endif
  NULL
};