From e150283e1f071673e1d3490cbf85651e5502d421 Mon Sep 17 00:00:00 2001
From: gdisirio <gdisirio@35acf78f-673a-0410-8e92-d51de3d6d3f4>
Date: Wed, 30 Jul 2008 11:01:56 +0000
Subject: Various optimizations to the scheduler.

git-svn-id: svn://svn.code.sf.net/p/chibios/svn/trunk@378 35acf78f-673a-0410-8e92-d51de3d6d3f4
---
 ports/ARMCM3/chcore.c |   2 +-
 readme.txt            |   6 +++
 src/chlists.c         |  19 ++++++--
 src/chmsg.c           |   8 ++--
 src/chschd.c          |   7 ++-
 src/chsem.c           |   8 ++--
 src/include/inline.h  |   9 +++-
 src/include/lists.h   |   3 +-
 test/testbmk.c        | 122 ++++++++++++++++++++++++++++++--------------------
 test/testbmk.h        |   3 +-
 10 files changed, 120 insertions(+), 67 deletions(-)

diff --git a/ports/ARMCM3/chcore.c b/ports/ARMCM3/chcore.c
index 1a631e36b..199c71fd9 100644
--- a/ports/ARMCM3/chcore.c
+++ b/ports/ARMCM3/chcore.c
@@ -167,8 +167,8 @@ void PendSVVector(void) {
   PUSH_CONTEXT(sp_thd);
 
   (otp = currp)->p_ctx.r13 = sp_thd;
-  chSchReadyI(otp);
   (currp = fifo_remove(&rlist.r_queue))->p_state = PRCURR;
+  chSchReadyI(otp);
 #ifdef CH_USE_ROUNDROBIN
   /* set the round-robin time quantum */
   rlist.r_preempt = CH_TIME_QUANTUM;
diff --git a/readme.txt b/readme.txt
index 87b4f3a5d..a272b62d2 100644
--- a/readme.txt
+++ b/readme.txt
@@ -80,9 +80,15 @@ Win32-MinGW            - ChibiOS/RT simulator and demo into a WIN32 process,
   faster if the feature is not required. Threads at the same priority level
   are still supported when the feature is disabled but the scheduling among
   them becomes cooperative.
+- OPT: Improved reschedulation time by reordering the sequence of operations:
+  now during enqueuing the ready list contains one fewer element. This change
+  also slightly improves the interrupt latency.
+- OPT: Optimization to the chSemReset(), reversed the order of dequeuing.
 - FIX: Fixed a bug in the chThdSetPriority() API.
 - FIX: Modified the structure names into nvic.h in order to not make them
   collide with external libraries.
+- Added a benchmark to the test suite that measures the mass reschedulation
+  performance.
 - Made the Cortex-M3 port preemption code more readable.
 
 *** 0.6.8 ***
diff --git a/src/chlists.c b/src/chlists.c
index 3b173c94d..dcb3b0412 100644
--- a/src/chlists.c
+++ b/src/chlists.c
@@ -47,19 +47,19 @@ void prio_insert(Thread *tp, ThreadsQueue *tqp) {
 }
 
 /*
- * Inserts a Thread into a FIFO queue.
+ * Inserts a Thread into a queue.
  *
  * @param tp the pointer to the thread to be inserted in the list
  * @param tqp the pointer to the threads list header
  */
-void fifo_insert(Thread *tp, ThreadsQueue *tqp) {
+void queue_insert(Thread *tp, ThreadsQueue *tqp) {
 
   tp->p_prev = (tp->p_next = (Thread *)tqp)->p_prev;
   tp->p_prev->p_next = tqp->p_prev = tp;
 }
 
 /*
- * Removes the first-out Thread from a FIFO queue and returns it.
+ * Removes the first-out Thread from a queue and returns it.
  *
  * @param tqp the pointer to the threads list header
  * @return the removed thread pointer
@@ -71,6 +71,19 @@ Thread *fifo_remove(ThreadsQueue *tqp) {
   return tp;
 }
 
+/*
+ * Removes the Thread at the tail of a queue (the p_prev end) and returns it.
+ *
+ * @param tqp the pointer to the threads list header
+ * @return the removed thread pointer
+ */
+Thread *lifo_remove(ThreadsQueue *tqp) {
+  Thread *tp = tqp->p_prev;
+
+  (tqp->p_prev = tp->p_prev)->p_next = (Thread *)tqp;
+  return tp;
+}
+
 /*
  * Removes a Thread from a FIFO list and returns it.
  *
diff --git a/src/chmsg.c b/src/chmsg.c
index bcc7117e2..03b7edd01 100644
--- a/src/chmsg.c
+++ b/src/chmsg.c
@@ -40,9 +40,9 @@ msg_t chMsgSend(Thread *tp, msg_t msg) {
   if (tp->p_flags & P_MSGBYPRIO)
     prio_insert(currp, &tp->p_msgqueue);
   else
-    fifo_insert(currp, &tp->p_msgqueue);
+    queue_insert(currp, &tp->p_msgqueue);
 #else
-  fifo_insert(currp, &tp->p_msgqueue);
+  queue_insert(currp, &tp->p_msgqueue);
 #endif
   currp->p_msg = msg;
   currp->p_wtthdp = tp;
@@ -79,9 +79,9 @@ msg_t chMsgSendWithEvent(Thread *tp, msg_t msg, EventSource *esp) {
   if (tp->p_flags & P_MSGBYPRIO)
     prio_insert(currp, &tp->p_msgqueue);
   else
-    fifo_insert(currp, &tp->p_msgqueue);
+    queue_insert(currp, &tp->p_msgqueue);
 #else
-  fifo_insert(currp, &tp->p_msgqueue);
+  queue_insert(currp, &tp->p_msgqueue);
 #endif
   chEvtBroadcastI(esp);
   currp->p_wtthdp = tp;
diff --git a/src/chschd.c b/src/chschd.c
index c97eea1e8..8b606cd1b 100644
--- a/src/chschd.c
+++ b/src/chschd.c
@@ -174,18 +174,17 @@ void chSchWakeupS(Thread *ntp, msg_t msg) {
  * Intended to be called if \p chSchRescRequired() evaluates to \p TRUE.
  */
 void chSchDoRescheduleI(void) {
-  /* put the running thread on the ready queue */
+
   Thread *otp = currp;
-  chSchReadyI(otp);
-  /* pick the first thread from the ready queue */
+  /* pick the first thread from the ready queue and make it current */
   (currp = fifo_remove(&rlist.r_queue))->p_state = PRCURR;
+  chSchReadyI(otp); /* enqueued after the removal: the list is one shorter */
 #ifdef CH_USE_ROUNDROBIN
   rlist.r_preempt = CH_TIME_QUANTUM;
 #endif
 #ifdef CH_USE_TRACE
   chDbgTrace(otp, currp);
 #endif
-  /* switch thread context */
   chSysSwitchI(otp, currp);
 }
 
diff --git a/src/chsem.c b/src/chsem.c
index d1199f072..b5c197b6b 100644
--- a/src/chsem.c
+++ b/src/chsem.c
@@ -72,7 +72,7 @@ void chSemResetI(Semaphore *sp, cnt_t n) {
   cnt = sp->s_cnt;
   sp->s_cnt = n;
   while (cnt++ < 0)
-    chSchReadyI(fifo_remove(&sp->s_queue))->p_rdymsg = RDY_RESET;
+    chSchReadyI(lifo_remove(&sp->s_queue))->p_rdymsg = RDY_RESET;
 }
 
 /**
@@ -101,7 +101,7 @@ msg_t chSemWait(Semaphore *sp) {
 msg_t chSemWaitS(Semaphore *sp) {
 
   if (--sp->s_cnt < 0) {
-    fifo_insert(currp, &sp->s_queue);
+    queue_insert(currp, &sp->s_queue);
     currp->p_wtsemp = sp;
     chSchGoSleepS(PRWTSEM);
     return currp->p_rdymsg;
@@ -140,7 +140,7 @@ msg_t chSemWaitTimeout(Semaphore *sp, systime_t time) {
 msg_t chSemWaitTimeoutS(Semaphore *sp, systime_t time) {
 
   if (--sp->s_cnt < 0) {
-    fifo_insert(currp, &sp->s_queue);
+    queue_insert(currp, &sp->s_queue);
     currp->p_wtsemp = sp;
     return chSchGoSleepTimeoutS(PRWTSEM, time);
   }
@@ -201,7 +201,7 @@ msg_t chSemSignalWait(Semaphore *sps, Semaphore *spw) {
     chSchReadyI(fifo_remove(&sps->s_queue))->p_rdymsg = RDY_OK;
 
   if (--spw->s_cnt < 0) {
-    fifo_insert(currp, &spw->s_queue);
+    queue_insert(currp, &spw->s_queue);
     currp->p_wtsemp = spw;
     chSchGoSleepS(PRWTSEM);
     msg = currp->p_rdymsg;
diff --git a/src/include/inline.h b/src/include/inline.h
index 85de20f3f..9cc18974d 100644
--- a/src/include/inline.h
+++ b/src/include/inline.h
@@ -36,7 +36,7 @@ static INLINE void prio_insert(Thread *tp, ThreadsQueue *tqp) {
   tp->p_prev->p_next = cp->p_prev = tp;
 }
 
-static INLINE void fifo_insert(Thread *tp, ThreadsQueue *tqp) {
+static INLINE void queue_insert(Thread *tp, ThreadsQueue *tqp) {
 
   tp->p_prev = (tp->p_next = (Thread *)tqp)->p_prev;
   tp->p_prev->p_next = tqp->p_prev = tp;
@@ -49,6 +49,13 @@ static INLINE Thread *fifo_remove(ThreadsQueue *tqp) {
   return tp;
 }
 
+static INLINE Thread *lifo_remove(ThreadsQueue *tqp) {
+  Thread *tp = tqp->p_prev; /* element at the tail (p_prev end) of the queue */
+
+  (tqp->p_prev = tp->p_prev)->p_next = (Thread *)tqp; /* unlink it */
+  return tp;
+}
+
 static INLINE Thread *dequeue(Thread *tp) {
 
   tp->p_prev->p_next = tp->p_next;
diff --git a/src/include/lists.h b/src/include/lists.h
index d98892600..741648a03 100644
--- a/src/include/lists.h
+++ b/src/include/lists.h
@@ -61,8 +61,9 @@ typedef struct {
 extern "C" {
 #endif
   void prio_insert(Thread *tp, ThreadsQueue *tqp);
-  void fifo_insert(Thread *tp, ThreadsQueue *tqp);
+  void queue_insert(Thread *tp, ThreadsQueue *tqp);
   Thread *fifo_remove(ThreadsQueue *tqp);
+  Thread *lifo_remove(ThreadsQueue *tqp);
   Thread *dequeue(Thread *tp);
   void list_insert(Thread *tp, ThreadsList *tlp);
   Thread *list_remove(ThreadsList *tlp);
diff --git a/test/testbmk.c b/test/testbmk.c
index 392297782..f2fcb94dc 100644
--- a/test/testbmk.c
+++ b/test/testbmk.c
@@ -21,6 +21,10 @@
 
 #include "test.h"
 
+static Semaphore sem1;
+
+static void empty(void) {}
+
 static msg_t thread1(void *p) {
   msg_t msg;
 
@@ -52,12 +56,6 @@ static char *bmk1_gettest(void) {
   return "Benchmark, context switch #1, optimal";
 }
 
-static void bmk1_setup(void) {
-}
-
-static void bmk1_teardown(void) {
-}
-
 static void bmk1_execute(void) {
   uint32_t n;
 
@@ -74,8 +72,8 @@ static void bmk1_execute(void) {
 
 const struct testcase testbmk1 = {
   bmk1_gettest,
-  bmk1_setup,
-  bmk1_teardown,
+  empty,
+  empty,
   bmk1_execute
 };
 
@@ -84,12 +82,6 @@ static char *bmk2_gettest(void) {
   return "Benchmark, context switch #2, empty ready list";
 }
 
-static void bmk2_setup(void) {
-}
-
-static void bmk2_teardown(void) {
-}
-
 static void bmk2_execute(void) {
   uint32_t n;
 
@@ -106,8 +98,8 @@ static void bmk2_execute(void) {
 
 const struct testcase testbmk2 = {
   bmk2_gettest,
-  bmk2_setup,
-  bmk2_teardown,
+  empty,
+  empty,
   bmk2_execute
 };
 
@@ -121,12 +113,6 @@ static char *bmk3_gettest(void) {
   return "Benchmark, context switch #3, 4 threads in ready list";
 }
 
-static void bmk3_setup(void) {
-}
-
-static void bmk3_teardown(void) {
-}
-
 static void bmk3_execute(void) {
   uint32_t n;
 
@@ -147,8 +133,8 @@ static void bmk3_execute(void) {
 
 const struct testcase testbmk3 = {
   bmk3_gettest,
-  bmk3_setup,
-  bmk3_teardown,
+  empty,
+  empty,
   bmk3_execute
 };
 
@@ -157,12 +143,6 @@ static char *bmk4_gettest(void) {
   return "Benchmark, threads creation/termination, worst case";
 }
 
-static void bmk4_setup(void) {
-}
-
-static void bmk4_teardown(void) {
-}
-
 static void bmk4_execute(void) {
 
   uint32_t n = 0;
@@ -184,8 +164,8 @@ static void bmk4_execute(void) {
 
 const struct testcase testbmk4 = {
   bmk4_gettest,
-  bmk4_setup,
-  bmk4_teardown,
+  empty,
+  empty,
   bmk4_execute
 };
 
@@ -194,12 +174,6 @@ static char *bmk5_gettest(void) {
   return "Benchmark, threads creation/termination, optimal";
 }
 
-static void bmk5_setup(void) {
-}
-
-static void bmk5_teardown(void) {
-}
-
 static void bmk5_execute(void) {
 
   uint32_t n = 0;
@@ -221,23 +195,75 @@ static void bmk5_execute(void) {
 
 const struct testcase testbmk5 = {
   bmk5_gettest,
-  bmk5_setup,
-  bmk5_teardown,
+  empty,
+  empty,
   bmk5_execute
 };
 
+static msg_t thread3(void *p) {
+
+  while (!chThdShouldTerminate())
+    chSemWait(&sem1);
+  return 0;
+}
+
 static char *bmk6_gettest(void) {
 
-  return "Benchmark, I/O Queues throughput";
+  return "Benchmark, mass reschedulation, 5 threads";
 }
 
 static void bmk6_setup(void) {
-}
 
-static void bmk6_teardown(void) {
+  chSemInit(&sem1, 0);
 }
 
 static void bmk6_execute(void) {
+  uint32_t n;
+
+  threads[0] = chThdCreateFast(chThdGetPriority()+1, wa[0], STKSIZE, thread3);
+  threads[1] = chThdCreateFast(chThdGetPriority()+2, wa[1], STKSIZE, thread3);
+  threads[2] = chThdCreateFast(chThdGetPriority()+3, wa[2], STKSIZE, thread3);
+  threads[3] = chThdCreateFast(chThdGetPriority()+4, wa[3], STKSIZE, thread3);
+  threads[4] = chThdCreateFast(chThdGetPriority()+5, wa[4], STKSIZE, thread3);
+
+  n = 0;
+  test_wait_tick();
+  test_start_timer(1000);
+  do {
+    chSemReset(&sem1, 0);
+    n++;
+#if defined(WIN32)
+    ChkIntSources();
+#endif
+  } while (!test_timer_done);
+  chThdTerminate(threads[0]);
+  chThdTerminate(threads[1]);
+  chThdTerminate(threads[2]);
+  chThdTerminate(threads[3]);
+  chThdTerminate(threads[4]);
+  chSemReset(&sem1, 0);
+  test_wait_threads();
+
+  test_print("--- Score : ");
+  test_printn(n);
+  test_print(" reschedulations/S, ");
+  test_printn(n * 6);
+  test_println(" ctxswc/S");
+}
+
+const struct testcase testbmk6 = {
+  bmk6_gettest,
+  bmk6_setup,
+  empty,
+  bmk6_execute
+};
+
+static char *bmk7_gettest(void) {
+
+  return "Benchmark, I/O Queues throughput";
+}
+
+static void bmk7_execute(void) {
   static uint8_t ib[16];
   static Queue iq;
 
@@ -264,9 +290,9 @@ static void bmk6_execute(void) {
   test_println(" bytes/S");
 }
 
-const struct testcase testbmk6 = {
-  bmk6_gettest,
-  bmk6_setup,
-  bmk6_teardown,
-  bmk6_execute
+const struct testcase testbmk7 = {
+  bmk7_gettest,
+  empty,
+  empty,
+  bmk7_execute
 };
diff --git a/test/testbmk.h b/test/testbmk.h
index e52abee28..5e99dc3d5 100644
--- a/test/testbmk.h
+++ b/test/testbmk.h
@@ -21,6 +21,7 @@
 #define _TESTBMK_H_
 
 extern const struct testcase testbmk1, testbmk2, testbmk3,
-                             testbmk4, testbmk5, testbmk6;
+                             testbmk4, testbmk5, testbmk6,
+                             testbmk7;
 
 #endif /* _TESTBMK_H_ */
-- 
cgit v1.2.3