@q file: thread.w@>
@q%   Copyright Dave Bone 1998 - 2015@>
@q% /*@>
@q%    This Source Code Form is subject to the terms of the Mozilla Public@>
@q%    License, v. 2.0. If a copy of the MPL was not distributed with this@>
@q%    file, You can obtain one at http://mozilla.org/MPL/2.0/.@>
@q% */@>
@q yacco2's internal thread implementations@>
@** Yacco2's internal thread implementation.
@(wthread.cpp@>=
// Emitted file layout: copyright notice, the iyacco2 section, then every accrued thread code section.
@<copyright notice@>;
@<iyacco2@>;
@<accrue thread code@>;
  
@*2 Thread control runtime environment.\fbreak
Thread control record for the thread pool table.
This is used by Yacco2's global runtime table of spawned threads.
This is a one-to-many relationship as the same thread can be running
within a nested call chain.
Very basic in its thread |worker_status|: working, waiting for work, and 
I'm out of here.
@<Type...@>+=
// Forward declaration: the list type below only stores pointers to it.
struct worker_thread_blk; @/
// List of worker blocks; the thread table holds one such list per entry.
typedef std::list<yacco2::worker_thread_blk*> Parallel_thread_list_type;
typedef Parallel_thread_list_type::iterator Parallel_thread_list_iterator_type;
// Type of the global thread table: a vector of worker block lists.
typedef std::vector<yacco2::Parallel_thread_list_type > Parallel_thread_tbl_type;
typedef Parallel_thread_tbl_type::iterator Parallel_thread_tbl_iterator_type;
// Element type of the procedure call table.
// NOTE(review): the flag presumably marks a procedure call as currently active; confirm against its users.
struct called_proc_entry{
  bool proc_call_in_use__;
};
typedef called_proc_entry Parallel_thread_proc_call_table_type;

@*2 |worker_thread_blk| structure. \fbreak
|grammar_s_parser__| is the grammar's parser.
|status__| takes  one of 3 states:\fbreak
\ptindent{1) |THREAD_WAITING_FOR_WORK|}
\ptindent{2) |THREAD_WORKING|}
\ptindent{3) |THREAD_TO_EXIT|}

Of import:\fbreak
When the thread gets created, 
|worker_thread_blk| will enter the thread into the global
thread table list. The table is a vector of precalculated thread numbers
generated from Yacco2's linker.
The launching grammar has mutual access to |Parallel_thread_table|.
So the created thread can just deposit its |worker_thread_blk|
address into the list.   
 
@<Struct...@>+=
// Control record of one worker thread as kept in the global thread table.
struct worker_thread_blk{
    worker_thread_blk();// monolithic grammar
	// parallel grammar: enters itself into the global thread table
	worker_thread_blk(yacco2::Parser* Grammar_s_parser,yacco2::Parser* Calling_parser);	
	// the grammar's parser; 0 when built by the monolithic constructor
	yacco2::Parser* grammar_s_parser__;
	// |THREAD_WAITING_FOR_WORK|, |THREAD_WORKING| or |THREAD_TO_EXIT|
	int status__;
	// set to 1 by both constructors
	int run_cnt__;
	// thread id: subscript of this block's list in the global thread table
	int thd_id__;
	// called by the running thread itself to flag that it is idle
	void   set_waiting_for_work();
};
@*2 Global |Parallel_thread_table| declaration of use.\fbreak 
Maintains a list of launched threads with their availability. 
For efficiency, it is an array subscripted
by the thread's id number.
Why the list?
This is a 1:m situation. Due to nested thread calls, a thread could be
busy so another copy
of the threads needs creation. 
@<Global variables@>+=
// Table of launched worker blocks, subscripted by thread id.
extern Parallel_thread_tbl_type Parallel_thread_table;
// Procedure call entries, an array of |MAX_NO_THDS| elements.
extern Parallel_thread_proc_call_table_type Parallel_thread_proc_call_table[MAX_NO_THDS];
@*2 Global routines declaration of use. 
@<External rtns and variables@>+=
// Gives every idle worker thread in the global table its exit notice and destroys the library mutexes.
extern void  Parallel_threads_shutdown(yacco2::Parser& PP);
// Proxy arbitrator used when threads are spawned manually by the grammar writer's code.
extern yacco2::THR 
_YACCO2_CALL_TYPE 
AR_for_manual_thread_spawning(yacco2::Parser*Caller_pp);
// NOTE(review): presumably holds the address of the proxy arbitrator above; its definition is not in this part of the file.
extern yacco2::Type_pp_fnct_ptr PTR_AR_for_manual_thread_spawning;

@*2 Global |Parallel_thread_table| definition. 
@<accrue thread code@>+=
// Built with |MAX_NO_THDS| empty lists so that a thread id can subscript it directly.
yacco2::Parallel_thread_tbl_type yacco2::Parallel_thread_table(MAX_NO_THDS);
// One procedure call entry per possible thread.
yacco2::Parallel_thread_proc_call_table_type yacco2::Parallel_thread_proc_call_table[MAX_NO_THDS];

@*2 Global Proxy arbitrator.\fbreak
Used for manual parallelism.
This is manually launched by the grammar writer's code within a grammar.
@<accrue thread code@>+=
// Proxy arbitrator for manually spawned threads. It carries no selection code:
// between the arbitrator begin and end sections it only runs the
// no arbitration code present check, which stops the program when more than
// one accept token is queued.
extern
yacco2::THR 
_YACCO2_CALL_TYPE@/ 
 yacco2::AR_for_manual_thread_spawning(yacco2::Parser* Caller_pp){
 // name reported in the error message of the check below
 std::string ar_name("AR_yacco2");
@<iar begin@>;
@<No arbitration code present@>;
@<iar end@>;
}


@*2 No arbitration code present.\fbreak
This condition exists when the accept queue has more than 1 accept token in the queue.
What token should be accepted while the others are quietly put to heaven?
Within Yacco2, it checks when the configuration state has more than 1 thread being launched, and
there is no grammar writer code to select the winning token, before the throw code is emitted.
Determining how the select code is present is currently crude. It checks to see
that the |pp_accept_queue__| variable is present in the 
syntax directed code string: not present 
then emit the conditional wrapping of the throw condition. 
@<No arbitration code present@>=
// More than one accepted token and no code to choose between them:
// report the arbitrator name with the count, then end the program.
if(Caller_pp->th_accepting_cnt__ > 1){
	// NOTE(review): sprintf is unbounded; assumes |BUFFER_SIZE| is large enough for the formatted message.
	char a[BUFFER_SIZE];
@.Err no arbitration code present ...@>
	yacco2::KCHARP msg = 
	"no arbitration code present in %s - accept token queue has %i > 1 tokens to arbitrate on";
	// |ar_name| is a local of the routine this section is expanded into
	sprintf(a,msg,ar_name.c_str(),Caller_pp->th_accepting_cnt__);
	Yacco2_faulty_precondition(a,__FILE__,__LINE__);
	exit(1);
}

@*2 |worker_thread_blk| initialization: monolithic grammar.\fbreak
Part of its duties is to create the mutexes controlling Yacco2's tables:
symbol and thread list. To serialize traced output, a mutex is used to throttle back
simultaneous multi-threads tracing into a single queue of buffer flush-out.
 STL does not control this.
It is at the mercy of how threads are executed
 and how the operating system tic-tacs the
clock and their output. 
Due to this whimsy of clock soundings, you can receive
from different threads interspersed mixed snippets of 
 traced code on the same line outputted.
This is why all atomic traces are bracketed by the acquire / release of the trace mutex.

The mutex creation is done by the birth of a grammar object: 
each grammar contains a |Parser| component containing a |worker_thread_blk|.
So there is no need for a special startup routine to use Yacco2's library.
@<accrue thread code@>+=
yacco2::worker_thread_blk::worker_thread_blk() // monolithic grammar
:grammar_s_parser__(0)
,status__(0)
,run_cnt__(1)
,thd_id__(0){
  // The library mutexes are created once only, by the first block built this way.
  static bool init_gbl(OFF);
  if(init_gbl != OFF) return;// an earlier grammar already created them
  init_gbl = ON;
  CREATE_MUTEX(yacco2::TH_TBL_MU);// global thread table
  CREATE_MUTEX(yacco2::TRACE_MU);// serializes traced output
  CREATE_MUTEX(yacco2::TOKEN_MU);
  CREATE_MUTEX(yacco2::SYM_TBL_MU);// symbol table
}

@*2 |worker_thread_blk| initialization: threaded grammar.\fbreak
See |HP Alpha \CPLUSPLUS/ ``this'' object mis-address| describing bug. It
provides the reason for the change from
|i.push_back(this)| to
|i.push_back(&Grammar_s_parser->th_blk__)|.
|@<acquire global thread table...@>| and |@<release global thread table...@>|
are not used in this context as the grammar requesting the threads
to run has already acquired it!
@<accrue thread code@>+=
yacco2::worker_thread_blk::worker_thread_blk(yacco2::Parser* Grammar_s_parser
,yacco2::Parser* Calling_parser)// parallel grammar
:grammar_s_parser__(Grammar_s_parser)
,status__(THREAD_WORKING)// a freshly created thread starts out working
,run_cnt__(1)
,thd_id__(Grammar_s_parser->thread_entry__->thd_id__){
  // No locking here: the grammar requesting the thread already holds the
  // global thread table. The block is appended to the list of its thread id.
  Parallel_thread_list_type& i = 
    Parallel_thread_table[grammar_s_parser__->thread_entry__->thd_id__];
  i.push_back(this);
  @<Trace MSG thread being created@>;
}
@*2 |set_waiting_for_work|.\fbreak
It is the running thread who
sets its own work status. 
Both |@<acquire global thread table...@>| and |@<release global thread table...@>|
are used by the running thread in their local procedures |parallel_parse_successful|
or |parallel_parse_unsuccessful|.
@<accrue thread code@>+=
// Flags this block as idle. Only the status changes; the two trace sections
// report before and after the change.
void
yacco2::worker_thread_blk::set_waiting_for_work(){
@<Trace MSG thread idle before setting waiting for work@>;
  status__ = THREAD_WAITING_FOR_WORK;
@<Trace MSG thread idle after setting waiting for work@>;
}

@*2 Global shutdown of threads.\fbreak
Goes through the list of threads.
Before doing 2 passes on the table, 
the routine pauses for x seconds to let the swamp drain:
due to a single processor environment, there could still
be threads outstanding in their winddown to-wait-for-work sequence.
It then goes thru the thread list for threads waiting-for-work, these threads
are given their pink notice.

The last pause is to allow the draining of the threads' output:
flush those buffers.
The 2nd pass thru the table is a sanity check.
Any threads still outstanding are listed to Yacco2's output file |lrclog|.
This notification allows the compiler writer to check out why.
@<accrue thread code@>+=
// Shuts down the worker threads held in the global thread table.
// |PP| is the parser posting the |Shutdown| event to each idle thread.
// Sequence: pause, lock the table, log the table, give idle threads their
// exit notice, pause again, unlock, then destroy the library mutexes.
extern void yacco2::Parallel_threads_shutdown(yacco2::Parser& PP){@/
  // The first pause is taken BEFORE the table is locked: a thread winding down
  // needs the global thread table to set itself waiting for work, so pausing
  // while holding the table would keep it from ever reaching that state.
  @<pause for x seconds@>;// let the other threads go into a wait state
@<acquire global thread table critical...@>;
int no_thds_to_shutdown(0);// filled in by the table listing section
int no_ths_exited(0);// filled in by the look for threads section
  @<Threads in table to potentially shutdown@>;
  @<look for threads to shutdown@>;
  @<pause for x seconds@>;// allow the threads to close down
@<release global thread table critical region@>;
  // NOTE(review): threads reported as not shutting down may still use these mutexes.
  DESTROY_MUTEX(yacco2::TH_TBL_MU);
  DESTROY_MUTEX(yacco2::TRACE_MU);
  DESTROY_MUTEX(yacco2::TOKEN_MU);
  DESTROY_MUTEX(yacco2::SYM_TBL_MU);
}

@*3 Pause for x seconds.
@<pause for x seconds@>=
// Pause whose call depends on |THREAD_LIBRARY_TO_USE__|; no pause is compiled for values other than 0 and 1.
// NOTE(review): presumably value 1 selects the Microsoft call taking milliseconds; confirm.
#if THREAD_LIBRARY_TO_USE__ == 1
	  Sleep(1000);
#elif THREAD_LIBRARY_TO_USE__ == 0
	  sleep(1);// from guy steele c ref bk, in seconds.
#endif
   
@*3 Threads in table to potentially shutdown.\fbreak
@<Threads in table to potentially shutdown@>=
	// Pass 1: count every worker block registered in the thread table.
	Parallel_thread_tbl_iterator_type k = Parallel_thread_table.begin();
	Parallel_thread_tbl_iterator_type ke = Parallel_thread_table.end();
	for(;k!=ke;++k){
		no_thds_to_shutdown += static_cast<int>(k->size());
	}
	{// summary line: bracketed by the trace mutex like the per thread lines below
@<acquire trace mu@>;
	yacco2::lrclog << "Number of threads in table to shutdown: " << no_thds_to_shutdown << __FILE__ << __LINE__<< std::endl;
@<release trace mu@>;
	}
	// Pass 2: log each worker block with its thread number, name, run count and status.
	k = Parallel_thread_table.begin();
	for(;k!=ke;++k){
		Parallel_thread_list_iterator_type m = k->begin();
		Parallel_thread_list_iterator_type me = k->end();
		for(;m != me;++m){
			worker_thread_blk* tb = *m;
@<acquire trace mu@>;
		yacco2::lrclog << "worker task in table tb*: " << tb 
		<< " thread id: " 
		<< tb->grammar_s_parser__->thread_no__
		<< "::" << tb->grammar_s_parser__->thread_name()
		<< " run cnt: " << tb->run_cnt__;
		// status in words; an unknown value is printed as a number
		switch (tb->status__){
		case THREAD_WAITING_FOR_WORK:{
		yacco2::lrclog << " waiting for work";
		break;
		}
		case THREAD_WORKING:{
		yacco2::lrclog << " working";
		break;
		}
		case THREAD_TO_EXIT:{
		yacco2::lrclog << " thread to exit";
		break;
		}
		default:{
		yacco2::lrclog << " ??? thread status: "   << tb->status__;
		break;
		}
		}
		yacco2::lrclog	<< __FILE__ << __LINE__<< std::endl;
@<release trace mu@>;
		}
	}


@*3 Look for threads to shutdown.\fbreak
@<look for threads to shutdown@>=
	// Walk every list of the thread table: idle threads get their exit notice,
	// all others are only reported.
	Parallel_thread_tbl_iterator_type i = Parallel_thread_table.begin();
	Parallel_thread_tbl_iterator_type ie = Parallel_thread_table.end();
	for(;i!=ie;++i){
		Parallel_thread_list_iterator_type j = i->begin();
		Parallel_thread_list_iterator_type je = i->end();
		for(;j != je;++j){
			worker_thread_blk* tb = *j;
			if(tb->status__ == THREAD_WAITING_FOR_WORK){
@<acquire trace mu@>;
				++no_ths_exited;
				yacco2::lrclog << "worker task to exit: " 
				<< tb->grammar_s_parser__->thread_no__
				<< "::" << tb->grammar_s_parser__->thread_name()
				<< " tb* " << tb 
				<< __FILE__ << __LINE__<< std::endl;
@<release trace mu@>;
	// Lock the called thread's mutex before flagging the exit and posting
	// |Shutdown|; the thread unlocks it itself after leaving its run loop.
	LOCK_MUTEX_OF_CALLED_PARSER(tb->grammar_s_parser__->mu__
,*tb->grammar_s_parser__," of called thread");
				tb->status__ = THREAD_TO_EXIT;
				PP.post_event_to_requesting_grammar(*tb->grammar_s_parser__,Shutdown,PP);
			}
			else{
@<acquire trace mu@>;
				yacco2::lrclog << "worker task not shutting down: " 
				<< tb->grammar_s_parser__->thread_no__
				<< "::" << tb->grammar_s_parser__->thread_name()
				<< " tb* " << tb
				<< " status: " <<  tb->status__
				<< __FILE__ << __LINE__<< std::endl;
@<release trace mu@>;
			}
		}
		i->clear();// forget the blocks of this thread id
	}
  // Empties the table itself: after shutdown it holds no lists at all.
  Parallel_thread_table.clear();
	{// summary line: bracketed by the trace mutex like the per thread lines above
@<acquire trace mu@>;
	yacco2::lrclog << "Number of threads in table exiting: " 
	<< no_ths_exited << " number of threads not shutting down: "
	<< no_thds_to_shutdown - no_ths_exited  << __FILE__ << __LINE__<< std::endl;
@<release trace mu@>;
	}


@*2 |Caccept_parse| Structure --- Accept result from threads.\fbreak
Ahh, the smell of ??? Go tell it to cm. 
Jess the reality show syndrome. This message gets put into the accept queue
of the requesting pp.
This is a potential winner requiring the arbitrator to decide.
Lets hope the judge is not of
TVQ `star acadamie' tabloids variety.

Changed the |accept_queue| from a mapped structure keyed by 
the accept terminal's enumeration id
to a sequential list of local |Caccept_parse|.
As non-determinism is small: potentially 2 or 3 Tes occasionally in 
the queue, i felt the sequential
attitude appropriate instead of a mapped structure.
The big improvement is to remove malloced |Caccept_parse| and use the copy into the
local |Caccept_parse| of the accept queue.
@<Struct...@>+=
// One entry of a requesting parser's accept queue: which thread succeeded,
// the token it accepts, and the lookahead token, each with its position.
struct Caccept_parse{
   // builds a filled entry from the reporting thread and its two tokens
   Caccept_parse
           (yacco2::Parser& Th_reporting_success@/
           ,yacco2::CAbs_lr1_sym& Accept_token@/
           ,yacco2::UINT Accept_token_pos@/
           ,yacco2::CAbs_lr1_sym& La_token@/
           ,yacco2::UINT La_token_pos);
   // builds an empty entry: every member zero
   Caccept_parse();
   // resets every member to zero
   void initialize_it();
   // copies all five members from another entry
   void fill_it(Caccept_parse& Accept_parse);
   // overwrites all five members from the given thread and tokens
   void fill_it(yacco2::Parser&    Th_reporting_success@/
           ,yacco2::CAbs_lr1_sym& Accept_token@/
           ,yacco2::UINT Accept_token_pos@/
           ,yacco2::CAbs_lr1_sym& La_token@/
           ,yacco2::UINT La_token_pos);
 
   // deletes nothing
   ~Caccept_parse();
   yacco2::Parser*   th_reporting_success__;
   yacco2::CAbs_lr1_sym* accept_token__;
   yacco2::UINT accept_token_pos__;
   yacco2::CAbs_lr1_sym* la_token__;
   yacco2::UINT la_token_pos__;
};

@*2 |Caccept_parse| and |~Caccept_parse| implementation.\fbreak
@<accrue thread code@>+=
// Filled entry: keeps the addresses of the reporting thread and of both tokens.
yacco2::Caccept_parse::@/
Caccept_parse@/
           (yacco2::Parser&    Th_reporting_success@/
           ,yacco2::CAbs_lr1_sym& Accept_token@/
           ,yacco2::UINT Accept_token_pos@/
           ,yacco2::CAbs_lr1_sym& La_token@/
           ,yacco2::UINT La_token_pos)
:th_reporting_success__(&Th_reporting_success)
,accept_token__(&Accept_token)
,accept_token_pos__(Accept_token_pos)
,la_token__(&La_token)
,la_token_pos__(La_token_pos){
}

// Empty entry: all pointers and positions are zero.
yacco2::Caccept_parse::@/
Caccept_parse()
:th_reporting_success__(0)
,accept_token__(0)
,accept_token_pos__(0)
,la_token__(0)
,la_token_pos__(0){
}

// Puts the entry back into its empty state.
void yacco2::Caccept_parse::initialize_it(){
  la_token_pos__ = 0;
  la_token__ = 0;
  accept_token_pos__ = 0;
  accept_token__ = 0;
  th_reporting_success__ = 0;
}

// Member by member copy of another entry.
void yacco2::Caccept_parse::fill_it(Caccept_parse& Accept_parse){
  const Caccept_parse& from = Accept_parse;
  this->th_reporting_success__ = from.th_reporting_success__;
  this->accept_token__ = from.accept_token__;
  this->accept_token_pos__ = from.accept_token_pos__;
  this->la_token__ = from.la_token__;
  this->la_token_pos__ = from.la_token_pos__;
}

// Overwrites the entry with the given thread, tokens and positions.
void yacco2::Caccept_parse::fill_it@/
           (yacco2::Parser&    Th_reporting_success@/
           ,yacco2::CAbs_lr1_sym& Accept_token@/
           ,yacco2::UINT Accept_token_pos@/
           ,yacco2::CAbs_lr1_sym& La_token@/
           ,yacco2::UINT La_token_pos){
  this->th_reporting_success__ = &Th_reporting_success;
  this->accept_token__ = &Accept_token;
  this->la_token__ = &La_token;
  this->accept_token_pos__ = Accept_token_pos;
  this->la_token_pos__ = La_token_pos;
}

// Nothing to release: the entry deletes none of the objects it points to.
yacco2::Caccept_parse::
~Caccept_parse(){
}

@** Thread code for arbitrator, and parallel parse.\fbreak
The emitted files become the include files for the emitted threads and each
finite automaton's arbitrator.
For the parallel parse thread, these are the core code loops that make it tick.
The arbitrator code is the two pieces of bread that sandwich the
grammar writer's selection code supplied from the |arbitrator-code| construct.
The produced files  are:\fbreak
\ptindent{1) |wpp_core.cpp| --- parallel parser include code for generated pp threads}
\ptindent{2) |war_begin_code.h| --- arbitrator's start code}
\ptindent{3) |war_end_code.h| --- arbitrator's end code}
 
@*2 Arbitrator code generator --- begin and end files: |war_xxx_code.h|.\fbreak
The emitted code is the |pp_accept_queue|'s iteration to walk thru the potential
tokens for consideration produced by the parallel threads inserted 
into the requesting grammar's accept queue.
It is structured into 2 parts:\fbreak
\ptindent{1) the startup variables to iterate thru the accept queue} 
\ptindent{2) the ending code of the iteration}

Sandwiched between these 2 pieces of code is the arbitration logic supplied
by the grammar writer that gets emitted for that specific
state's configuration. 
Normally there is no code as the parallel request is deterministic with
 at most only one token returned by one of the launched threads. 

@*2 Arbitrator begin code.\fbreak 
This is injected into the emitted arbitrators produced by Yacco2.
The grammar writer's code follows this code.
It is the discriminatory code used to select the winning accept terminal
within the accept queue.

Arbitration is needed when there are competing parallel parses that
return their accept terminals.
A single entry only is checked first and returned before going into the 
arbitrated code selection.
A sanity check is done on the accept queue whereby the accepted
thread count {\bf must equal} the number 
of accepted tokens placed into the queue.

The |Caller_pp| variable is the passed Parser pointer argument 
to the arbitration routine. 
It is the parser's context that includes its critical region
supporting threading and the accept queue. 
Arbitration routine(s) generated out of the grammar
have the following naming convention:\fbreak
\ptindent{|AR_| concatenated with the rule name}
An example of a routine is:\fbreak
\ptindent{yacco2::THR |_YACCO2_CALL_TYPE| |NS_pass3::AR_Rtok|(yacco2::Parser* |Caller_pp|);}
The |_YACCO2_CALL_TYPE| is an internal definition specific to Microsoft call
types. It is defined as |__stdcall| whereas in the other supported platforms its
value is empty.
@(war_begin_code.h@>=
@<copyright notice@>;
  @<pp accept queue |war_begin_code|@>;

@ @<pp accept queue |war_begin_code|@>=
  @<uns@>;
  // |i| is the accept queue subscript read by the arbitrator end code; it starts at 1
  int i = 1;
  // |ie| is the number of accepting threads reported by the calling parser
  int ie = Caller_pp->th_accepting_cnt__;
  @<Trace AR trace the starting of arbitration@>;

@*3 Example of arbitrated grammar code.\fbreak
The accept queue is sequentially searched in arbitrating on the enumerated id
 of the potential accepting Tes.
The following example only gets executed when there are 2 or more
accepting terminals in the queue.
In this example, there are 2 independent parallelisms going on:\fbreak
\ptindent{keyword versus identifier}
\ptindent{floating point versus integer}
They never intersect!
\let\setuplistinghook = \linenumberedlisting
\listing{"/usr/local/yacco2/diagrams/arbitrator.txt"}
\fbreak
Lines 11 and 12 above show 3 things:\fbreak
\ptindent{1) |i| is the subscript to accept parse array's current contents}
\ptindent{2) |Caller_pp| (Parser*) points to the critical region of the grammar}
\ptindent{3) |pp_accept_queue__| contains the parallel results from the threads}
The decision code only gets executed if there are 2 or more
terminals placed into the accept queue for arbitration.
This case is very rare but the above 
example illustrates dealing with non-determinism from 2 or more successful parallel
parses. How can this come about?: Subset - superset --- common prefixes. 
The example gives 2 examples of this that are tested for. 
The integer recognizes the whole number while the floating point continues with the fraction.
One can argue that the grammar strategy was not very refined as the lookahead
on the integer should not accept ``.''. 
You're right but this example is instructive and it was drawn from a real
translator that was put together quickly. The moral is: u can be inefficient but 
effective with
non-determinism.

Note, the items placed into the accept queue
can contain error terminals forwarded to the calling grammar to do its own
abort sequence.

@*2 Arbitrator end code.\fbreak
Closes the iteration thru the accept queue.
Originally i optimized injection code in case the grammar writer missed selecting
the accepted T. This code was dependent on whether the specific
state had multiple threads to launch.
Now for clarity i have included a stopper procedure before the |arbitrated_parameter|
label
whereby it spews the gory details for the grammar writer's logic correction:
Competing threads within the grammar have their names displayed
while a thread with a ``NULL'' name is not a competing thread
but allows one to be specific to an accepting token 
returned by one of the named threads.

Where is the accept queue drained of its contents?
As potential terminals for arbitration are birthed from malloc (new),
their sending to heaven should be epiphaned by ``delete''.
This is done by the generic Parser code just after the call to the ``Arbitrator''. 
This is a code-bloat diet: Putting this in each generated arbitrator routine
across all grammars would have been fat people community 
like the works of Spanish sculptor/painter Botero. 

@(war_end_code.h@>=
@<copyright notice@>;
  @<pp accept queue |war_end_code|@>;


@ @<pp accept queue |war_end_code|@>=
// Stopper: reached by falling through when the preceding code selected no accept token.
Caller_pp->abort_no_selected_accept_parse_in_arbitrator();
// NOTE(review): presumably the grammar writer's selection code jumps to this label; confirm.
arbitrated_parameter:@/
  // publish the chosen queue entry and its subscript to the calling parser
  Caller_pp->arbitrated_token__ = &Caller_pp->pp_accept_queue__[i];
  Caller_pp->pp_accept_queue_idx__ = i;
  @<Trace AR stopped arbitrating@>;
  return (THR)1;

@*2 Parallel thread code: injection code for emitted pp |wpp_core.h|.\fbreak
This is the injector code for the manufactured parallel thread.
Drawn from the just created file |wpp_core.h|.
If it has been launched as a thread, ``waiting-for-work'' has been 
removed from the run loop and placed in
the responding |parallel_parse_successful| and |parallel_parse_unsuccessful| procedures.
This is an optimization: Ahhh the dragon trace of threading...

Even better is the check as to calling it as a thread or as a procedure.
This depends on the number of threads to launch. If there is only one thread to run,
this is called as a procedure instead of a thread. Do u see the friskiness in Yacco2?
Well no, as threads now dominate.

Please see ``Notes to myself'' on running diatribe regarding optimization.
@(wpp_core.h@>=
@<copyright notice@>;
@<uns@>;
  @<create communication variables@>;
  @<create parser related variables and set them@>;
  @<set parameter passed to pp as a message@>;
// One pass per parse request: set up the tokens, parse, tidy the stack and
// flags, then wait for the next event. The loop ends once the thread block
// status is |THREAD_TO_EXIT|.
do{
  @<establish initial parser's token setting@>;
  @<Trace pp start info@>;
  @<let's parallel parse. do u?@>;    
  @<Trace stop of parallel parse message@>;
  @<clean up parse stack but leave as ready to parse again@>;
  @<house clean the parser and local communication variables@>;
  @<Trace parallel thread waiting-to-do-work@>;
  @<pp wait for work or shutdown message@>;
  @<Trace pp received go start working message@>;
  }while(pp_parser.th_blk__.status__ != THREAD_TO_EXIT);
// NOTE(review): no goto to this label is visible in this section.
finished_working:@/
@<winddown duties of pp@>;
@<Trace pp finished working@>;
  // releases this thread's parser mutex; the shutdown routine locks it before posting |Shutdown|
  UNLOCK_MUTEX_OF_CALLED_PARSER(pp_parser.mu__,pp_parser," of called thread");
  return (THR)1;

@ Winddown duties of pp.
@<winddown duties of pp@>=
  // last duty before the thread returns: clear its parse stack
  pp_parser.clear_parse_stack();

@ Pp wait for work or shutdown message.
@<pp wait for work or shutdown message@>=
    // wait here for the next event: more work or the shutdown notice
    pp_parser.wait_for_event();
    
@ House clean the parser and local communication variables.\fbreak
Their procedure calls replaced for speed.
@<house clean the parser and local communication variables@>=
    // reset the parser flags for the next request; set directly instead of by procedure call for speed
    pp_parser.use_all_shift__ = ON;
    pp_parser.abort_parse__ = OFF;
    pp_parser.stop_parse__ = OFF;
    pp_parser.has_questionable_shift_occured__ = OFF;
    
@ Clean up parse stack but leave as ready to parse again.
The following points are done:\fbreak
\ptindent{1) clean up trace activity: normally done when parse object destroyed}
\ptindent{2) leave first record on stack for efficiency}
\ptindent{3) make sure first stack symbol on stack checked for delete attribute} 
@<clean up parse stack but leave as ready to parse again@>=
    // unwind the stack so that only its first record is left
    pp_parser.remove_from_stack(pp_parser.parse_stack__.top_sub__ - 1);
    // the remaining record may still hold a symbol flagged for automatic deletion
    CAbs_lr1_sym* sym = pp_parser.top_stack_record()->symbol__;
    if (sym != 0){
      if (sym->auto_delete__ == ON){ 
        @<Trace pp's last symbol on stack set as autodelete@>;
        delete sym;
      }
      pp_parser.top_stack_record()->set_symbol(0);// keeping a clean stack
    }
    // put the stack back on the automaton's start state, ready for the next parse
    pp_parser.parse_stack__.lr_stk_init(*pp_parser.fsm_tbl__->start_state__);

@ Let's parallel parse. do u?.    
@<let's parallel parse. do u?@>=    
  // run the parse; its result is not used by the thread loop
  pp_parser.parallel_parse();

@ Establish initial parser's token setting.
   When the thread is established
   and waiting to be awakened, the calling grammar sets the following variables within the
   critical region of the called thread: 
   |from_thread__|, |pp_requesting_parallelism__|,  and |no_competing_pp_ths__|.
@<establish initial parser's token setting@>=
    // start at the requesting parser's current token and position
    pp_parser.override_current_token(*pp_parser.pp_requesting_parallelism__->current_token()
    ,pp_parser.pp_requesting_parallelism__->current_token_pos__);
    pp_parser.set_start_token(*pp_parser.pp_requesting_parallelism__->current_token());
    pp_parser.set_start_token_pos(pp_parser.pp_requesting_parallelism__->current_token_pos__);
    pp_parser.top_stack_record()->set_symbol(pp_parser.current_token());
    // take over the requester's token containers, error queue, recycle bin, lookup functor and count
    pp_parser.token_supplier__ = pp_parser.pp_requesting_parallelism__->token_supplier__;
    pp_parser.token_producer__ = pp_parser.pp_requesting_parallelism__->token_producer__;
    pp_parser.error_queue__ = pp_parser.pp_requesting_parallelism__->error_queue__;
    pp_parser.recycle_bin__ = pp_parser.pp_requesting_parallelism__->recycle_bin__;
    pp_parser.sym_lookup_functor__ = pp_parser.pp_requesting_parallelism__->sym_lookup_functor__;
    pp_parser.supplier_r_w_cnt__ = pp_parser.pp_requesting_parallelism__->supplier_r_w_cnt__;

// Sanity check 1: the thread block of this parser must point back at this parser.
// NOTE(review): sprintf is unbounded; assumes |BUFFER_SIZE| holds the message with the thread name.
if(pp_parser.th_blk__.grammar_s_parser__ != &pp_parser){
    char a[BUFFER_SIZE];
@.Err parser's thd blk's pp addr !...@>
	yacco2::KCHARP msg = "parser's thd blk's pp addr != itself thd: %i::%s";
	sprintf(a,msg,pp_parser.thread_no__,pp_parser.thread_name());
	Yacco2_faulty_precondition(a,__FILE__,__LINE__);
	exit(1);
}
// Sanity check 2: the requester seen through the thread block must be the requester of this parser.
if(pp_parser.th_blk__.grammar_s_parser__->pp_requesting_parallelism__
 != pp_parser.pp_requesting_parallelism__){
    char a[BUFFER_SIZE];
@.Err caller pp addr |!=| in calle...@>
	yacco2::KCHARP msg = 
"caller's pp addr not = in called parser's thd blk ptr, and its parser thd: %i::%s";
	sprintf(a,msg,pp_parser.thread_no__,pp_parser.thread_name());
	Yacco2_faulty_precondition(a,__FILE__,__LINE__);
	exit(1);
}
@ Create communication variables.
@<create communication variables@>=
  // NOTE(review): none of these is used in this section; presumably the trace sections read them.
  char ma[SMALL_BUFFER_4K];
  // message formats taking a number and a string
  const char* pp_start = "YACCO2_MSG__::%i::%s start parsing\n";
  const char* pp_stop = "YACCO2_MSG__::%i::%s stop parsing\n";
@<uns@>;
  
@ Set parameter passed to pp as a message.
@<set parameter passed to pp as a message@>=
   // record the caller as both requester and sender, and copy its count of requested threads
   pp_parser.pp_requesting_parallelism__ = Caller_pp;
   pp_parser.from_thread__ = Caller_pp;
   pp_parser.no_competing_pp_ths__ = Caller_pp->no_requested_ths_to_run__;

@ Create parser related variables and set them.
@<create parser related variables and set them@>=
  // The thread's own parser. NOTE(review): |ssPARSE_TABLE| and |pp_thread_entry| are not defined here; presumably the emitted thread including this file supplies them.
  Parser pp_parser(ssPARSE_TABLE,pp_thread_entry,Caller_pp);

@*2 Procedure call: injection code for emitted pp |wproc_pp_core.h|.\fbreak
This is the injector code for the manufactured called procedure instead of a thread.
Even better is the check as to calling it as a thread or as a procedure.
This depends on the number of threads to launch. If there is only one thread to run,
this is called as a procedure instead of a thread. Do u see the friskiness in Yacco2?
Well no, as threads now dominate.

Added improvements:\fbreak
A \TRAshift{} construct has been added to do chained
procedure calls: the 1st thread's returned T becomes the chained T
for the next (chained) procedure call.
 I overloaded this symbol to support 2 contexts: \Olinker and chained 
parsing calls. Why the overload? I only have 8 symbols reserved
for the |LRk| symbol class and one context does not interfere with the other so
i'm a bit lazy to possibly remove |eof| and double duty |eog| symbol where the
file processing container templates use |eof|.
Some parsing adjustments must be
added to link the chained T with the chained procedure call
as the chained procedure must reference the shifted T 
of the calling parser as its start T and not the current T of the calling parser.
|proc_call_funct__| has been added to the State's definition to support the chained call.
@(wproc_pp_core.h@>=
// Procedure form of the parallel parse: the same steps as the thread form,
// run once, with no loop and no waiting for events.
@<copyright notice@>;
@<uns@>;
  @<create procedure communication variables@>;
  @<set procedure parameter passed to pp as a message@>;

  @<establish procedure initial parser's token setting@>;
  @<Trace procedure pp start info@>;
  @<let's procedure parallel parse. do u?@>;    
  @<clean up procedure parse stack but leave as ready to parse again@>;
  @<house clean procedure the parser and local communication variables@>;
  
// NOTE(review): no goto to this label is visible in this section.
finished_working:@/
@<winddown duties of procedure pp@>;
@<Trace procedure pp finished working@>;
  // |rslt| is the result of the parallel parse call made above
  return rslt;

@ Winddown duties of procedure pp.
@<winddown duties of procedure pp@>=
  // last duty before returning to the caller: clear the parse stack
  proc_parser->clear_parse_stack();
    
@ House clean procedure the parser and local communication variables.
@<house clean procedure the parser and local communication variables@>=
    // reset the parser flags so the parser can be called again
    proc_parser->set_use_all_shift_on();
    proc_parser->set_abort_parse(OFF);
    proc_parser->set_stop_parse(OFF);
    proc_parser->has_questionable_shift_occured__ = OFF;
    
@ Clean up procedure parse stack but leave as ready to parse again.
The following points are done:\fbreak
\ptindent{1) clean up trace activity: normally done when parse object destroyed}
\ptindent{2) leave first record on stack for efficiency}
\ptindent{3) make sure first stack symbol on stack checked for delete attribute} 
@<clean up procedure parse stack but leave as ready to parse again@>=
    // unwind the stack so that only its first record is left
    proc_parser->remove_from_stack(proc_parser->parse_stack__.top_sub__ - 1);
    // the remaining record may still hold a symbol flagged for automatic deletion
    CAbs_lr1_sym* sym = proc_parser->top_stack_record()->symbol__;
    if (sym != 0){
      if (sym->auto_delete__ == ON){ 
        @<Trace procedure pp's last symbol on stack set as autodelete@>;
        delete sym;
      }
      proc_parser->top_stack_record()->set_symbol(0);// keeping a clean stack
    }
    // put the stack back on the automaton's start state, ready for the next call
    proc_parser->parse_stack__.lr_stk_init(*proc_parser->fsm_tbl__->start_state__);
@ Let's procedure parallel parse. do u?.    
@<let's procedure parallel parse. do u?@>=    
 // run the parse and keep its result: it becomes the procedure's return value
 THR_result rslt = proc_parser->parallel_parse();

@ Establish procedure parser's initial token setting.
   When the thread is established
   and waiting to be awakened, the calling grammar sets the following variables within the
   critical region of the called thread: 
   |from_thread__|, |pp_requesting_parallelism__|,  and |no_competing_pp_ths__|.

Distinguish between chained procedure call and just a plain old thread call
optimized by a procedure call. The chained T is the Caller parser's previous
``go to'' state.
Its current token position is the tail character of the stacked T
as the caller parser's current token context is the lookahead token and
 position returned from the called thread.  
@<establish procedure initial parser's token setting@>=
    // A zero procedure call address in the caller's top state means a plain call;
    // otherwise this is a chained call starting from a T stacked by the caller.
    if(Caller_pp->top_stack_record()->state__->proc_call_addr__ == 0){// regular proc call
      // start at the caller's current token and position
      proc_parser->override_current_token(*Caller_pp->current_token()
      ,Caller_pp->current_token_pos__);
      proc_parser->set_start_token(*Caller_pp->current_token());
      proc_parser->set_start_token_pos(Caller_pp->current_token_pos__);
      proc_parser->top_stack_record()->set_symbol(proc_parser->current_token());
    }else{//chained proc call
      Cparse_record* pr = // curr stk pos is rel. 1 but access is rel to 0 UGH!
            Caller_pp->get_stack_record(Caller_pp->current_stack_pos()-2); 
      // the chained T sits one position before the caller's current token
      int new_pos = Caller_pp->current_token_pos__-1;
      proc_parser->override_current_token(*pr->symbol__,new_pos);
      proc_parser->set_start_token(*pr->symbol__);// chained T
      proc_parser->set_start_token_pos(new_pos);
    }
    // take over the caller's token containers, error queue, recycle bin and lookup functor
    proc_parser->token_supplier__ = Caller_pp->token_supplier__;
    proc_parser->token_producer__ = Caller_pp->token_producer__;
    proc_parser->error_queue__ = Caller_pp->error_queue__;
    proc_parser->recycle_bin__ = Caller_pp->recycle_bin__;
    proc_parser->sym_lookup_functor__ = Caller_pp->sym_lookup_functor__;
@ Create procedure communication variables.
@<create procedure communication variables@>=
  // NOTE(review): |ma| is presumably the work buffer the two messages below
  // get formatted into --- confirm against the sections using it
  char ma[SMALL_BUFFER_4K];
  // trace message formats: each takes an integer and then a string argument
  const char* pp_start = "YACCO2_MSG__::PROC::%i::%s start parsing\n";
  const char* pp_stop = "YACCO2_MSG__::PROC::%i::%s stop parsing\n";
@<uns@>;
  
@ Set procedure parameter passed to pp as a message.
@<set procedure parameter passed to pp as a message@>=
   // flag the called parser as running by procedure call
   proc_parser->launched_as_procedure__ = true;
   // the caller is both the requester of parallelism and the originating thread
   proc_parser->from_thread__ = Caller_pp;
   proc_parser->pp_requesting_parallelism__ = Caller_pp;
   // hand over the caller's count of requested threads
   proc_parser->no_competing_pp_ths__ = Caller_pp->no_requested_ths_to_run__;


@** Determine threads to launch by their first sets.\fbreak
As an optimization before launching the thread, 
the thread's first set is checked to see if the start token, or the meta terminals
\ALLshift{} and \INVshift{}  are present. 
Why are the meta terminals checked?
\ALLshift{} is the `all shift' terminal
used as a wild terminal facility; it handles all terminals so even though the
start token is not found in the first set, the wild token facility indicates its presence.
I do not check to see if the finite state automaton's ``all shift'' facility is on.
Its presence in the first set is sufficient: testing the
grammar's finite automaton to see if this facility is turned off is enough paranoia.


What about \INVshift{} the invisible shift meta terminal?
In this case it denotes  an epsilon rule within
the start state configuration of the grammar so you better launch the thread as you
do not know what's happening past that point when the token stream is being consumed.
Yacco2's linker goes through this transient chain of first sets: internal
discovery of what's after the \INVshift{} be it internal or 
external first sets from called threads.
I should rely on the first set but as a precaution,
I err to try it and if it doesn't work so what. It's a bit of overhead but at least it's
better than not trying out the thread and 
having an irate grammar writer to deal with.
This type of grammatical situation is very rare but still needs checking.

This is a major optimization! The ``pp'' grammar checks in its parallel
table list for the eligible threads that have the current terminal in their |first set|.
If found, the parallel entry for those threads are added to the 
potential thread list. Only then does the parallel parse 
launch the threads.
By absorbing the optimization into the ``pp'' thread it eliminates
false thread starts.
Now it's zippy-do-da. Do u hear the sirens? 
Hey u putting jell in y're hair?: Not zippy or whatever adjective or adverb expressed.

Take ...

@<External rtns and variables@>+=
// |Current_T_id|: id of the terminal whose thread bit map is consulted.
// |Th_list|: receives the thread entries found.
// |P_tbl|: the state's thread table; its bit map is composed on first use.
extern void find_threads_by_first_set
(yacco2::USINT Current_T_id
,yacco2::yacco2_threads_to_run_type& Th_list
,yacco2::State_s_thread_tbl& P_tbl);

@*2 |find_threads_by_first_set|.\fbreak
Work the global optimization of first sets and Terminals: See Yacco2's Linker.
State's thread list against the T's thread list.
@<accrue thread code@>+=
// Append to |Th_list| every thread entry whose id is set both in the bit map
// of terminal |Current_T_id| and in the bit map of the state table |P_tbl|.
// The state's bit map is composed the first time the state is seen.
extern void yacco2::find_threads_by_first_set
(yacco2::USINT Current_T_id
,yacco2::yacco2_threads_to_run_type& Th_list
,yacco2::State_s_thread_tbl& P_tbl){
  // global table of thread entries, indexed by thread id
  yacco2::thread_array_record* thds = (yacco2::thread_array_record*)yacco2::THDS_STABLE__;
  // the sections below share locals, so their order matters
  @<determine if there is a bit map gened for state. no do it@>;
  @<define and set work variables of Terminal having threads @>;
  @<define and set state's dynamic work variables@>;
  @<search T's thd ids against State's thd id list...@>;
}

@*2 Determine if there is a bit map gened for state. no do it.\fbreak
As the grammar's state configuration is gened locally and has no knowledge
about the global number of threads, its configuration has an indirection towards
the thread entry having a pre-agreed to naming convention of
the letter ``I'' concatenated with the thread name without its namespace. 
For example |ITH_eol| would be the global thread entry object for the ``eol'' grammar.

To make the thread launching efficient, a thread id bit map is used and searched.
Cuz the state has just a list of |Thread_entry| pointers, this must be converted into the
global bit map configuration. This is done per parallelism request.
To offset each hit, the state's configuration contains a pointer for this dynamically composed
environment. 
As threads are more efficient than procedure calls, this is a one time inefficiency per state
being gened on the fly.
Now why again are threads more efficient?
Cuz of objects and their rights of passage: Too much start-run-cleanup.
@<determine if there is a bit map gened for state. no do it@>=
	// One time only: size a state's bit map from the global thread count,
	// rounded up to whole words.
	static int no_of_gbl_thds(0);
	static int no_bit_mapped_words(0);
	static bool one_time(false);
	if(one_time == false){
		one_time = true;
		no_of_gbl_thds = thds->no_entries__;
		div_t x = div(no_of_gbl_thds,BITS_PER_WORD);
		if(x.rem != 0) ++x.quot;
		no_bit_mapped_words = x.quot;
	}
	if(P_tbl.thd_id_bit_map__ == 0){// state has no bit map yet: carve one out of the pool
		@<define and set work variables of state threading table@>;
		yacco2::ULINT (*maps) = (yacco2::ULINT (*))yacco2::BIT_MAPS_FOR_SALE__;
		P_tbl.thd_id_bit_map__ = (yacco2::ULINT (*))&maps[yacco2::BIT_MAP_IDX__];
		yacco2::BIT_MAP_IDX__+= no_bit_mapped_words;
		if(yacco2::BIT_MAP_IDX__ > yacco2::TOTAL_NO_BIT_WORDS__){// pool exhausted
		  char a[BUFFER_SIZE];
@.Err no more bit maps: in Linker @>
		  yacco2::KCHARP msg = "Err no more bit maps: %i; adjust TOTAL_NO_BIT_WORDS in Linker";
		  sprintf(a,msg,yacco2::BIT_MAP_IDX__);
		  Yacco2_faulty_precondition(a,__FILE__,__LINE__);  
		  exit(1);
		} 
		// NOTE(review): the ORing below presumes the pool words start out zeroed; confirm
		div_t dd;
		for(;S_no_thd_entries > 0;--S_no_thd_entries,++S_cur_thread_entry_ptr){
		  yacco2::USINT S_thd_id = (*S_cur_thread_entry_ptr)->thd_id__;
		  dd = div(S_thd_id,BITS_PER_WORD);// quot: word index, rem: bit within the word
		  // shift a |ULINT| typed one: a plain |int| one shifted into its sign bit is
		  // undefined and would sign extend should |ULINT| be wider than |int|
		  ULINT bit_pos_value = static_cast<ULINT>(1) << dd.rem;
		  P_tbl.thd_id_bit_map__[dd.quot] |= bit_pos_value;
		}
	}

@ Define and set state's dynamic work variables.
@<define and set state's dynamic work variables@>=
  // preload word 0 of the state's thread id bit map for the search loop
  yacco2::ULINT S_cur_thd_id_map = P_tbl.thd_id_bit_map__[0];
  
@ Define and set work variables of state threading table.
@<define and set work variables of state threading table@>=
  // cursor over the state's thread entry pointers, starting at its first entry
  yacco2::Thread_entry** S_cur_thread_entry_ptr = (yacco2::Thread_entry**)&P_tbl.first_entry__;
  // count of entries still to visit
  yacco2::USINT S_no_thd_entries = P_tbl.no_entries__;

@ Define and set work variables of Terminal having threads.
@<define and set work variables of Terminal having threads @>=
  // global table giving, per terminal id, the bit map of thread ids having it
  T_array_having_thd_ids* t_array_having_thd_ids =
      (T_array_having_thd_ids*)yacco2::T_ARRAY_HAVING_THD_IDS__;
  // the current terminal's entry in that table
  yacco2::thd_ids_having_T* T_cur_thd_id_having_T_ptr =
      t_array_having_thd_ids->first_entry__[Current_T_id];
  // preload word 0 of the terminal's bit map for the search loop
  yacco2::ULINT T_cur_thd_id_map = T_cur_thd_id_having_T_ptr->first_thd_id__[0];
 
@ Search T's thread ids against the State's thread entry list. fnd add to thread list.
This is a linear search of segments. 
It is worked like a merge between two variable length
lists of points. Its cost is linear bounded depending where the state's 
thread ids are relative to T's thread ids: before, within, or after. 
This linear bound can be 1 to the
number of items in the largest list.
 
Both meta terminals \ALLshift{} and \INVshift{} first sets get
generated in Yacco2's linker.
It is much more efficient to go thru a State and T list once.
The expense is to explode the  \ALLshift{} meta terminal into all the terminals.
This should be a rare occurrence to have a thread's first set contain this
meta terminal.

Bit maps are used: lets hear it for compression and possibly speed.
To extract more speed, the inline assembler directive is used when developed on
a Microsoft environment for the Intel 486 chipset.
Without it, the bit map strategy is slower than the linear list.
For the moment |@<extract thread ids from map and add their |thread_entry| to thread list@>|
is the portable piece of code until I improve the runtime strategy.
@<search T's thd ids against State's thd id list. fnd add to-run thread list@>=
  int base_idx_for_thd_id_calc(0);
  int cur_bit_word_idx(0);
  do{
	yacco2::ULINT bit_map = T_cur_thd_id_map & S_cur_thd_id_map;
	if(bit_map != 0){
      base_idx_for_thd_id_calc = cur_bit_word_idx*BITS_PER_WORD;
	  @<extract thread ids from map and add their |thread_entry| to thread list@>;
	}
	++cur_bit_word_idx;
	T_cur_thd_id_map = T_cur_thd_id_having_T_ptr->first_thd_id__[cur_bit_word_idx];
	S_cur_thd_id_map = P_tbl.thd_id_bit_map__[cur_bit_word_idx];
  } while (cur_bit_word_idx < no_bit_mapped_words);

@ Extract thread ids from map and add their |thread_entry| to thread list.
Now the fun begins. What threads are to be run. The bits must be tested individually
and their bit position converted into their bit map vector co-ordinates:
quotient * 32 + bit position.

For example, word 0, bit position 0 is thread id 0. Word 1 bit position 0 is thread id 32. 
@<extract thread ids from map and add their |thread_entry| to thread list@>=
  yacco2::ULINT bit(1);
  for(int bit_pos=0;bit_pos <= BITS_PER_WORD_REL_0;++bit_pos){
    if(bit_map & bit){
		@<add thread entry whose first set contains the current token@>;
	}
	bit <<= 1; // next bit: rt to left order; insignificant to significant order
  }

@ Add thread entry whose first set contains the current token.
@<add thread entry whose first set contains the current token@>=  
  // thread id: first id of the current word plus the bit's position within it
  yacco2::USINT thd_id =  base_idx_for_thd_id_calc + bit_pos;
  // the id indexes the global thread entry table
  Th_list.push_back(thds->first_entry__[thd_id]);

@ Ms Intel 486 Assembler extract thread ids from map and add their |thread_entry| to thread list.
@<Ms Intel 486 assembler extract ids from map and add their |thread_entry| to thread list@>=
 // Inline assembler variant of the bit extraction: scan |bit_map| for set
 // bits, turn each one off, and push the matching thread entry onto |Th_list|.
 // NOTE(review): the scan registers esi, edi and ebx are loaded in the first
 // asm block and reused after the |push_back| call when control jumps back to
 // the scan label; confirm the compiler keeps them intact across that call.
 yacco2::Thread_entry*(*pte)[]= &thds->first_entry__;
 yacco2::Thread_entry* te;
  __asm {@/
  pushad@/
  mov ebx,pte; // addr of thread stable[] of thread entries
  mov esi,bit_map; // copy of bit map
  mov edi,base_idx_for_thd_id_calc;@/
scn_bits:  bsf eax,esi; // eax: idx of bit, esi: copied map to search
  jz end_of_scan; // map completely scanned
  btr esi,eax; // clear the fnd bit in map esi: the bit map, eax: the fnd bit pos to turn off
  add eax,edi; // calced thd id
  mov edx,[ebx][eax*4];// fetch addr of thread entry
  mov te,edx;// store the thread entry address
}@/
  Th_list.push_back(te);@/
  __asm {@/
    jmp scn_bits; // go scan more bits
  }@/
  end_of_scan:@/
  __asm {@/
      popad; // clean up the dodos
	}