perm filename BITS.SAI[1,LMM] blob sn#131277 filedate 1974-11-17 generic text, type C, neo UTF8
COMMENT ⊗   VALID 00005 PAGES
C REC  PAGE   DESCRIPTION
C00001 00001
C00002 00002	begin  "pattern finder"
C00006 00003	!  		Set manipulation routines 		
C00009 00004	 ! This is the main procedure of the program.  It is given two sets A and B
C00015 00005	str BESTREE int BESTCOST
C00018 ENDMK
C⊗;
begin  "pattern finder"

require 1000 system!pdl;
require "⊂⊃" delimiters;
define !=⊂ COMMENT ⊃;
define ref=⊂ REFERENCE ⊃;
define proc=⊂ PROCEDURE ⊃;
define int=⊂ INTEGER ⊃;
define str=⊂ STRING ⊃;
define crlf=⊂ ('15&'12) ⊃;

int DUMMY;

int NUMBITS,NODECNT,ALL; ! NUMBITS is the number of bits in the input.
			  2*NUMBITS is the number of data items.
			  NODECNT is a counter of the number of
			   times the main procedure is called
			  ALL is a word with bits 1 to NUMBITS turned on;

str ITSA, ITSB;       ! The tree is stored as a string (since in SAIL strings
                        are the only easily manipulatable variable length data
                        types).  A decision tree can be defined by:

			   <tree>::= IT'S A | IT'S B |
				      if <bit> then <tree> else <tree>.

			The string representation is thus
			   <treerep>::= NUMBITS+1 | NUMBITS+2 |
					<bitnumber>&<treerep>&<treerep>

			NUMBITS+1 is the code for "ITSA" and
			NUMBITS+2 for "ITSB" (since those numbers
			are never legal bit numbers).

			see the procedure OUTREE for the decoding algorithm;


!					I/O routines		;
str INFILE; int INJFN, OUTJFN;

define SAY(A)=⊂ out(OUTJFN, A & CRLF) ⊃;

str proc ASK(str Q);
  if INFILE="TTY:" then
	begin outstr(Q); return(intty) end
   else return(sini(INJFN,200,'12));

str proc CONVERT(int BITNUM); 
    begin 	! Convert bit number counting from right
		  to 2 char column number, counting from left ;
	BITNUM←NUMBITS+1-BITNUM;
	return ((if BITNUM<10 then " " else NULL) & CVS(BITNUM))
    end;

recursive proc OUTREE(int LEVEL; ref str TREE);
       begin int I,V;
	     V←LOP(TREE);
	     if      V=ITSA then say("it's A")
	     else if V=ITSB then say("it's B")
	     else begin
		out(outjfn,CONVERT(V) & " on  => ");
		OUTREE(LEVEL+1,TREE);
		for I←1 step 1 until LEVEL do out(outjfn,"          ");
		out(outjfn,CONVERT(V) & " off => ");
		OUTREE(LEVEL+1,TREE);
	      end;
       end;

!  		Set manipulation routines 		;

define RESTBITS(X)=⊂ (X land (X-1)) ⊃; ! X with the low order 1 bit turned off;

define FIRSTBIT(X)=⊂(X xor RESTBITS(X))⊃; ! word with low order bit of X only;

define ONBIT(X)=⊂ (1 lsh (X-1)) ⊃;       ! word with bit number X turned on ;

define NOTEMPTY(X)=⊂ (X≠0) ⊃;
define EMPTY(X)=⊂ (X=0) ⊃;		 ! for notational convenience;

simple int proc SIZE(int X);      ! Returns the number of bits in X which are on;
	begin int R;
	 R←0;
	 while NOTEMPTY(X) do
	   begin X←RESTBITS(X); R←R+1 end;
	 return(R);
	end;

!		Open files and get the number of bits 	;

int STARTIM;

outstr("OUTPUT TO: "); OUTJFN←openfile(NULL,"WC");
outstr("INPUT FROM: "); INFILE←jfns(INJFN←openfile(NULL,"ROC"),0);

STARTIM←runtm('400000,DUMMY);
NODECNT←0;

NUMBITS←cvd(ASK("SIZE? "));

begin "inner block"
safe int array ONA,ONB[1:NUMBITS];
        ! ONA[i] is a set representation of those elements of A which have bit I
        turned on - similarly for ONB and B.  By splitting up the representation
        into separate words for A and B, we can fit the problem into the PDP10's
        36 bit words (20 bits for A and 20 for B);

proc INITIALIZE;
 begin "init"			Initialize ON arrays from data		;
  int INSET,J,I;
  ALL←(1 lsh NUMBITS)-1;
  ITSA←NUMBITS+1; ITSB←NUMBITS+2;
  for I←1 step 1 until NUMBITS do
   ONA[I]←ONB[I]←0;
  for I←1 step 1 until NUMBITS do
   begin
    INSET←cvo(ASK("A ?"));
    for J←1 step 1 until NUMBITS do
	if EMPTY(ONBIT(J) land INSET) then ONA[J]←ONA[J] lor ONBIT(I);
    INSET←cvo(ASK("B ?"));
    for J←1 step 1 until NUMBITS do
 	if EMPTY(ONBIT(J) land INSET) then ONB[J]←ONB[J] lor ONBIT(I);
    end;
  end "init";
 ! This is the main procedure of the program.  It is given two sets A and B
   (subsets of the original A and B respectively) and a set BITS of those bits
   which can be looked at to distinguish them.  SIZES is the size of A + the size
   of B.  COST is the "cost" of reaching this point in the tree (initially zero).
   BEST is the cost of the best tree found so far (initially infinity).  If a
   better tree than BEST is found, it returns the cost of that tree (to which COST
   has been added) and resets BESTREE to a representation for that tree. If no
   better tree is found, it just returns BEST and leaves BESTREE alone;

recursive int proc MINTREE(int A,B,SIZES,BITS,COST,BEST;ref str BESTREE);
 begin "mintree"
	NODECNT←NODECNT+1;	
	if COST ≥ BEST then return(BEST);	
	if EMPTY(A) or EMPTY(B) then
	 begin		  ! one of the sets is empty. Thus the added cost is 0,
			    and the "tree" is a single node;
	      BESTREE←(if EMPTY(A) then ITSB else ITSA);
	      return (COST);
	 end;
	begin "recur"
	     int BIT,BITNO,NEWB,REST,ADDED,ONSIZ,AON,AOFF,BON,BOFF,C,D; 
	     str ONTREE,OFFTREE;
                   ! loop thru all of BITS, trying for each bit the decision tree
                   generated by "deciding" on that bit.  The minimum cost tree we
                   can generate has a cost of 
                    |A|+|B|+cost(min tree for A ∩ ON(bit) and B ∩ ON(bit))
                           +cost(min tree for A ∩ OFF(bit) and B ∩ OFF(bit))
                   The |A|+|B| is the number of bits we look at this level and is
                   passed to us as the parameter SIZES;
	COST←COST+SIZES;
	if COST ≥ BEST then return(BEST);
	REST←BITS;		! REST is those bits not yet considered;
	BITNO←0; 		! BITNO will be bit number under consideration;
	while NOTEMPTY(REST) do begin "bitloop"
		BIT←FIRSTBIT(REST);	
		do BITNO←BITNO+1
		 until NOTEMPTY (REST land ONBIT(BITNO));
		REST←RESTBITS(REST);
		NEWB←BITS xor BIT;	
		AON←ONA[BITNO] land A;
		BON←ONB[BITNO] land B;
		AOFF←A xor AON;
		BOFF←B xor BON;
		if (AON or BON) and (AOFF or BOFF)
		  then begin  ! if this bit is either on or off in all of A or B,
				don't bother looking at it;

			ONSIZ←SIZE(AON)+SIZE(BON);
			ADDED←(if AOFF and BOFF then SIZES-ONSIZ else 0);

                 ! if some decision must be made amongst the "off" data
                   items, the cost will be at least |AOFF|+|BOFF|. This is added
                   to the cost of the "on" tree here to improve the pruning of
                   the search tree;

 			C←MINTREE(AON,BON,ONSIZ,NEWB,COST+ADDED,BEST,ONTREE);
			if C < BEST then
			 begin	D←MINTREE(AOFF,BOFF,SIZES-ONSIZ,
					NEWB,C-ADDED,BEST,OFFTREE);
				if D<BEST then
				 begin BEST←D;
				       BESTREE←BITNO & ONTREE & OFFTREE;
				 end;
			 end;
		      end;
	   end "bitloop";
	return (BEST);
	end "recur";
 end "mintree";
str BESTREE; int BESTCOST;

INITIALIZE;

BESTCOST←MINTREE(ALL,ALL,2*NUMBITS,ALL,0,NUMBITS*NUMBITS*2,BESTREE);

say(" #NODES SEARCHED = " & cvs (NODECNT));
say(" DECISION COST = " & cvs(BESTCOST));
OUTREE(0,BESTREE);
say(cvs(runtm('400000,dummy)-STARTIM) & " MS IN EXECUTION");
closf(OUTJFN); closf(INJFN);
end "inner block";
end "pattern finder";




------------------------------------------------------------------------



 #NODES SEARCHED = 93752
 DECISION COST = 130
 3 on  => 19 on  => 13 on  => 20 on  => it's A
                              20 off => it's B
                    13 off => it's A
          19 off => it's B
 3 off => 14 on  =>  6 on  => it's A
                     6 off => 20 on  => it's A
                              20 off => 15 on  => it's A
                                        15 off => it's B
          14 off => 15 on  => 20 on  => it's A
                              20 off => it's B
                    15 off => it's B
55515 MS IN EXECUTION