-- package to construct and manipulate suffix trees -- construction is by Ukkonen's on-line algorithm -- Authors: Rob Irving, Lorna Love -- Final version: 24 January 2001 with Ada.Text_Io; use Ada.Text_Io; with Unchecked_Deallocation; with Char_Set_Package; use Char_Set_Package; package body Suffix_Tree is pragma Suppress(All_Checks); procedure Give_Back is new Unchecked_Deallocation(Node_Type, Tree_Type); -------------------------------------------------------------------------- function Unused_Char (S : String) return Character is -- returns an ASCII character that does not appear in S, -- giving priority to printable characters; in the case where there -- is no such character, raises the exception No_Dummy_Char_Available Cs : Char_Set_Type; Next : Integer; C : Character; First_Printable : constant := 32; Last_Printable : constant := 127; First_Non_Printable : constant := 0; Last_Non_Printable : constant := 31; begin Cs := Empty_Char_Set; for I in S'range loop Add(S(I), Cs); end loop; Next := First_Printable; while Next <= Last_Printable loop C := Character'Val(Next); if not Is_Member(C, Cs) then return C; else Next := Next + 1; end if; end loop; Next := Last_Non_Printable; while Next >= First_Non_Printable loop C := Character'Val(Next); if not Is_Member(C, Cs) then return C; else Next := Next - 1; end if; end loop; raise No_Dummy_Char_Available; end Unused_Char; -------------------------------------------------------------------------- Infinity : constant := Integer'Last; -- reference type for use during tree construction type Ref_Type is record Pointer : Tree_Type; Left_Label, Right_Label : Natural; end record; -------------------------------------------------------------------------- procedure Build_Suffix_Tree (S : in String; T : out Tree_Type) is Z : String(1..S'Length+1); procedure Search_List (In_Na : in Tree_Type; Ch : in Character; Out_Na : out Tree_Type) is Next : Tree_Type := In_Na; begin Out_Na := null; while Next /= null loop if Z(Next.Left_Label) = Ch then Out_Na := Next; Next := null; else Next := Next.Sibling; end if; end loop; end Search_List; function Is_End_Point (R : Ref_Type; K : Natural) return Boolean is Ch : Character; Na : Tree_Type; begin if R.Pointer = null then return True; elsif R.Left_Label <= R.Right_Label then Ch := Z(R.Left_Label); else Ch := Z(K); end if; Search_List(R.Pointer.Child, Ch, Na); if Na = null then return False; elsif Z(Na.Left_Label + R.Right_Label - R.Left_Label + 1) = Z(K) then return True; end if; return False; end Is_End_Point; procedure Make_Node_Explicit (R : in out Ref_Type; Na : out Tree_Type) is X : Positive; Pred_Tna, Tna : Tree_Type; begin if R.Left_Label > R.Right_Label then Na := R.Pointer; else Tna := R.Pointer.Child; Pred_Tna := null; while Z(Tna.Left_Label) /= Z(R.Left_Label) loop Pred_Tna := Tna; Tna := Tna.Sibling; end loop; X := Tna.Left_Label; Na := new Node_Type'(X, X + R.Right_Label - R.Left_Label, Tna, Tna.Sibling, (Branch, null)); Tna.Left_Label := X + R.Right_Label - R.Left_Label + 1; Tna.Sibling := null; if Pred_Tna = null then R.Pointer.Child := Na; else Pred_Tna.Sibling := Na; end if; end if; end Make_Node_Explicit; procedure Make_Canonical (R : in out Ref_Type) is Tna : Tree_Type; begin if R.Left_Label <= R.Right_Label then Search_List(R.Pointer.Child, Z(R.Left_Label), Tna); while Tna.Right_Label - Tna.Left_Label <= R.Right_Label - R.Left_Label loop R.Left_Label := R.Left_Label + Tna.Right_Label - Tna.Left_Label + 1; R.Pointer := Tna; if R.Left_Label <= R.Right_Label then Search_List(R.Pointer.Child, Z(R.Left_Label), Tna); end if; end loop; end if; end Make_Canonical; procedure Update (R : in out Ref_Type; K : in Natural) is Tna, Old_Tna, New_Tna, Pred_Tna, Curr_Tna : Tree_Type; begin Old_Tna := null; while not Is_End_Point(R, K) loop Make_Node_Explicit(R, Tna); New_Tna := new Node_Type'(K, Infinity, null, null, (Branch, null)); Pred_Tna := null; Curr_Tna := Tna.Child; while Curr_Tna /= null loop Pred_Tna := Curr_Tna; Curr_Tna := Curr_Tna.Sibling; end loop; if Pred_Tna = null then Tna.Child := New_Tna; else Pred_Tna.Sibling := New_Tna; end if; if Old_Tna /= null then Old_Tna.Var.Suffix_Link := Tna; end if; Old_Tna := Tna; R.Pointer := R.Pointer.Var.Suffix_Link; if R.Pointer = null and R.Left_Label <= R.Right_Label then R.Pointer := T; R.Left_Label := R.Left_Label + 1; end if; Make_Canonical(R); end loop; if Old_Tna /= null then Old_Tna.Var.Suffix_Link := R.Pointer; end if; end Update; procedure Traverse_Suffix_Tree (T : in out Tree_Type; B, D : in Integer) is Tna : Tree_Type; begin Tna := T.Child; if Tna = null then -- T.all := (Leaf, T.Left_Label, T.Right_Label, T.Child, T.Sibling, B); T.Var := (Leaf, B); else while Tna /= null loop if Tna.Right_Label = Infinity then Tna.Right_Label := S'Length; end if; Traverse_Suffix_Tree(Tna, Tna.Left_Label - D, D + Tna.Right_Label - Tna.Left_Label + 1); Tna := Tna.Sibling; end loop; end if; end Traverse_Suffix_Tree; I : Natural := 0; R : Ref_Type; begin Z := S & Unused_Char(S); T := new Node_Type'(1, 1, null, null, (Branch, null)); R := (T, 1, 0); while I < Z'Length loop if R.Pointer = null then R.Pointer := T; R.Left_Label := R.Left_Label + 1; end if; R.Right_Label := I; I := I + 1; Make_Canonical(R); Update(R, I); end loop; Traverse_Suffix_Tree(T, 0, 0); exception when No_Dummy_Char_Available => Put_Line("Cannot build suffix tree."); Put_Line("Every ASCII character appears in the string, so " & "no unique terminating character is available"); raise; end Build_Suffix_Tree; ----------------------------------------------------------------------------- procedure Build_Gen_Suffix_Tree ( S1, S2 : in String; T : out Tree_Type) is begin Build_Suffix_Tree(S1 & Unused_Char(S1 & S2) & S2, T); end Build_Gen_Suffix_Tree; ----------------------------------------------------------------------------- procedure Search_Suffix_Tree( T : in Tree_Type; S : in String; X : in String; Found : out Boolean; Pos : out Natural) is Tna : Tree_Type := T; I : Natural := X'First; J, Y : Natural; begin while I <= X'Last loop Tna := Tna.Child; loop if Tna = null then exit; elsif Tna.Left_Label > S'Length or else S(Tna.Left_label) /= X(I) then Tna := Tna.Sibling; else exit; end if; end loop; if Tna = null then Found := False; return; else Y := Tna.Left_Label + 1; J := Tna.Right_Label; I := I+1; while I <= X'Last and Y <= J loop if X(I) /= S(Y) then Found := False; return; else I := I+1; Y := Y+1; end if; end loop; if I > X'Last then while Tna.Child /= null loop Tna := Tna.Child; end loop; Pos := Tna.Var.Suffix; Found := True; return; end if; end if; end loop; end Search_Suffix_Tree; ----------------------------------------------------------------------------- procedure Traverse_Leaves(Tna : in Tree_Type ; l : in out Integer_List_Type ) is begin if Tna /= null then if Tna.Var.Status = Leaf then Insert(TNA.Var.Suffix, L); Traverse_Leaves(Tna.Sibling, L); else Traverse_Leaves(Tna.Child, L); Traverse_Leaves(Tna.Sibling, L); end if; end if; end Traverse_Leaves; ----------------------------------------------------------------------------- procedure Fully_Search_Suffix_Tree( T : in Tree_Type; S : in String; X : in String ; L : out Integer_List_Type ) is Tna : Tree_Type := T; I : Natural := X'First; J, Y : Natural; begin Create_Empty_List(L); while I <= X'Last loop Tna := Tna.Child; loop if Tna = null then exit; elsif Tna.Left_Label > S'Length or else S(Tna.Left_label) /= X(I) then Tna := Tna.Sibling; else exit; end if; end loop; if Tna = null then return; else Y := Tna.Left_Label + 1; J := Tna.Right_Label; I := I+1; while I <= X'Last and Y <= J loop if X(I) /= S(Y) then return; else I := I+1; Y := Y+1; end if; end loop; end if; end loop; -- Visit all leaf descendants of TNA to collect all occurrences if Tna.Var.Status = Leaf then Insert(TNA.Var.Suffix, L); else Traverse_Leaves(Tna.Child, L); end if; end Fully_Search_Suffix_Tree; ----------------------------------------------------------------------------- procedure Count_Leaves(Tna : in Tree_Type ; Count : in out Integer) is begin if Tna /= null then if Tna.Var.Status = Leaf then Count := Count + 1; Count_Leaves(Tna.Sibling, Count); else Count_Leaves(Tna.Child, Count); Count_Leaves(Tna.Sibling, Count); end if; end if; end Count_Leaves; ----------------------------------------------------------------------------- function Num_Occurrences(T : Tree_Type; S, X : String) return Natural is Tna : Tree_Type := T; I : Natural := X'First; J, Y : Natural; Count : Natural := 0; begin while I <= X'Last loop Tna := Tna.Child; loop if Tna = null then exit; elsif Tna.Left_Label > S'Length or else S(Tna.Left_label) /= X(I) then Tna := Tna.Sibling; else exit; end if; end loop; if Tna = null then return 0; else Y := Tna.Left_Label + 1; J := Tna.Right_Label; I := I+1; while I <= X'Last and Y <= J loop if X(I) /= S(Y) then return 0; else I := I+1; Y := Y+1; end if; end loop; end if; end loop; -- Visit all leaf descendants of TNA to count all occurrences if Tna.Var.Status = Leaf then return 1; else Count_Leaves(Tna.Child, Count); return Count; end if; end Num_Occurrences; ----------------------------------------------------------------------------- procedure Traverse_For_Lrs(T : in Tree_Type; Pos1, Pos2, Len : out Natural) is Last_Suffix, Penult_Suffix : Natural; -- suffix numbers of the last -- two leaf nodes visited procedure Traverse(T : in Tree_Type; Sd : in Natural) is -- traverses the subtree rooted at T; SD is the string depth -- of the root of this subtree; updates Len, Pos1 and Pos2 -- if this is a branch node with highest string depth seen Tna : Tree_Type; begin Tna := T.Child; while Tna /= null loop Traverse(Tna, Sd + Tna.Right_Label - Tna.Left_Label + 1); if Tna.Var.Status = Leaf then Penult_Suffix := Last_Suffix; Last_Suffix := Tna.Var.Suffix; end if; Tna := Tna.Sibling; end loop; if T.Var.Status = Branch and Sd > Len then Len := Sd; Pos1 := Penult_Suffix; Pos2 := Last_Suffix; end if; end Traverse; begin Len := 0; Traverse(T, 0); end Traverse_For_Lrs; ----------------------------------------------------------------------------- procedure Traverse_For_Lcs(T : in Tree_Type; S1_Len : in Integer; Pos1, Pos2, Len : out Natural) is Last_Suffix1, Last_Suffix2 : Natural; -- suffix numbers of the last leaf -- nodes visited representing suffixes -- in strings S1 and S2 respectively Dummy1, Dummy2 : Boolean; procedure Traverse(T : in Tree_Type; Sd : in Natural; First, Second : out Boolean) is -- traverses the subtree rooted at T; SD is the string depth of the -- root of this subtree; returns Boolean values in First and Second -- to indicate whether that subtree contains suffixes from the first -- string and from the second string respectively Tna : Tree_Type; S1_Suffix, S2_Suffix : Boolean; New_Sd : Natural; begin First := False; Second := False; Tna := T.Child; while Tna /= null loop New_Sd := Sd + Tna.Right_Label - Tna.Left_Label + 1; Traverse(Tna, New_Sd, S1_Suffix, S2_Suffix); if New_Sd > Len and S1_Suffix and S2_Suffix then Len := New_Sd; Pos1 := Last_Suffix1; Pos2 := Last_Suffix2 - S1_Len - 1; end if; if Tna.Var.Status = Leaf then if Tna.Var.Suffix <= S1_Len then Last_Suffix1 := Tna.Var.Suffix; First := True; else Last_Suffix2 := Tna.Var.Suffix; Second := True; end if; else First := First or S1_Suffix; Second := Second or S2_Suffix; end if; Tna := Tna.Sibling; end loop; end Traverse; begin Len := 0; Traverse(T, 0, Dummy1, Dummy2); end Traverse_For_Lcs; -------------------------------------------------------------------------- procedure Destroy_Suffix_Tree(T : in out Tree_Type) is begin if T /= null then if T.Child /= null then Destroy_Suffix_Tree(T.Child); end if; if T.Sibling /= null then Destroy_Suffix_Tree(T.Sibling); end if; Give_Back(T); end if; end Destroy_Suffix_Tree; -------------------------------------------------------------------------- procedure Traverse_For_Stats(T : in Tree_Type; Num_Nodes : out Natural; Path_Length_1, Path_Length_2 : out Natural) is procedure Rec_Traverse(Na : in Tree_Type; Depth, Dist : in Natural) is begin if Na /= null then Num_Nodes := Num_Nodes + 1; Path_Length_1 := Path_Length_1 + Depth; Path_Length_2 := Path_Length_2 + Dist; Rec_Traverse(Na.Child, Depth + 1, Dist + 1); Rec_Traverse(Na.Sibling, Depth, Dist + 1); end if; end Rec_Traverse; begin Num_Nodes := 0; Path_Length_1 := 0; Path_Length_2 := 0; Rec_Traverse(T, 0, 0); end Traverse_For_Stats; end Suffix_Tree;