-- package to construct and manipulate suffix arrays -- Authors: Rob Irving, Lorna Love -- Final version: 24 January 2001 with Char_Set_Package; use Char_Set_Package; with Integer_List_Package; use Integer_List_Package; package Sa_Standard is type Sa_Element is private; type Sa_Type is array(Integer range <>) of Sa_Element; -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Output_Sa(Sa : in Sa_Type; F_Name : in String); -- Outputs the suffix array Sa (of string S) to the file F_Name -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Build_Sa (S : in String; A : out Sa_Type); -- Builds the suffix array A for the string S. -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Build_Gen_Sa (S1, S2 : in String; A : out Sa_Type); -- Builds a 'generalised' suffix array, A, for strings S1 and S2. -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Build_Partial_Sa (S : in String; A : out Sa_Type; C : in Char_Set_Type; N : out Natural); -- Builds a partial suffix array A holding all -- suffixes of S that begin with a character of C and -- succeed a suffix not beginning with a character of C. -- N is the number of such suffixes, so the suffix array -- is passed out in A(1..N) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Build_Gen_Partial_Sa (S1, S2 : in String; A : out Sa_Type; C : in Char_Set_Type; N : out Natural); -- Builds a generalised partial suffix array A holding -- all suffixes of S1 & S2 that begin with a character of -- C and succeed a suffix not beginning with a character of C. -- N is the number of such suffixes, so the suffix array -- is passed out in A(1..N) -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Search_Sa(A : in Sa_Type; S, X : in String; Found : out Boolean; Pos : out Natural); -- Searches the suffix array A, of string S, for a target string X; -- Returns Found = False if X is not a substring of S, otherwise -- returns Found = True, and Pos such that X occurs at S(Pos). -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Fully_Search_Sa(A : in Sa_Type; S, X : in String; Pos_List : out Integer_List_Type ); -- Searches the suffix array A, of string S, for a target string X; returns -- in Pos_List a list of all the positions of S where X occurs as a substring. -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - function Num_Occurrences(A : Sa_Type; S, X : String) return Natural; -- Searches the suffix array A, of string S, for a target string X; -- returns the number of occurrences of X in S -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Traverse_For_Lrs(A : in Sa_Type; Pos1, Pos2, Len : out Natural); -- Traverses the suffix array A (of a string S), and returns Pos1, Pos2 and -- Len, such that S(Pos1..Pos1+Len-1) = S(Pos2..Pos2+Len-1)is an occurrence -- of a longest repeated substring of S. -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - procedure Traverse_For_Lcs(A : in Sa_Type; Len_S1 : in Natural; Pos1, Pos2, Len : out Natural); -- Traverses the generalised suffix array A (of strings S1 and S2), and returns -- Pos1, Pos2 and Len such that S1(Pos1..Pos1+Len-1) =S2(Pos2..Pos2+Len-1) -- is an occurrence of a longest common substring of S1 and S2. -- Len_S1 is the length of string S1 (eesential information for the algorithm) No_Dummy_Char_Available : exception; -- dummy (unused) character necessary for construction of generalised suffix array -- - - - - - - - - - - - - - - - - - - - - - - - - - - - - private -- this is the version of a suffix array in which each array -- position has a single associated lcp and direction value type Dirn_Type is (Left, Right); type Sa_Element is record Suffix : Natural; Lcp : Natural; Dir : Dirn_Type; end record; end Sa_Standard;