/* * Copyright 2004 Richard A DeVenezia * Generate a library of tables having many interacting constraints * 10 May 2004 * * Table names will be * table1 - table * primary key name of table will be pk__1 * foreign key names of table will be fk__2 to fk__ * * There is no guarantee the schema will *not* contain confounding * constraints that prevent the insertion of a single row */ options ls=128; %macro randomSchema ( numberOfTables = 60 , minSatelliteVars = 5 , maxSatelliteVars = 40 , pForeignKey = .9 /* probability a table has a foreign key */ , maxNumberOfForeignKeySets = 8 , numberOfKeyPatterns = 12 , maxPatternLength = 8 , createPrimaryKey = DuringCreateTable /* AfterCreateTable | DuringCreateTable */ , createForeignKeys = AllAtOnce /* AllAtOnce | OneAtATime */ , deleteOSFilesIfDatasetsKillFails = 1 , seed = 27182818 , path = c:\temp\random\schemas , schema = foo , onlyKill = 0 ); %let createPrimaryKey = %upcase (&createPrimaryKey); %if 0 = %index (|AFTERCREATETABLE|DURINGCREATETABLE|,|&createPrimaryKey.|) %then %do; %put ERROR: argument createPrimaryKey should be AfterCreateTable or DuringCreateTable; %goto EndMacro; %end; %let createForeignKeys = %upcase (&createForeignKeys); %if 0 = %index (|ONEATATIME|ALLATONCE|,|&createForeignKeys.|) %then %do; %put ERROR: argument createForeignKeys should be OneAtATime or AllAtOnce; %goto EndMacro; %end; %if &numberOfTables < 2 %then %do; %put ERROR: numberOfTables must be > 1; %goto EndMacro; %end; %local zlen zfmt; %let zlen = %sysfunc (log10(&numberOfTables)); %let zlen = %sysevalf (&zlen+1,floor); %let zfmt = z&zlen..; %local did rc; filename schema "&path.\&schema."; %let did = %sysfunc (dopen(schema)); %if &did %then %let rc = %sysfunc (dclose(&did)); filename schema; %if &did = 0 %then %do; %put ERROR: Folder &path.\&schema. is not a directory.; %goto EndMacro; %end; libname &schema v8 "&path./&schema"; %*----------------------------------------------------------; %* Before the library can be refilled with a random schema, %* all its members need to be deleted. %* All the members can be deleted only *after* all the %* foreign key constraints have been removed %* from any tables that might previously exist in the %* library; proc delete data=indices; proc datasets nolist ; contents data=&schema.._all_ noprint out2=indices; ; run; quit; * Normally the type of interest would be 'Foreign Key' * However, in 8.2 out2= does not contain columns reporting * the libname and member. Thus, we will use type 'Referential' * and the column Ref as the meta information to be used as the * basis for deleting foreign keys. *; proc sort data=indices (keep=ref name type); by ref name; where type = 'Referential'; run; %local drop_fks nobs ; %let nobs=0; %let ds = %sysfunc (open (indices)); %if &ds %then %let nobs = %sysfunc (attrn(&ds,nobs)); %let ds = %sysfunc (close(&ds)); %if &nobs > 0 %then %do; /* * 8.2. datasets contents out2= table does not have libname and member column! ; proc sql noprint; select 'alter table ' || trim(libname) || '.' || trim(member) || ' drop foreign key ' || trim(name) || ';' into :drop_fks separated by ' ' from indices where type = 'Foreign Key' ; &drop_fks quit; */ %* Construct a series of statements that drop foreign keys %* of a referer table; proc sql noprint; %let nobs = 0; select count(distinct (ref)) into :nobs from indices; %let nobs = &nobs; quit; %if &nobs > 0 %then %do; %do i = 1 %to &nobs; %local drop&i; %end; data _null_; set indices; by ref; length drop $2500; retain drop; if first.ref then drop = 'alter table ' || trim(ref) || ' drop foreign key '; else drop = trim(drop) || ','; drop = trim(drop) || ' ' || name; if last.ref then do; i + 1; call symput ('drop' || put(i,4.-L), trim(drop)); end; run; proc sql; %do i = 1 %to &nobs; &&drop&i; %end; quit; %end; %end; proc datasets nolist lib=&schema kill; quit; %put syserr=&syserr; %if &syserr ne 0 %then %do; %if (not &deleteOSFilesIfDatasetsKillFails) %then %do; %put WARNING: Something did not go well when trying to delete everything in library &schema; %put NOTE: You might try deleteOSFilesIfDatasetsKillFails=1; %goto EndMacro; %end; %put NOTE: An error occurred in Proc DATASETS; %put NOTE: An attempt will be made to delete files directly; data _null_; length diref fileref $8; rc = filename (diref, "&path.\&schema."); if rc = 0; did = dopen (diref); if did then do; do i = 1 to dnum (did); name = dread (did, i); rc = filename (fileref, "&path.\&schema.\" || name); if rc = 0 and fexist (fileref) then do; rc = fdelete (fileref); fullpath = pathname (fileref); if rc = 0 then put "NOTE: Deleted " fullpath; end; rc = fileref (fileref); end; did = close (did); end; rc = filename (diref); run; %end; %if (&onlyKill) %then %goto EndMacro; %*----------------------------------------------------------; %* Generate a list of random column signatures. %* A signature is the pattern of variable types and lengths for a set of columns; data patterns; do n = 1 to &numberOfKeyPatterns; nvars = 1 + int ( &maxPatternLength * ranuni (&seed) ); do varnum = 1 to nvars; index + 1; if ranuni (&seed) < 0.5 then do; vartype = 'char'; varlen = 5 + int (75 * ranuni(&seed)); end; else do; vartype = 'num '; varlen = 8; end; output; end; end; keep n nvars varnum vartype varlen; run; %*----------------------------------------------------------; %* For each table randomly select a primary key signature. %* For each table select a random number of foreign keys; %* For each foreign key randomly select which primary key signature it will refer to. %* Ensure the foreign key signature is the primary key signature of a %* table different than oneself. (If this was not done, it would %* be possible that this code could create a table that %* self references... which is disallowed by SAS) %*; data tables; array pkps [ &numberOfTables ] _temporary_; %* select a primary key signature for each table; do tabnum = 1 to &numberOfTables; pattern_set = 'pk'; pattern = 1 + int(&numberOfKeyPatterns * ranuni(&seed)); set_seq = 1; output; pkps [tabnum] = pattern; end; %* randomly assign foreign keys signatures; do tabnum = 1 to &numberOfTables; if ranuni(&seed) > &pForeignKey then continue; %* select a random number of foreign keys; nfks = int (&maxNumberOfForeignKeySets * ranuni(&seed)); do fk = 0 to nfks; pattern_set = 'fk'; %* SAS does not allow self referential constraints; %* thus random selection must be to a table different than tabnum; do until (index ne tabnum); index = 1 + int(&numberOfTables * ranuni(&seed)); end; pattern = pkps [ index ]; set_seq = 2 + fk; output; end; end; stop; keep tabnum pattern_set pattern set_seq; run; %*----------------------------------------------------------; %* Each tables foreign keys may have more than one %* possible table it may validly reference. %* Amongst all the tables having the same primary key signature %* as a foreign key, randomly select one for referential use; proc sql; create table relations0 as select B.tabnum, B.set_seq, A.tabnum as refersTo , count(*) as nCandidates from tables as A, tables as B where A.pattern_set = 'pk' and B.pattern_set = 'fk' and A.pattern = B.pattern and A.tabnum ne B.tabnum /* dis-allow self-referential tables */ group by B.tabnum, B.set_seq order by B.tabnum, B.set_seq ; quit; data relations; set relations0; by tabnum set_seq; if first.set_seq then do; outputOffset = int(nCandidates * ranuni(&seed)); outputOffset + 1; offset = 1; end; else offset + 1; if offset = outputOffset then output; keep tabnum set_seq refersto; run; %*----------------------------------------------------------; %* Combine the referential mappings with the signatory %* meta data to create data that will be used for code generating; proc sql; create table meta as select J.tabnum, J.set_seq, R.refersTo,pattern_set , varnum, vartype, varlen , pattern from ( select * from tables T , patterns P where P.n = T.pattern ) as J left join relations R on J.tabnum = R.tabnum and J.set_seq = R.set_seq order by tabnum, set_seq, varnum ; quit; %*----------------------------------------------------------; %* Generate the SQL statements that declare each tables %* columns and its primary key; filename create catalog "work.schema.create_&schema..source"; data _null_; set meta; by tabnum set_seq; file create; retain comma prefix ; array k [ &maxPatternLength ] $32 _temporary_; if first.tabnum then do; put / "create table &schema..table" tabnum &zfmt. ; comma = '('; end; if first.set_seq then do; length prefix $10; prefix = pattern_set; if prefix = 'pk' then prefix = 'id'; else prefix = byte(63+set_seq); kindex = 0; end; vname = trim(prefix) || '_' || put(varnum,3.-L); put @3 comma vname @; put @12 vartype @16 '(' varlen 2. ')' @; put ; comma = ','; kindex + 1; k [ kindex ] = vname; if last.set_seq and pattern_set = 'pk' then do; %if (&createPrimaryKey = DURINGCREATETABLE) %then %do; keytype = 'primary'; put ', constraint ' pattern_set +(-1) '_' tabnum +(-1) '_' set_seq keytype 'key'; put @3 @; comma = '('; do i = 1 to kindex; put comma k[i] @; comma = ','; end; put ') ' ; %end; end; if last.tabnum then do; nsat = &minSatelliteVars + int ((&maxSatelliteVars-&minSatelliteVars+1) * ranuni(&seed)); do i = 1 to nsat; vname = 'v_' || put(i,3.-L); if ranuni(&seed) < 0.5 then vartype = 'char'; else vartype = 'num '; if vartype = 'char' then varlen = 5 + int (76 * ranuni(&seed)); else varlen = 8; put @3 comma vname @; put @12 vartype @16 '(' varlen 2. ')' ; end; put +2 ');'; end; run; %*----------------------------------------------------------; %* Generate the SQL statements that define each tables %* primary key constraints; %if (&createPrimaryKey = AFTERCREATETABLE) %then %do; data _null_; set meta; where pattern_set = 'pk'; by tabnum set_seq; file create mod; retain comma clause_comma prefix; retain groupfks 0; if (first.tabnum and groupfks) or (first.set_seq and not groupfks) then do; put "alter table &schema..table" tabnum &zfmt. ; put ' add constraint '; clause_comma = ' '; end; if first.set_seq then do; put clause_comma 'pk_' tabnum +(-1) '_' set_seq 'primary key ' @; clause_comma = ','; prefix = 'id'; comma = '('; end; vname = trim(prefix) || '_' || put(varnum,3.-L); put comma vname +(-1) @; comma = ','; if last.set_seq then do; put ')' ; if not groupfks then put ";"@; put ; end; if last.tabnum and groupfks then do; put ';'; end; run; %end; %*----------------------------------------------------------; %* Generate the SQL statements that define each tables %* foreign key constraints; data _null_; set meta; where pattern_set = 'fk'; by tabnum set_seq; file create mod; retain comma clause_comma prefix; retain groupfks %eval (&createForeignKeys = ALLATONCE); if (first.tabnum and groupfks) or (first.set_seq and not groupfks) then do; put "alter table &schema..table" tabnum &zfmt. ; put ' add constraint '; clause_comma = ' '; end; if first.set_seq then do; put clause_comma 'fk_' tabnum +(-1) '_' set_seq 'foreign key ' @; clause_comma = ','; prefix = byte(63+set_seq); comma = '('; end; vname = trim(prefix) || '_' || put(varnum,3.-L); put comma vname +(-1) @; comma = ','; if last.set_seq then do; put ')' +2 "references &schema..table" refersto &zfmt. @; %if %sysevalf(&sysver < 9.1) %then %do; put " on update restrict on delete restrict " @; %end; if not groupfks then put ";"@; put ; end; if last.tabnum and groupfks then do; put ';'; end; run; %*----------------------------------------------------------; %* Inverse relations for those interested; proc sql; create table referers as select A.tabnum , B.tabnum as referer , B.set_seq as set_seq from ( select distinct tabnum from relations ) as A , relations as B where A.tabnum = B.refersTo order by A.tabnum, B.tabnum, B.set_seq ; quit; %*----------------------------------------------------------; %* Include the code that creates the tables, the primary keys %* and foreign keys; proc sql; %include create; quit; %put syserr = &syserr; %put sqlrc = &sqlrc; * dm "notepad %sysfunc(pathname(create))"; %EndMacro: %mend; /**/ options mprint; %randomSchema (pForeignKey=.15) /**/