Download split.sas split.sasSubmit a comment

%macro split (

    array
  , items
  , bounding = Q
  , delim = %str( )
  , scope = RESOLVE
  , lets =
  , locals =
  );

  %* Richard A. DeVenezia
  %* 20041230 - Revised as [split], add bounding
  %* 20031119 - Revised as [array], add scope, lets and locals
  %* 19931109 - Initial coding as [makelist]
  %*
  %* Create a macro array from a delimited list of items
  %*
  %* This macro relies on SAS rx* functions to handle quoted items.
  %* However, there are some problems with how the rx* system handles macro quoting tokens.  Thus not ready for primetime.
  %*
  %*            value is
  %*            ----------------------------------------------------------------
  %* array    - macro array name, prefix of numbered macro variables that
  %*            will contain the item values
  %* items    - original list of items, separated by sep
  %* bounding - characters an item can be bounded by for the cases when an item contains
  %*            the delimiter,
  %*            Q: single or double quote
  %*            P: parenthesis ()
  %*            C: curly braces {}
  %*            B: brackets []
  %*            A: angle brackets <>
  %* delim    - character delimiting each item in items
  %* scope    - scope of macro array variables.
  %*            GLOBAL, INHERIT, RESOLVE
  %*            GLOBAL - avoid if possible
  %*            INHERIT - the invoker _must_ ensure the variables
  %*              <&array>_size <&array>_1 ... <&array>_n exist
  %*              prior to invoking %array.
  %*              Why? because a macro is not allowed to create a macro variable
  %*              in a local scope above its own.
  %*              (It may however, access any macro variables in scope above itself)
  %*              If this macro implicitly 'creates' a macro variable, it will
  %*              be destroyed when the macro ends, and thus will not be available
  %*              to invoker.
  %*            RESOLVE - macro var named in lets will receive a quoted macro
  %*              statement which is a series of %lets.  The invoker is responsible
  %*              for unquoting the statement to get macro vars in its scope.
  %* lets     - name of macro var existing in invokers scope.
  %*            upon return, the invoker should unquote the value
  %*            to cause the macro array variables to be assigned.
  %* locals   - name of macro var existing in invokers scope.
  %*            upon return, the invoker should resolve this variable in a
  %*            %local statement to ensure the variables in the lets variable
  %*            will not accidently overwrite an existing macro variable in scope
  %*            higher than invoker. (See examples at bottom)
  %*;

  %if (&array. =) %then %do;
    %put ERROR: array name is missing;
    %goto EndMacro;
  %end;

  %let bounding = %upcase(&bounding);
  %if 0 = %index (|Q|P|C|B|A| ||, |&bounding.|) %then %do;
    %put ERROR: bounding = &bounding is unknown, use Q, P, C, B, A or blank;
    %goto EndMacro;
  %end;

  %let scope = %upcase(&scope);

  %if 0 = %index (|GLOBAL|INHERIT|RESOLVE|, |&scope.|) %then %do;
    %put ERROR: scope = &scope is unknown, use GLOBAL, INHERIT or RESOLVE;
    %goto EndMacro;
  %end;

  %if (&scope = RESOLVE) and (&lets = ) %then %do;
    %put ERROR: scope=&scope requires an lets=<macro-var>;
    %goto EndMacro;
  %end;

  %let lets = %upcase(&lets);

  %if (&scope = RESOLVE) and (&lets = LETS) %then %do;
    %put ERROR: lets= can not be LETS, try lets=_let;
    %goto EndMacro;
  %end;

  %let locals = %upcase(&locals);
  %if (&locals=LOCALS) %then %do;
    %put ERROR: locals= can not be LOCALS, try locals=_local;
    %goto EndMacro;
  %end;

  %local pQ pP pC pB pA;

  %let pQ = $Q;
  %let pP = $(1);
  %let pC = ${1};
  %let pB = $[1];
  %let pA = $<1>;

  %local rxB1 rxB2 rxD;

  %* parse patterns that will locate bounded items;
  %if (&bounding eq ) %then %do;
    %let rxB1 = 0;
    %let rxB2 = 0;
  %end;
  %else %do;
    %let rxB1 = %sysfunc ( rxParse ( &&p&bounding "&delim." ) ) ;
    %let rxB2 = %sysfunc ( rxParse ( &&p&bounding @0 ) ) ;
  %end;

  %* parse pattern that will locate unbounded items;
  %let rxD = %sysfunc ( rxParse ( ^"&delim."+ "&delim." ) );

  %local items_length item_position item_length item item_count new_start;

  %let item_count = 0;

  %if  (&scope = GLOBAL) %then
    %global &array._size;

  %if (&scope = RESOLVE) %then
    %let &lets = ;

  %if (&locals ^= ) %then
    %let &locals = &array._size;

  %let items = %superq (items);

  %do %while (1);

%put |&items.|;

    %let item_position = 0;
    %let item_length = 0;
    %let items_length = %length (&items);

    %* check if next item is bounded ;
    %if &rxB1 %then %do;

      %syscall rxSubstr (rxB1, items, item_position, item_length);

      %if &item_position = 1 %then %do;
        %* next item is bounded and followed by the delimiter;
        %let item = %qsubstr (&items, &item_position, &item_length-1);
        %if &items_length = &item_length %then
          %let items = ;
        %else %do;
          %let new_start = &item_position + &item_length;
%put new_start=&new_start;
          %let items = %qsubstr (&items, &new_start);
        %end;

        %goto assign;
      %end;

      %syscall rxSubstr (rxB2, items, item_position, item_length);

      %if &item_position = 1 %then %do;
        %* final item is bounded and is items;
        %let item  = &items;
        %let items = ;

        %goto assign;
      %end;
    %end;

    %* locate next item;
    %syscall rxSubstr (rxD, items, item_position, item_length);

    %if &item_position = 0 %then %do;
      %* delimiter not found;
      %let item  = &items;
      %let items = ;
    %end;
    %else
    %if &item_length = 1 %then %do;
      %* delimiter found, but no item prior to it;
      %let item = ;
      %if &items_length = &item_length %then
        %let items = ;
      %else %do;
        %let new_start = &item_position + &item_length;
%put new_start=&new_start;
        %let items = %qsubstr (&items, &item_position+&item_length);
      %end;
    %end;
    %else %do;
      %* delimiter found, and an item is prior to it;
      %let item = %qsubstr (&items, &item_position, &item_length-1);

      %if &items_length = &item_length %then
        %let items = ;
      %else %do;
        %let new_start = &item_position + &item_length;
%put new_start=&new_start;
        %let items = %qsubstr (&items, &item_position+&item_length);
      %end;
    %end;

%assign:

    %let item_count = %eval(&item_count + 1);

%put &item_count.: &item_position &item_length |&item.|;

    %if &scope = GLOBAL %then
      %global &array.&item_count;

    %if (&scope = GLOBAL) or (&scope = INHERIT) %then
      %let &array.&item_count = &item;
    %else
      %let &lets = %nrquote(&&&lets)%nrstr(%let )&array.&item_count=&item%str(;);

    %if (&locals ^= ) %then
      %let &locals = &&&locals &array&item_count;

    %if %length (&items) = 0 %then %goto EndScan;

  %end;
  %EndScan:

  %if (&scope = GLOBAL) or (&scope = INHERIT) %then
    %let &array._size = &item_count;
  %else
    %let &lets = %nrquote(&&&lets)%nrstr(%let )&array._size=&item_count%str(;);

  %syscall rxfree(rxB1);
  %syscall rxfree(rxB2);
  %syscall rxfree(rxD);

%EndMacro:

%mend;

Sample code

/**/
%split (bob, a b c d e, scope=GLOBAL)
data _null_;
  do i = 1 to &bob_size;
    name = "BOB"||put(i,3.-L);
    value = symget (name);
    put i= name= value=;
  end;
run;

%* mis-application of INHERIT
%* macro vars that will be the macro array should exist in invokers scope
%* prior to using %split;
%* In a fresh session, x1 to x5 and x_size do not exist in
%* invoking scope (open code -aka- global) prior to invocation;
%split (x, a b c d e, scope=INHERIT)
%put &x_size;


%* mis-application of INHERIT;
%* dangerous because only y1, y3 and y_size are available as GLOBAL,
%* and y2, y4 and y5 are not;
%let y_size=;
%let y1=;
%let y3=;
%split (y, a b c d e, scope=INHERIT)
%put &y_size;
%put &y1;
%put &y2;
%put &y3;
%put &y4;
%put &y5;


%* mis-application of RESOLVE, init should exist prior to invocation;
%split (z, a b c d e, scope=RESOLVE, lets=init);
%put &init;


%* proper application of RESOLVE, init exists prior to invocation;
%let init=;
%split (z, a b c d e, scope=RESOLVE, lets=init);
%put &init;
%unquote(&init)
%put &z_size;
%put &z1;
%put &z2;
%put &z3;
%put &z4;
%put &z5;


%* proper application of RESOLVE, _let and _local exist prior to invocation;
%macro foo;
  %local _let _local array;

  %let array=xyz;

  %split (&array, a b c d e, scope=RESOLVE, lets=_let, locals=_local);

  options symbolgen;

  %local &_local;

  %unquote (&_let)

  options nosymbolgen;

  %do i = 1 %to &&&array._size;
    %put &array&i=&&&array.&i;
  %end;
%mend;
%foo

options nosymbolgen;

/**/


/*
%split (path, "A|B"|A|B|C|D|E, scope=GLOBAL, delim=|)
*/
%split (path, "C:\Program Files" "C:\Foo Bar" foo bar blah 'C:\Bar Foo', scope=GLOBAL)
data _null_;
  do i = 1 to &path_size;
    name = "PATH"||put(i,3.-L);
    value = symget (name);
    put name= value=;
  end;
run;

%split (path, "C:\Program Files" "C:\Foo Bar" foo bar blah 'C:\Bar Foo', scope=GLOBAL, bounding=)


options nosymbolgen;

/**/