/* Richard A. DeVenezia
 * www.devenezia.com
 *
 * A collection of LIKE patterns often overlaps.  What are the fewest
 * patterns that would match the same things as all of the patterns?
 *
 * Approach: every pattern is also treated as a plain string and tested
 * against every other pattern.  A pattern that is itself LIKE a
 * different pattern is dropped; whatever remains is written to BAR.
 *
 * Original content posted to SAS-L on June 23, 2003
 */

/* Sample patterns, one per input line.  id is a running row number. */
data foo;
  id + 1;
  length like $20;
  input like;
cards;
jabba
jabba%
jabba%the%hut
xyz%abc
xyz%abc%def
132%xyz
132%xyz%npq
132%dbc%asd%ijk
132
123%abc
12%3%
12345
12
234
23456
;
run;

proc sql;

  /* likematrix pairs every pattern with every pattern (the Cartesian
   * product of foo with itself) and records two measures per pair:
   *   x_like_y  - x, read as a plain string, matches y read as a pattern
   *   x_equal_y - x and y are the same value
   * It is not needed to build bar; it only feeds the first PROC PRINT.
   *
   * trim() keeps the trailing blanks of the $20 values out of the match.
   *
   * The rows are ordered by y first because that PROC PRINT uses BY y,
   * which needs the rows it reads to arrive in y order.  Ordering by
   * x, y only happened to satisfy that for the sample data.
   */
  create table likematrix as
  select x.like as x
       , y.like as y
       , trim(x.like) like trim(y.like) as x_like_y
       , x.like eq y.like as x_equal_y
  from foo as x
     , foo as y
  order by y, x
  ;

  /* NOTE(review): nothing below modifies a table it also reads, so this
   * reset looks like a leftover from a variant that deleted rows from
   * foo in place - confirm before removing.
   */
  reset undo_policy=none;

  /* bar keeps only the patterns that are NOT like some different
   * pattern.  Rows of foo are copied, never deleted, so foo stays
   * available for the full listing printed further down.
   */
  create table bar as
  select * from foo
  where NOT (
    like in
      (select x.like
       from foo as x
          , foo as y
       where trim(x.like) like trim(y.like)
         and (x.like ne y.like)
      )
  )
  ;

quit;

/* Listing setup: plain left-aligned pages, no page breaks, and a
 * cleared Output window (the DM command applies to Display Manager).
 */
options nonumber nodate nocenter;
options formdlim = ' ' pagesize=300;
dm 'clear output' output;
title; footnote;

/* Pairs where x is covered by a different pattern y, grouped by y. */
title 'x things like y things can be ignored';
proc print data=likematrix;
  by y;
  id y;
  where trim(x) like trim(y) and (x ne y);
run;

/* Every pattern that was read in. */
title 'These things are all the likes';
proc print data=foo noobs;
run;

/* The patterns that survived the reduction. */
title 'These things minimally span the things I need to like';
proc print data=bar noobs;
run;

/* Listing produced with the sample data above:

x things like y things can be ignored

                                    x_equal_
y         x                x_like_y     y

12%3%     123%abc              1        0
          12345                1        0

jabba%    jabba                1        0
          jabba%the%hut        1        0


These things are all the likes

id    like

 1    jabba
 2    jabba%
 3    jabba%the%hut
 4    xyz%abc
 5    xyz%abc%def
 6    132%xyz
 7    132%xyz%npq
 8    132%dbc%asd%ijk
 9    132
10    123%abc
11    12%3%
12    12345
13    12
14    234
15    23456


These things minimally span the things I need to like

id    like

 2    jabba%
 4    xyz%abc
 5    xyz%abc%def
 6    132%xyz
 7    132%xyz%npq
 8    132%dbc%asd%ijk
 9    132
11    12%3%
13    12
14    234
15    23456
*/