Re: Help with Regular Expressions
Date: Wed, 24 Dec 2008 08:30:01 -0800 (PST)
Message-ID: <8fcdb77d-fbe0-4290-bda2-ca368e5cb05d@u18g2000pro.googlegroups.com>
On Dec 23, 2:19 pm, Charles Hooper <hooperc2..._at_yahoo.com> wrote:
> On Dec 23, 11:46 am, "Dereck L. Dietz" <diet..._at_ameritech.net> wrote:
>
>
>
>
>
> > Oracle 10.20.03.0, 64 bit
> > Windows 2003 Server
>
> > I'm trying to use regular expressions (REGEXP_REPLACE) to transform rows
> > like the following
> > from rows with multiple values to one value per row. I've used similar
> > syntax to extract
> > first and last names but this is not working at all this time.
>
> > I'm not too experienced with regular expressions so I know it's probably due
> > to my lack of
> > experience and would appreciate any help that can be offered. I thought
> > regular expressions suited what
> > I need to accomplish but if there is an easier/better way ...
>
> > The syntax I've been trying to use (which didn't work) was:
>
> > SELECT REGEXP_REPLACE(product_desc,'(.+),(.+),(.+),(.+),(.+),(.+)','\1')
> > FROM work_table;
>
> > PRODUCT_DESC
> > ----------------------------------------------------------------------
> > XXX10, XX10, XX50, 10XX15, XXX250
> > XXX1, 10XXXX, XX25, XX15, XXXX
> > XXX10, XX20, XX75, XX10
> > XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX,XXX20
> > XXX10, XX50, XX10, XX35, 30XX
> > XXX5, XXX20, XX5, XX5, XX120
> > XXX10, XX100, XX10, XX35, 1500X, 30%XX, 1500XX
> > XXX10, XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX, 1500XX
> > XXX5
> > XXX5, XX120, XXXXXX, XXXX, XX5, XX5, XXX5
> > XXX10, XX50, XX10, XXXXXX
>
> > PRODUCT_DESC
> > ----------------------------------------------------------------------
> > XXX10
> > XX10
> > XX50
> > 10XX15
> > XXX250
> > XXX1
>
> Take a look at the final posting by Maxim Demenko in this thread to
> see if it meets your needs:http://groups.google.com/group/comp.databases.oracle.misc/browse_thre...
>
> Charles Hooper
> IT Manager/Oracle DBA
> K&M Machine-Fabricating, Inc.- Hide quoted text -
>
> - Show quoted text -
Nice suggestion, Charles, as it works quite nicely:
SQL> -- SQL> -- Create the table SQL> -- SQL> create table work_table( 2 product_cat varchar2(10), 3 product_desc varchar2(100)
4 );
Table created.
SQL> SQL> -- SQL> -- Load the sample data SQL> -- SQL> insert all
2 into work_table
3 values('Cat1','XXX10, XX10, XX50, 10XX15, XXX250') 4 into work_table
5 values('Cat2','XXX1, 10XXXX, XX25, XX15, XXXX') 6 into work_table
7 values('Cat3','XXX10, XX20, XX75, XX10') 8 into work_table
9 values('Cat4','XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX, XXX20')
10 into work_table
11 values('Cat5','XXX10, XX50, XX10, XX35, 30XX') 12 into work_table
13 values('Cat6','XXX5, XXX20, XX5, XX5, XX120') 14 into work_table
15 values('Cat7','XXX10, XX100, XX10, XX35, 1500X, 30%XX, 1500XX') 16 into work_table
17 values('Cat8','XXX10, XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX, 1500XX')
18 into work_table
19 values('Cat9','XXX5')
20 into work_table
21 values('Cat10','XXX5, XX120, XXXXXX, XXXX, XX5, XX5, XXX5') 22 into work_table
23 values('Cat11','XXX10, XX50, XX10, XXXXXX') 24 select * From dual;
11 rows created.
SQL> SQL> column prod_desc format a20 SQL> column column_value format a20 SQL> column word format a20 SQL> column product_desc format a65 SQL> SQL> set linesize 132 trimspool on SQL> SQL> -- SQL> -- Verify the data SQL> -- SQL> select *
2 from work_table;
PRODUCT_CA PRODUCT_DESC
Cat1 XXX10, XX10, XX50, 10XX15, XXX250 Cat2 XXX1, 10XXXX, XX25, XX15, XXXX Cat3 XXX10, XX20, XX75, XX10 Cat4 XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX, XXX20 Cat5 XXX10, XX50, XX10, XX35, 30XX Cat6 XXX5, XXX20, XX5, XX5, XX120 Cat7 XXX10, XX100, XX10, XX35, 1500X, 30%XX, 1500XX Cat8 XXX10, XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX, 1500XX Cat9 XXX5 Cat10 XXX5, XX120, XXXXXX, XXXX, XX5, XX5, XXX5 Cat11 XXX10, XX50, XX10, XXXXXX
11 rows selected.
SQL> SQL> -- SQL> -- Attempt using regular expressions SQL> -- to create multiple rows from a single source SQL> -- SQL> -- It doesn't work as it only embeds linefeeds in SQL> -- the existing string SQL> -- SQL> SELECT REGEXP_REPLACE(product_desc,'+, ',chr(10)) prod_desc2 FROM work_table;
PROD_DESC
XXX10
XX10
XX50
10XX15
XXX250 XXX1
10XXXX
XX25
XX15
XXXX PROD_DESC
XXX10
XX20
XX75
XX10
XXX5
XXXX10
500XX
XX25
XX150
PROD_DESC
XX50
100XX
40XX
XXX20 XXX10
XX50
XX10
XX35
30XX
PROD_DESC
XXX5
XXX20
XX5
XX5
XX120
XXX10
XX100
XX10
XX35
1500X
PROD_DESC
30%XX
1500XX
XXX10
XX10
XX100
XX50
XX20
30XX
500XXX
10%XX
PROD_DESC
1500XX
XXX5
XXX5
XX120
XXXXXX
XXXX
XX5
XX5
XXX5
PROD_DESC
XXX10
XX50
XX10
XXXXXX 11 rows selected.
SQL> SQL> -- SQL> -- Let's try a different approach modifying a query SQL> -- provided by Michel Cadot SQL> -- SQL> SQL> with t as ( 2 select product_cat, product_desc,length(product_desc)-length (replace(product_desc,','))+1 3 words 4 from work_table), 5 g as (select rownum rn 6 from dual 7 connect by level<=(select 8 max(length(product_desc)-length(replace(product_desc,',')))+1 from work_table))
9 select regexp_substr(product_desc,'\w+',1,rn) word 10 from t,g
11 where g.rn<=t.words
12 order by t.product_cat,g.rn
13 /
WORD
XXX10
XX10
XX50
10XX15
XXX250
XXX5
XX120
XXXXXX
XXXX
XX5
XX5
WORD
XXX5
XXX10
XX50
XX10
XXXXXX
XXX1
10XXXX
XX25
XX15
XXXX
XXX10 WORD
XX20
XX75
XX10
XXX5
XXXX10
500XX
XX25
XX150
XX50
100XX
40XX
WORD
XXX20
XXX10
XX50
XX10
XX35
30XX
XXX5
XXX20
XX5
XX5
XX120
WORD
XXX10
XX100
XX10
XX35
1500X
30
XX
XXX10
XX10
XX100
XX50
WORD
XX20
30XX
500XXX
10
XX
XXX5 61 rows selected.
SQL> SQL> -- SQL> -- Let's see how this works SQL> -- SQL> SQL> -- SQL> -- Count the number of 'words' in each record SQL> -- SQL> SQL> select product_cat, product_desc,length(product_desc)-length(replace(product_desc,','))+1 words
2 from work_table;
PRODUCT_CA
PRODUCT_DESC
WORDS
Cat1 XXX10, XX10, XX50, 10XX15, XXX250 5 Cat2 XXX1, 10XXXX, XX25, XX15, XXXX 5 Cat3 XXX10, XX20, XX75, XX10 4 Cat4 XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX, XXX20 9 Cat5 XXX10, XX50, XX10, XX35, 30XX 5 Cat6 XXX5, XXX20, XX5, XX5, XX120 5 Cat7 XXX10, XX100, XX10, XX35, 1500X, 30%XX, 1500XX 7 Cat8 XXX10, XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX, 1500XX 9
Cat9
XXX5
1
Cat10 XXX5, XX120, XXXXXX, XXXX, XX5, XX5, XXX5 7 Cat11 XXX10, XX50, XX10, XXXXXX 4
11 rows selected.
SQL> SQL> -- SQL> -- Find the length of the longest line of 'words' SQL> -- SQL> select 2 max(length(product_desc)-length(replace(product_desc,',')))+1 from work_table;
MAX(LENGTH(PRODUCT_DESC)-LENGTH(REPLACE(PRODUCT_DESC,',')))+1
9 SQL>
SQL> --
SQL> -- Generate a master list of word numbers based upon the longest list
SQL> --
SQL> select rownum rn
2 from dual
3 connect by level<=(select
4 max(length(product_desc)-length(replace(product_desc,','))) +1 from work_table);
RN
1 2 3 4 5 6 7 8 9
9 rows selected.
SQL> SQL> -- SQL> -- Put it all together SQL> -- SQL> -- Separate the words in each record based upon the word count SQL> -- using the REGEXP_SUBSTR function SQL> -- SQL> -- Use the generated row number to step through the word lists SQL> -- by ensuring the rn returned is no greater than the max number SQL> -- of words in each list SQL> -- SQL> -- Report the final result SQL> -- SQL> with t as ( 2 select product_cat, product_desc,length(product_desc)-length (replace(product_desc,','))+1 3 words 4 from work_table), 5 g as (select rownum rn 6 from dual 7 connect by level<=(select 8 max(length(product_desc)-length(replace(product_desc,',')))+1 from work_table))
9 select regexp_substr(product_desc,'\w+',1,rn) word 10 from t,g
11 where g.rn<=t.words
12 order by t.product_cat,g.rn
13 /
WORD
XXX10
XX10
XX50
10XX15
XXX250
XXX5
XX120
XXXXXX
XXXX
XX5
XX5
WORD
XXX5
XXX10
XX50
XX10
XXXXXX
XXX1
10XXXX
XX25
XX15
XXXX
XXX10 WORD
XX20
XX75
XX10
XXX5
XXXX10
500XX
XX25
XX150
XX50
100XX
40XX
WORD
XXX20
XXX10
XX50
XX10
XX35
30XX
XXX5
XXX20
XX5
XX5
XX120
WORD
XXX10
XX100
XX10
XX35
1500X
30
XX
XXX10
XX10
XX100
XX50
WORD
XX20
30XX
500XXX
10
XX
XXX5 61 rows selected.
SQL> David Fitzjarrell Received on Wed Dec 24 2008 - 10:30:01 CST