Re: Help with Regular Expressions

From: ddf <oratune_at_msn.com>
Date: Wed, 24 Dec 2008 08:30:01 -0800 (PST)
Message-ID: <8fcdb77d-fbe0-4290-bda2-ca368e5cb05d@u18g2000pro.googlegroups.com>


On Dec 23, 2:19 pm, Charles Hooper <hooperc2..._at_yahoo.com> wrote:
> On Dec 23, 11:46 am, "Dereck L. Dietz" <diet..._at_ameritech.net> wrote:
>
>
>
>
>
> > Oracle 10.20.03.0, 64 bit
> > Windows 2003 Server
>
> > I'm trying to use regular expressions (REGEXP_REPLACE) to transform rows
> > like the following
> > from rows with multiple values to one value per row.  I've used similar
> > syntax to extract
> > first and last names but this is not working at all this time.
>
> > I'm not too experienced with regular expressions so I know it's probably due
> > to my lack of
> > experience and would appreciate any help that can be offered.  I thought
> > regular expressions suited what
> > I need to accomplish but if there is an easier/better way ...
>
> > The syntax I've been trying to use (which didn't work) was:
>
> > SELECT REGEXP_REPLACE(product_desc,'(.+),(.+),(.+),(.+),(.+),(.+)','\1')
> > FROM work_table;
>
> > PRODUCT_DESC
> > ----------------------------------------------------------------------
> > XXX10, XX10, XX50, 10XX15, XXX250
> > XXX1, 10XXXX, XX25, XX15, XXXX
> > XXX10, XX20, XX75, XX10
> > XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX,XXX20
> > XXX10, XX50, XX10, XX35, 30XX
> > XXX5, XXX20, XX5, XX5, XX120
> > XXX10, XX100, XX10, XX35, 1500X, 30%XX, 1500XX
> > XXX10,  XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX, 1500XX
> > XXX5
> > XXX5, XX120, XXXXXX, XXXX, XX5, XX5, XXX5
> > XXX10, XX50, XX10, XXXXXX
>
> > PRODUCT_DESC
> > ----------------------------------------------------------------------
> > XXX10
> > XX10
> > XX50
> > 10XX15
> > XXX250
> > XXX1
>
> Take a look at the final posting by Maxim Demenko in this thread to
> see if it meets your needs:http://groups.google.com/group/comp.databases.oracle.misc/browse_thre...
>
> Charles Hooper
> IT Manager/Oracle DBA
> K&M Machine-Fabricating, Inc.- Hide quoted text -
>
> - Show quoted text -

Nice suggestion, Charles, as it works quite nicely:

SQL> --
SQL> -- Create the table
SQL> --
SQL> create table work_table(
  2  	     product_cat varchar2(10),
  3  	     product_desc varchar2(100)

  4 );

Table created.

SQL>
SQL> --
SQL> -- Load the sample data
SQL> --
SQL> insert all

  2 into work_table
  3 values('Cat1','XXX10, XX10, XX50, 10XX15, XXX250')   4 into work_table
  5 values('Cat2','XXX1, 10XXXX, XX25, XX15, XXXX')   6 into work_table
  7 values('Cat3','XXX10, XX20, XX75, XX10')   8 into work_table
  9 values('Cat4','XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX, XXX20')
 10 into work_table
 11 values('Cat5','XXX10, XX50, XX10, XX35, 30XX')  12 into work_table
 13 values('Cat6','XXX5, XXX20, XX5, XX5, XX120')  14 into work_table
 15 values('Cat7','XXX10, XX100, XX10, XX35, 1500X, 30%XX, 1500XX')  16 into work_table
 17 values('Cat8','XXX10, XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX, 1500XX')
 18 into work_table
 19 values('Cat9','XXX5')
 20 into work_table
 21 values('Cat10','XXX5, XX120, XXXXXX, XXXX, XX5, XX5, XXX5')  22 into work_table
 23 values('Cat11','XXX10, XX50, XX10, XXXXXX')  24 select * From dual;

11 rows created.

SQL>
SQL> column prod_desc format a20
SQL> column column_value format a20
SQL> column word format a20
SQL> column product_desc format a65
SQL>
SQL> set linesize 132 trimspool on
SQL>
SQL> --
SQL> -- Verify the data
SQL> --
SQL> select *

  2 from work_table;

PRODUCT_CA PRODUCT_DESC



Cat1       XXX10, XX10, XX50, 10XX15, XXX250
Cat2       XXX1, 10XXXX, XX25, XX15, XXXX
Cat3       XXX10, XX20, XX75, XX10
Cat4       XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX, XXX20
Cat5       XXX10, XX50, XX10, XX35, 30XX
Cat6       XXX5, XXX20, XX5, XX5, XX120
Cat7       XXX10, XX100, XX10, XX35, 1500X, 30%XX, 1500XX
Cat8       XXX10, XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX, 1500XX
Cat9       XXX5
Cat10      XXX5, XX120, XXXXXX, XXXX, XX5, XX5, XXX5
Cat11      XXX10, XX50, XX10, XXXXXX

11 rows selected.

SQL>
SQL> --
SQL> -- Attempt using regular expressions
SQL> -- to create multiple rows from a single source
SQL> --
SQL> -- It doesn't work as it only embeds linefeeds in
SQL> -- the existing string
SQL> --
SQL> SELECT REGEXP_REPLACE(product_desc,'+, ',chr(10)) prod_desc
  2 FROM work_table;

PROD_DESC



XXX10
XX10
XX50
10XX15
XXX250 XXX1
10XXXX
XX25
XX15
XXXX PROD_DESC

XXX10
XX20
XX75
XX10

XXX5
XXXX10
500XX
XX25
XX150

PROD_DESC



XX50
100XX
40XX
XXX20 XXX10
XX50
XX10
XX35
30XX

PROD_DESC



XXX5
XXX20
XX5
XX5
XX120

XXX10
XX100
XX10
XX35
1500X

PROD_DESC



30%XX
1500XX

XXX10
XX10
XX100
XX50
XX20
30XX
500XXX
10%XX

PROD_DESC



1500XX

XXX5
XXX5
XX120
XXXXXX
XXXX
XX5
XX5
XXX5 PROD_DESC



XXX10
XX50
XX10
XXXXXX 11 rows selected.
SQL>
SQL> --
SQL> -- Let's try a different approach modifying a query
SQL> -- provided by Michel Cadot
SQL> --
SQL>
SQL> with t as (
  2  	 select product_cat, product_desc,length(product_desc)-length
(replace(product_desc,','))+1
  3  	     words
  4  	 from work_table),
  5  	 g as (select rownum rn
  6  	       from dual
  7  	       connect by level<=(select
  8  		     max(length(product_desc)-length(replace(product_desc,',')))
+1 from work_table))
  9 select regexp_substr(product_desc,'\w+',1,rn) word  10 from t,g
 11 where g.rn<=t.words
 12 order by t.product_cat,g.rn
 13 /

WORD



XXX10
XX10
XX50
10XX15
XXX250
XXX5
XX120
XXXXXX
XXXX
XX5
XX5

WORD



XXX5
XXX10
XX50
XX10
XXXXXX
XXX1
10XXXX
XX25
XX15
XXXX
XXX10 WORD

XX20
XX75
XX10
XXX5
XXXX10
500XX
XX25
XX150
XX50
100XX
40XX

WORD



XXX20
XXX10
XX50
XX10
XX35
30XX
XXX5
XXX20
XX5
XX5
XX120

WORD



XXX10
XX100
XX10
XX35
1500X
30
XX
XXX10
XX10
XX100
XX50

WORD



XX20
30XX
500XXX
10
XX
XXX5 61 rows selected.
SQL>
SQL> --
SQL> -- Let's see how this works
SQL> --
SQL>
SQL> --
SQL> -- Count the number of 'words' in each record
SQL> --
SQL>
SQL> select product_cat, product_desc,length(product_desc)-length
(replace(product_desc,','))+1 words
  2 from work_table;

PRODUCT_CA
PRODUCT_DESC
WORDS




Cat1       XXX10, XX10, XX50, 10XX15,
XXX250                                          5
Cat2       XXX1, 10XXXX, XX25, XX15,
XXXX                                             5
Cat3       XXX10, XX20, XX75,
XX10                                                    4
Cat4       XXX5, XXXX10, 500XX, XX25, XX150, XX50, 100XX, 40XX,
XXX20                 9
Cat5       XXX10, XX50, XX10, XX35,
30XX                                              5
Cat6       XXX5, XXX20, XX5, XX5,
XX120                                               5
Cat7       XXX10, XX100, XX10, XX35, 1500X, 30%XX,
1500XX                             7
Cat8       XXX10, XX10, XX100, XX50, XX20, 30XX, 500XXX, 10%XX,
1500XX                9

Cat9
XXX5
1
Cat10      XXX5, XX120, XXXXXX, XXXX, XX5, XX5,
XXX5                                  7
Cat11      XXX10, XX50, XX10,
XXXXXX                                                  4

11 rows selected.

SQL>
SQL> --
SQL> -- Find the length of the longest line of 'words'
SQL> --
SQL> select
  2  	     max(length(product_desc)-length(replace(product_desc,',')))
+1 from work_table;

MAX(LENGTH(PRODUCT_DESC)-LENGTH(REPLACE(PRODUCT_DESC,',')))+1


                                                            9

SQL>

SQL> --
SQL> -- Generate a master list of word numbers based upon the longest list
SQL> --
SQL> select rownum rn
  2 from dual
  3 connect by level<=(select
  4 max(length(product_desc)-length(replace(product_desc,','))) +1 from work_table);

        RN


         1
         2
         3
         4
         5
         6
         7
         8
         9

9 rows selected.

SQL>
SQL> --
SQL> -- Put it all together
SQL> --
SQL> -- Separate the words in each record based upon the word count
SQL> -- using the REGEXP_SUBSTR function
SQL> --
SQL> -- Use the generated row number to step through the word lists
SQL> -- by ensuring the rn returned is no greater than the max number
SQL> -- of words in each list
SQL> --
SQL> -- Report the final result
SQL> --
SQL> with t as (
  2  	 select product_cat, product_desc,length(product_desc)-length
(replace(product_desc,','))+1
  3  	     words
  4  	 from work_table),
  5  	 g as (select rownum rn
  6  	       from dual
  7  	       connect by level<=(select
  8  		     max(length(product_desc)-length(replace(product_desc,',')))
+1 from work_table))
  9 select regexp_substr(product_desc,'\w+',1,rn) word  10 from t,g
 11 where g.rn<=t.words
 12 order by t.product_cat,g.rn
 13 /

WORD



XXX10
XX10
XX50
10XX15
XXX250
XXX5
XX120
XXXXXX
XXXX
XX5
XX5

WORD



XXX5
XXX10
XX50
XX10
XXXXXX
XXX1
10XXXX
XX25
XX15
XXXX
XXX10 WORD

XX20
XX75
XX10
XXX5
XXXX10
500XX
XX25
XX150
XX50
100XX
40XX

WORD



XXX20
XXX10
XX50
XX10
XX35
30XX
XXX5
XXX20
XX5
XX5
XX120

WORD



XXX10
XX100
XX10
XX35
1500X
30
XX
XXX10
XX10
XX100
XX50

WORD



XX20
30XX
500XXX
10
XX
XXX5 61 rows selected.

SQL> David Fitzjarrell Received on Wed Dec 24 2008 - 10:30:01 CST

Original text of this message