Home » SQL & PL/SQL » SQL & PL/SQL » Oracle regex_substr parsing a clob (19.2)
Oracle regex_substr parsing a clob [message #685245] Tue, 23 November 2021 07:41 Go to next message
Unclefool
Messages: 21
Registered: August 2021
Junior Member
I have a situation where an application is storing chunks of text in a CLOB.

Each chunk of text is surrounded by a tag
[SYSDATE] (see below for test CASE).

I am able to produce some of my output using regex_substr() but I can use some help making the full query work.


ALTER SESSION SET NLS_DATE_FORMAT = 'MMDDYYYY HH24:MI:SS';

CREATE table t(
seq_num integer  GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
c CLOB,
create_date DATE DEFAULT SYSDATE
);
/

insert into t (c) values (' ')
/



CREATE OR REPLACE PROCEDURE lob_append(
  p_clob IN OUT CLOB,
  p_text IN     VARCHAR2
)
AS
  l_text varchar2(32760);
  l_date_string VARCHAR2(50);
BEGIN

select '[' || TO_CHAR (SYSDATE, 'MM-DD-YYYY HH24:MI:SS') || ']'
    into l_date_string  from dual;

 
-- newline each time code is appended for clarity.
  l_text :=chr(10) || l_date_string || chr(10)
            || p_text || chr(10)
            || l_date_string||chr(10);

  dbms_lob.writeappend(p_clob, length(l_text), l_text );
END;
/


DECLARE
  l_clob CLOB := empty_clob();
lTime date;

BEGIN
lTime := sysdate;

  SELECT c INTO l_clob FROM t WHERE seq_num = 1 FOR UPDATE;

 lob_append(l_clob, rpad('Z',20,'Z'));

loop
        exit when sysdate = lTime + interval '2' second;
     end loop;

l_clob  := empty_clob();
SELECT c INTO l_clob FROM t WHERE seq_num = 1 FOR UPDATE;


lob_append(l_clob, rpad('Y',10,'Y'));


END;
/

-- Note there can be any data,
-- multiple lines, newlines, between the 
-- encapsulating tags

SELECT * from t


SEQ_NUM    C    CREATE_DATE
1     
[11-23-2021 13:10:37]
ZZZZZZZZZZZZZZZZZZZZ
[11-23-2021 13:10:37]

[11-23-2021 13:10:39]
YYYYYYYYYY
[11-23-2021 13:10:39]

select t.seq_num, l.ord, l.token
from   t cross join lateral
       (
         select level as ord,
                regexp_substr(c, '(\[\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}])'
                                  || chr(10) || '(.*?)' || chr(10) || '\1'
                             , 1, level, null, 2) 
                as token
                           from   dual
         connect by level <= 
                 regexp_count(c, '(\[\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}])'
                                  || chr(10) || '(.*?)' || chr(10) || '\1')
       ) l
order  by seq_num, ord;

current output 
SEQ_NUM    ORD    TOKEN
1    1    ZZZZZZZZZZZZZZZZZZZZ
1    2    YYYYYYYYYY

Desired output. Note only 1 date tag per distinct time in any CLOB and the brackets are removed in the output.

SEQ_NUM    ORD CREATE_DATE    TOKEN
1    1 11-23-2021 13:10:37
ZZZZZZZZZZZZZZZZZZZZ
1    2 11-23-2021 13:10:39
YYYYYYYYYY

Secondly, is there a better way to handle this situation as this seems like a clunky design? Perhaps maybe using XML and appending newly added data? All input, ideas and working examples would be greatly appreciated.

Re: Oracle regex_substr parsing a clob [message #685246 is a reply to message #685245] Tue, 23 November 2021 11:32 Go to previous message
Solomon Yakobson
Messages: 3106
Registered: January 2010
Location: Connecticut, USA
Senior Member
SELECT  T.SEQ_NUM,
        L.ORD,
        L.CREATE_DATE,
        L.TOKEN
  FROM  T,
        LATERAL(
                SELECT  LEVEL ORD,
                        REGEXP_SUBSTR(
                                      C,
                                      '\[(.{19})\]',
                                      1,
                                      LEVEL * 2 - 1,
                                      null,
                                      1
                                     ) CREATE_DATE,
                        REGEXP_SUBSTR(
                                      C,
                                      '(\[.{19}\])(' ||
                                      CHR(10) || '|' || CHR(13) || ')*(.*)(' ||
                                      CHR(10) || '|' || CHR(13) || ')\1',
                                      1,
                                      LEVEL,
                                      'n',
                                      3
                                     ) TOKEN
                  FROM  DUAL
                  CONNECT BY LEVEL <= REGEXP_COUNT(
                                                   C,
                                                   '(\[.{19}\])(' ||
                                                   CHR(10) || '|' || CHR(13) || ')*(.*)(' ||
                                                   CHR(10) || '|' || CHR(13) || ')\1'
                                                  )
               ) L
/

   SEQ_NUM        ORD CREATE_DATE         TOKEN
---------- ---------- ------------------- -------------------------
         1          1 11-23-2021 11:53:37 ZZZZZZZZZZZZZZZZZZZZ
         1          2 11-23-2021 11:53:39 YYYYYYYYYY

SQL>
SY.

[Updated on: Tue, 23 November 2021 11:37]

Report message to a moderator

Previous Topic: Data history
Next Topic: DBA_HIST_SQLTEXT string buffer too small
Goto Forum:
  


Current Time: Mon Dec 06 09:49:52 CST 2021