Home » SQL & PL/SQL » SQL & PL/SQL » Oracle regex_substr parsing a clob (19.2)
Oracle regex_substr parsing a clob [message #685245] |
Tue, 23 November 2021 07:41  |
 |
Unclefool
Messages: 65 Registered: August 2021
|
Member |
|
|
I have a situation where an application is storing chunks of text in a CLOB.
Each chunk of text is surrounded by a tag
[SYSDATE] (see below for test CASE).
I am able to produce some of my output using regex_substr() but I can use some help making the full query work.
ALTER SESSION SET NLS_DATE_FORMAT = 'MMDDYYYY HH24:MI:SS';
CREATE table t(
seq_num integer GENERATED BY DEFAULT AS IDENTITY (START WITH 1) NOT NULL,
c CLOB,
create_date DATE DEFAULT SYSDATE
);
/
insert into t (c) values (' ')
/
CREATE OR REPLACE PROCEDURE lob_append(
p_clob IN OUT CLOB,
p_text IN VARCHAR2
)
AS
l_text varchar2(32760);
l_date_string VARCHAR2(50);
BEGIN
select '[' || TO_CHAR (SYSDATE, 'MM-DD-YYYY HH24:MI:SS') || ']'
into l_date_string from dual;
-- newline each time code is appended for clarity.
l_text :=chr(10) || l_date_string || chr(10)
|| p_text || chr(10)
|| l_date_string||chr(10);
dbms_lob.writeappend(p_clob, length(l_text), l_text );
END;
/
DECLARE
l_clob CLOB := empty_clob();
lTime date;
BEGIN
lTime := sysdate;
SELECT c INTO l_clob FROM t WHERE seq_num = 1 FOR UPDATE;
lob_append(l_clob, rpad('Z',20,'Z'));
loop
exit when sysdate = lTime + interval '2' second;
end loop;
l_clob := empty_clob();
SELECT c INTO l_clob FROM t WHERE seq_num = 1 FOR UPDATE;
lob_append(l_clob, rpad('Y',10,'Y'));
END;
/
-- Note there can be any data,
-- multiple lines, newlines, between the
-- encapsulating tags
SELECT * from t
SEQ_NUM C CREATE_DATE
1
[11-23-2021 13:10:37]
ZZZZZZZZZZZZZZZZZZZZ
[11-23-2021 13:10:37]
[11-23-2021 13:10:39]
YYYYYYYYYY
[11-23-2021 13:10:39]
select t.seq_num, l.ord, l.token
from t cross join lateral
(
select level as ord,
regexp_substr(c, '(\[\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}])'
|| chr(10) || '(.*?)' || chr(10) || '\1'
, 1, level, null, 2)
as token
from dual
connect by level <=
regexp_count(c, '(\[\d{2}-\d{2}-\d{4} \d{2}:\d{2}:\d{2}])'
|| chr(10) || '(.*?)' || chr(10) || '\1')
) l
order by seq_num, ord;
current output
SEQ_NUM ORD TOKEN
1 1 ZZZZZZZZZZZZZZZZZZZZ
1 2 YYYYYYYYYY
Desired output. Note only 1 date tag per distinct time in any CLOB and the brackets are removed in the output.
SEQ_NUM ORD CREATE_DATE TOKEN
1 1 11-23-2021 13:10:37
ZZZZZZZZZZZZZZZZZZZZ
1 2 11-23-2021 13:10:39
YYYYYYYYYY
Secondly, is there a better way to handle this situation as this seems like a clunky design? Perhaps maybe using XML and appending newly added data? All input, ideas and working examples would be greatly appreciated.
|
|
|
Re: Oracle regex_substr parsing a clob [message #685246 is a reply to message #685245] |
Tue, 23 November 2021 11:32  |
Solomon Yakobson
Messages: 3214 Registered: January 2010 Location: Connecticut, USA
|
Senior Member |
|
|
SELECT T.SEQ_NUM,
L.ORD,
L.CREATE_DATE,
L.TOKEN
FROM T,
LATERAL(
SELECT LEVEL ORD,
REGEXP_SUBSTR(
C,
'\[(.{19})\]',
1,
LEVEL * 2 - 1,
null,
1
) CREATE_DATE,
REGEXP_SUBSTR(
C,
'(\[.{19}\])(' ||
CHR(10) || '|' || CHR(13) || ')*(.*)(' ||
CHR(10) || '|' || CHR(13) || ')\1',
1,
LEVEL,
'n',
3
) TOKEN
FROM DUAL
CONNECT BY LEVEL <= REGEXP_COUNT(
C,
'(\[.{19}\])(' ||
CHR(10) || '|' || CHR(13) || ')*(.*)(' ||
CHR(10) || '|' || CHR(13) || ')\1'
)
) L
/
SEQ_NUM ORD CREATE_DATE TOKEN
---------- ---------- ------------------- -------------------------
1 1 11-23-2021 11:53:37 ZZZZZZZZZZZZZZZZZZZZ
1 2 11-23-2021 11:53:39 YYYYYYYYYY
SQL>
SY.
[Updated on: Tue, 23 November 2021 11:37] Report message to a moderator
|
|
|
Goto Forum:
Current Time: Fri Mar 31 01:24:04 CDT 2023
|