More complex CONTEXT index

From: Bill Zakrzewski <bill_at_intactus.com>
Date: Fri, 23 Jan 2009 16:12:43 -0500
Message-Id: <0AB5213C-052B-4F9E-AFDC-076B487D9981_at_intactus.com>



After reading all of the requirements this even more complex. I need to search 9 columns across 4 tables for specific words. Here is what I have, but the BLOB data does not get indexed properly and may be due to Oracle not understanding how to translate a PDF, MS Word doc or MS Excel spreadsheet. Any advice will be appreciated. I am not going to list out the tables to avoid making this too large, but here is the procedure I used to create the USER_DATASTORE and the commands used to build the index.
  • Build the Data Store and Index
    begin ctx_ddl.create_preference( 'mitss_data_user_datastore', 'USER_DATASTORE' ); ctx_ddl.set_attribute( 'mitss_data_user_datastore', 'procedure', 'ctxsys.mitss_dataSearch' ); end; /

create index idx_mitss_data_search

     on mitss_data (dummy_search) indextype is ctxsys.context
        parameters ('datastore mitss_data_user_datastore');

--

  • rebuild 'sync' should be run to catch up on new inserts and updates
    --
    alter index idx_mitss_data_search rebuild online parameters ('sync memory 20M');

--

  • rebuild 'full' should be run every so often to prevent fragmentation of the
  • index
    --
    alter index idx_mitss_data_search rebuild online parameters ('optimize full');
  • Here is the major part of the package used to build the data store
    procedure mitss_dataSearch( p_id in rowid, p_lob IN OUT nocopy clob ) is v_separator char(1) := ' '; v_mitss_id number;
     cursor search_mitss_data_cur is
       select id, proj_title, application_proj_number
         from mitss_data where rowid = p_id;
     cursor search_mitss_data_long (m_id number) is
       select document
         frommitss_data_long where type = 4 and mitss_id = m_id;
     cursor search_bullets (m_id number) is
       select bullet_item
         frommitss_bullets where mitss_id = m_id;
     cursor search_mitss_files_data (m_id number) is
       select title, short_desc, long_desc
         from mitss_files where mitss_id = m_id;
     cursor search_mitss_files_document (m_id number) is
       select document
         from mitss_files where mitss_id = m_id and content_type not  
like 'image%';
begin
     --
     -- mitss_data text
     --
     for r in search_mitss_data_cur
     loop
         v_mitss_id := r.id;
         append(p_lob, nvl(r.proj_title, v_separator));
         append(p_lob, v_separator);
         append(p_lob, nvl(r.application_proj_number, v_separator));
         append(p_lob, v_separator);
     end loop;

     --
     -- mitss_data_long
     --
     for r in search_mitss_data_long(v_mitss_id)
     loop
        if dbms_lob.getlength (r.document) > 1 then
         append(p_lob, r.document);
         append(p_lob, v_separator);
        end if;
     end loop;

     --
     -- bullets
     --
     for r in search_bullets(v_mitss_id)
     loop
         append(p_lob, nvl(r.bullet_item, v_separator));
         append(p_lob, v_separator);
     end loop;

     --
     -- mitss_files_data
     --
     for r in search_mitss_files_data(v_mitss_id)
     loop
         append(p_lob, nvl(r.title, v_separator));
         append(p_lob, v_separator);
         append(p_lob, nvl(r.short_desc, v_separator));
         append(p_lob, v_separator);
         append(p_lob, nvl(r.long_desc, v_separator));
         append(p_lob, v_separator);
     end loop;

     --
     -- mitss_files_document
     --
     for r in search_mitss_files_document(v_mitss_id)
     loop
        if dbms_lob.getlength (r.document) > 1 then
         append(p_lob, r.document);
         append(p_lob, v_separator);
        end if;
     end loop;


end mitss_dataSearch;

  • from Ryan McGeary RAMP db
  • This is used to facilitate the call to dmbs_lob.writeappend(...) -- just
  • makes for cleaner syntax.
    --
    procedure append( p_lob IN OUT NOCOPY CLOB CHARACTER SET ANY_CS, p_append IN VARCHAR2 CHARACTER SET p_lob%CHARSET ) is begin dbms_lob.writeappend(p_lob, length(p_append), p_append); end append;

--

  • Attempt to convert Blob to Clob to Varchar and add to search
    --
    procedure append( p_lob IN OUT NOCOPY CLOB CHARACTER SET ANY_CS, p_blob IN BLOB ) is t_clob clob; v_maxsize integer :=dbms_lob.lobmaxsize; v_dest_offset integer := 1; v_src_offset integer := 1; v_default_csid integer := dbms_lob.default_csid; v_lang_context integer := dbms_lob.default_lang_ctx; v_warning integer; v_append VARCHAR2(32767) CHARACTER SET p_lob%CHARSET; clob_length INTEGER; clob_offset INTEGER; clob_amount INTEGER; begin DBMS_LOB.CREATETEMPORARY(t_clob, TRUE); dbms_lob.converttoclob (t_clob ,p_blob ,v_maxsize ,v_dest_offset,v_src_offset,v_default_csid,v_lang_context,v_warning);
     clob_length := dbms_lob.getlength(t_clob);
     clob_offset := 1;
     clob_amount := 32767;
     while clob_length > 32767
     loop
        clob_length := clob_length - 32767;
        v_append := dbms_lob.substr(t_clob,clob_amount,clob_offset);
        dbms_lob.writeappend(p_lob, length(v_append), v_append);
        clob_offset := clob_offset + 32767;
     end loop;
     clob_amount := dbms_lob.getlength(t_clob) - clob_offset;
     v_append := dbms_lob.substr(t_clob,clob_amount,clob_offset);
     dbms_lob.writeappend(p_lob, length(v_append), v_append);
end append;

procedure append(

     p_lob    IN OUT NOCOPY CLOB CHARACTER SET ANY_CS,
     p_clob   IN CLOB

)
is

    v_append VARCHAR2(32767) CHARACTER SET p_lob%CHARSET;

    clob_length INTEGER;
    clob_offset INTEGER;
    clob_amount INTEGER;
begin
     clob_length := dbms_lob.getlength(p_clob);
     clob_offset := 1;
     clob_amount := 32767;
     while clob_length > 32767
     loop
        clob_length := clob_length - 32767;
        v_append := dbms_lob.substr(p_clob,clob_amount,clob_offset);
        dbms_lob.writeappend(p_lob, length(v_append), v_append);
        clob_offset := clob_offset + 32767;
     end loop;
     clob_amount := dbms_lob.getlength(p_clob) - clob_offset;
     v_append := dbms_lob.substr(p_clob,clob_amount,clob_offset);
     dbms_lob.writeappend(p_lob, length(v_append), v_append);
end append;

Thanks
Bill
--

http://www.freelists.org/webpage/oracle-l Received on Fri Jan 23 2009 - 15:12:43 CST

Original text of this message