-/* $Id: extract.c,v 1.209 2006-05-10 08:13:21 adam Exp $
- Copyright (C) 1995-2005
+/* $Id: extract.c,v 1.210 2006-05-10 12:31:08 adam Exp $
+ Copyright (C) 1995-2006
Index Data ApS
This file is part of the Zebra server.
*sysno = rec->sysno;
if (zh->records_processed < zh->m_file_verbose_limit)
- if (matchStr)
- yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT " %s" ,
- zh->m_record_type,
- fname, recordOffset, *sysno, matchStr);
- else
- yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT ,
- zh->m_record_type,
- fname, recordOffset, *sysno);
-
+ {
+ if (matchStr)
+ yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT " %s" ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno, matchStr);
+ else
+ yaz_log(YLOG_LOG, "add %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno);
+ }
recordAttr = rec_init_attr (zh->reg->zei, rec);
recordAttr->staticrank = extractCtrl.staticrank;
else
{
if (zh->records_processed < zh->m_file_verbose_limit)
- if (matchStr)
- yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT " %s" ,
- zh->m_record_type,
- fname, recordOffset, *sysno, matchStr);
- else
- yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
- " " ZINT_FORMAT ,
- zh->m_record_type,
- fname, recordOffset, *sysno);
-
-
-
+ {
+ if (matchStr)
+ yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT " %s" ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno, matchStr);
+ else
+ yaz_log(YLOG_LOG, "delete %s %s " PRINTF_OFF_T
+ " " ZINT_FORMAT ,
+ zh->m_record_type,
+ fname, recordOffset, *sysno);
+ }
zh->records_deleted++;
if (matchStr)
{
{
/* flush new keys for sort&search etc */
if (zh->records_processed < zh->m_file_verbose_limit)
- if (matchStr)
+ {
+ if (matchStr)
yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T
" " ZINT_FORMAT " %s" ,
zh->m_record_type,
fname, recordOffset, *sysno, matchStr);
- else
+ else
yaz_log(YLOG_LOG, "update %s %s " PRINTF_OFF_T
" " ZINT_FORMAT ,
zh->m_record_type,
fname, recordOffset, *sysno);
-
+ }
recordAttr->staticrank = extractCtrl.staticrank;
#if NATTR
extract_flushSortKeys (zh, *sysno, 1, zh->reg->sortKeys);
return 0;
}
+/*
+ * Update the per-ordinal occurrence statistics for the keys in reckeys.
+ *
+ * zh:        handle; the explain info is taken from zh->reg->zei.
+ * is_insert: non-zero adds to the statistics, zero subtracts from them.
+ * reckeys:   key set to scan; nothing happens if it cannot be rewound.
+ *
+ * For every distinct ordinal (key_in.mem[0]) found in reckeys the term
+ * count is adjusted by the number of keys carrying that ordinal and the
+ * document count is adjusted by one.
+ */
+void extract_rec_keys_adjust(ZebraHandle zh, int is_insert,
+                             zebra_rec_keys_t reckeys)
+{
+    /* tally entry: number of keys seen (no) for one ordinal (ord) */
+    struct ord_stat {
+        int no;
+        int ord;
+        struct ord_stat *next;
+    };
+    ZebraExplainInfo zei = zh->reg->zei;
+    struct ord_stat *head = 0;
+    struct ord_stat *node;
+    const char *str;
+    size_t slen;
+    struct it_key key_in;
+    int sign = is_insert ? 1 : -1;
+
+    if (!zebra_rec_keys_rewind(reckeys))
+        return;
+
+    /* pass 1: count the keys per ordinal in a small linked list */
+    while (zebra_rec_keys_read(reckeys, &str, &slen, &key_in))
+    {
+        int ord = key_in.mem[0];
+
+        node = head;
+        while (node && node->ord != ord)
+            node = node->next;
+
+        if (node)
+            node->no++;
+        else
+        {
+            /* first key with this ordinal: push a new tally on the list */
+            node = xmalloc(sizeof(*node));
+            node->no = 1;
+            node->ord = ord;
+            node->next = head;
+            head = node;
+        }
+    }
+
+    /* pass 2: apply each tally to the explain info and release it */
+    while (head)
+    {
+        node = head;
+        head = head->next;
+        zebraExplain_ord_adjust_occurrences(zei, node->ord,
+                                            sign * node->no, sign);
+        xfree(node);
+    }
+}
+
void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno,
int cmd,
zebra_rec_keys_t reckeys,
{
ZebraExplainInfo zei = zh->reg->zei;
+ extract_rec_keys_adjust(zh, cmd, reckeys);
+
if (!zh->reg->key_buf)
{
int mem= 1024*1024* atoi( res_get_def( zh->res, "memmax", "8"));
zh->reg->key_buf_used +=
key_SU_encode(ch, (char*)zh->reg->key_buf +
zh->reg->key_buf_used);
-
+
/* copy the 0-terminated stuff from str to output */
memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, str, slen);
zh->reg->key_buf_used += slen;
-/* $Id: zinfo.c,v 1.59 2006-05-10 09:08:55 adam Exp $
- Copyright (C) 1995-2005
+/* $Id: zinfo.c,v 1.60 2006-05-10 12:31:08 adam Exp $
+ Copyright (C) 1995-2006
Index Data ApS
This file is part of the Zebra server.
} su;
} u;
int ordinal;
+ zint doc_occurrences;
+ zint term_occurrences;
};
struct zebSUInfoB {
data1_node *node_str = NULL;
data1_node *node_ordinal = NULL;
data1_node *node_type = NULL;
+ data1_node *node_doc_occurrences = NULL;
+ data1_node *node_term_occurrences = NULL;
data1_node *np2;
char oid_str[128];
int oid_str_len;
node_ordinal = np2->child;
else if (!strcmp(np2->u.tag.tag, "type"))
node_type = np2->child;
+ else if (!strcmp(np2->u.tag.tag, "dococcurrences"))
+ node_doc_occurrences = np2->child;
+ else if (!strcmp(np2->u.tag.tag, "termoccurrences"))
+ node_term_occurrences = np2->child;
+ else
+ {
+ yaz_log(YLOG_LOG, "Unknown tag '%s' in attributeDetails",
+ np2->u.tag.tag);
+ }
}
assert(node_ordinal);
(*zsuip)->info.index_type = 'w';
}
+ if (node_doc_occurrences)
+ {
+ data1_node *np = node_doc_occurrences;
+ (*zsuip)->info.doc_occurrences = atoi_zn(np->u.data.data,
+ np->u.data.len);
+ }
+ if (node_term_occurrences)
+ {
+ data1_node *np = node_term_occurrences;
+ (*zsuip)->info.term_occurrences = atoi_zn(np->u.data.data,
+ np->u.data.len);
+ }
if (node_set && node_use)
{
(*zsuip)->info.which = ZEB_SU_SET_USE;
}
data1_mk_tag_data_int (zei->dh, node_attr, "ordinal",
zsui->info.ordinal, zei->nmem);
+
+ data1_mk_tag_data_zint (zei->dh, node_attr, "dococcurrences",
+ zsui->info.doc_occurrences, zei->nmem);
+ data1_mk_tag_data_zint (zei->dh, node_attr, "termoccurrences",
+ zsui->info.term_occurrences, zei->nmem);
}
/* convert to "SGML" and write it */
#if ZINFO_DEBUG
}
return 0;
}
-
-int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord,
- int *index_type,
- const char **db,
- int *set, int *use,
- const char **string_index)
+
+
+struct zebSUInfoB *zebraExplain_get_sui_info (ZebraExplainInfo zei, int ord,
+                                              int dirty_mark,
+                                              const char **db)
{
struct zebDatabaseInfoB *zdb;
+    /*
+     * Find the SU entry whose ordinal equals ord, searching every database.
+     * A hit is moved to the front of its database's SUInfo list (this
+     * happens even when dirty_mark is zero).  If dirty_mark is non-zero the
+     * owning attributeDetails is flagged dirty; if db is non-null it
+     * receives the database name.  Returns the entry, or 0 when no database
+     * has the ordinal.
+     */
+    for (zdb = zei->databaseInfo; zdb; zdb = zdb->next)
+    {
+        struct zebSUInfoB **link;
+
+        if (zdb->attributeDetails->readFlag)
+            zebraExplain_readAttributeDetails (zei, zdb->attributeDetails);
+
+        /* link always addresses the pointer that refers to the candidate */
+        link = &zdb->attributeDetails->SUInfo;
+        while (*link && (*link)->info.ordinal != ord)
+            link = &(*link)->next;
+
+        if (*link)
+        {
+            struct zebSUInfoB *hit = *link;
+
+            /* unlink the hit, then reinsert it as the list head */
+            *link = hit->next;
+            hit->next = zdb->attributeDetails->SUInfo;
+            zdb->attributeDetails->SUInfo = hit;
+
+            if (dirty_mark)
+                zdb->attributeDetails->dirty = 1;
+            if (db)
+                *db = zdb->databaseName;
+            return hit;
+        }
+    }
+    return 0;
+}
+
+
+
+/*
+ * Add term_delta and doc_delta (either may be negative) to the term and
+ * document occurrence counters of the entry with ordinal ord.
+ * Returns 0 on success, -1 if the ordinal is not known.
+ */
+int zebraExplain_ord_adjust_occurrences(ZebraExplainInfo zei, int ord,
+                                        int term_delta, int doc_delta)
+{
+    /* third argument is dirty_mark: the lookup flags the details dirty */
+    struct zebSUInfoB *entry = zebraExplain_get_sui_info(zei, ord, 1, 0);
+
+    if (!entry)
+        return -1;
+
+    entry->info.term_occurrences += term_delta;
+    entry->info.doc_occurrences += doc_delta;
+    return 0;
+}
+
+/*
+ * Fetch the occurrence counters stored for the entry with ordinal ord.
+ *
+ * term_occurrences, doc_occurrences: output locations; either may be null
+ * when the caller does not need that counter (same convention as the
+ * optional outputs of zebraExplain_lookup_ord).
+ *
+ * Returns 0 on success, -1 if the ordinal is not known; on failure the
+ * outputs are left untouched.
+ */
+int zebraExplain_ord_get_occurrences(ZebraExplainInfo zei, int ord,
+                                     zint *term_occurrences,
+                                     zint *doc_occurrences)
+{
+    /* dirty_mark is 0: reading the counters does not flag the details dirty */
+    struct zebSUInfoB *zsui = zebraExplain_get_sui_info(zei, ord, 0, 0);
+
+    if (!zsui)
+        return -1;
+
+    if (term_occurrences)
+        *term_occurrences = zsui->info.term_occurrences;
+    if (doc_occurrences)
+        *doc_occurrences = zsui->info.doc_occurrences;
+    return 0;
+}
+
+int zebraExplain_lookup_ord(ZebraExplainInfo zei, int ord,
+ int *index_type,
+ const char **db,
+ int *set, int *use,
+ const char **string_index)
+{
+ struct zebSUInfoB *zsui;
+
if (set)
*set = -1;
if (use)
if (string_index)
*string_index = 0;
- for (zdb = zei->databaseInfo; zdb; zdb = zdb->next)
+ zsui = zebraExplain_get_sui_info(zei, ord, 0, db);
+ if (zsui)
{
- struct zebSUInfoB *zsui;
-
- if (zdb->attributeDetails->readFlag)
- zebraExplain_readAttributeDetails (zei, zdb->attributeDetails);
-
- for (zsui = zdb->attributeDetails->SUInfo; zsui; zsui = zsui->next)
- if (zsui->info.ordinal == ord)
- {
- if (db)
- *db = zdb->databaseName;
- if (zsui->info.which == ZEB_SU_SET_USE)
- {
- if (set)
- *set = zsui->info.u.su.set;
- if (use)
- *use = zsui->info.u.su.use;
- }
-
- if (zsui->info.which == ZEB_SU_STR)
- if (string_index)
- *string_index = zsui->info.u.str;
-
- if (index_type)
- *index_type = zsui->info.index_type;
- return 0;
- }
+ if (zsui->info.which == ZEB_SU_SET_USE)
+ {
+ if (set)
+ *set = zsui->info.u.su.set;
+ if (use)
+ *use = zsui->info.u.su.use;
+ }
+
+ if (zsui->info.which == ZEB_SU_STR)
+ if (string_index)
+ *string_index = zsui->info.u.str;
+
+ if (index_type)
+ *index_type = zsui->info.index_type;
+ return 0;
}
return -1;
}
+
+
zebAccessObject zebraExplain_announceOid (ZebraExplainInfo zei,
zebAccessObject *op,
Odr_oid *oid)
zsui->info.u.su.set = set;
zsui->info.u.su.use = use;
zsui->info.ordinal = (zei->ordinalSU)++;
+ zsui->info.doc_occurrences = 0;
+ zsui->info.term_occurrences = 0;
return zsui->info.ordinal;
}