2 * Copyright (C) 1994-1998, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.7 1998-03-05 08:45:13 adam
8 * New result set model and modular ranking system. Moved towards
9 * decent server API. System information stored as "SGML" records.
11 * Revision 1.6 1998/02/17 10:29:27 adam
12 * Moved towards 'automatic' EXPLAIN database.
14 * Revision 1.5 1997/10/27 14:33:05 adam
15 * Moved towards generic character mapping depending on "structure"
16 * field in abstract syntax file. Fixed a few memory leaks. Fixed
17 * bug with negative integers when doing searches with relational
20 * Revision 1.4 1997/09/25 14:57:08 adam
23 * Revision 1.3 1996/05/22 08:21:59 adam
24 * Added public ZebDatabaseInfo structure.
26 * Revision 1.2 1996/05/14 06:16:41 adam
27 * Compact use/set bytes used in search service.
29 * Revision 1.1 1996/05/13 14:23:07 adam
30 * Work on compaction of set/use bytes in dictionary.
/*
 * Members of the set/use list node. The struct's opening line is not
 * visible in this excerpt; the node is used elsewhere in this file through
 * pointers of type struct zebSUInfoB (->info.set, ->info.use,
 * ->info.ordinal and ->next).
 */
50 struct zebSUInfo info;
51 struct zebSUInfoB *next; /* next set/use entry of the same database */
/*
 * Per-database explain state. Entries form a singly linked list through
 * `next`; the list head is the databaseInfo member of struct
 * zebraExplainInfo.
 */
54 struct zebDatabaseInfoB {
55 struct zebSUInfoB *SUInfo; /* head of this database's set/use list */
57 data1_node *data1_database; /* data1 tree of the database info record */
58 int recordCount; /* records in db */
59 int recordBytes; /* size of records */
60 int sysno; /* sysno of database info */
61 int readFlag; /* 1: read is needed when referenced; 0 if not */
62 int dirty; /* 1: database is dirty: write is needed */
63 struct zebDatabaseInfoB *next; /* next database in the list */
/*
 * One registered attribute set, chained through `next`.
 * trav_attset also fills in `name` and `ordinal` members; their
 * declarations are not visible in this excerpt.
 */
66 struct zebraExplainAttset {
69 struct zebraExplainAttset *next;
/*
 * Top-level explain handle state. Only some members are visible in this
 * excerpt; the code below also uses records, dh, nmem, ordinalSU and
 * runNumber members.
 */
72 struct zebraExplainInfo {
78 struct zebraExplainAttset *attsets; /* registered attribute sets (see trav_attset) */
80 data1_node *data1_target; /* data1 tree of the target info record */
81 struct zebDatabaseInfoB *databaseInfo; /* head of the database list */
82 struct zebDatabaseInfoB *curDatabaseInfo; /* database selected by curDatabase/newDatabase */
/*
 * Parse the stored data of a record into a data1 tree.
 * Hands rec->info[recInfo_storeData] to data1_read_sgml together with dh
 * and nmem, and returns whatever that call yields.
 */
85 static data1_node *read_sgml_rec (data1_handle dh, NMEM nmem, Record rec)
87 return data1_read_sgml (dh, nmem, rec->info[recInfo_storeData]);
/*
 * Scan the sibling chain that starts at n (following ->next) for a tag node
 * whose name matches `tag` according to yaz_matchstr.
 * Progress is traced at LOG_DEBUG level. The return statements are not
 * visible in this excerpt; callers in this file treat the result as the
 * matching node, or a null pointer when nothing matched.
 */
90 static data1_node *data1_search_tag (data1_handle dh, data1_node *n,
93 logf (LOG_DEBUG, "data1_search_tag %s", tag);
94 for (; n; n = n->next)
/* only tag nodes that carry a name are compared; a zero result from
 * yaz_matchstr is treated as a match */
95 if (n->which == DATA1N_tag && n->u.tag.tag &&
96 !yaz_matchstr (tag, n->u.tag.tag))
98 logf (LOG_DEBUG, " found");
101 logf (LOG_DEBUG, " not found");
/*
 * Create a new tag node named `tag` and link it in as the last child of
 * `at`.
 * The node is allocated with data1_mk_node, its name is stored through
 * data1_insert_string, and its tag flags are reset (get_bytes = -1).
 * An element definition is looked up using the parent tag's element and the
 * root's abstract syntax; the conditions around that lookup are on lines
 * not visible in this excerpt.
 */
105 static data1_node *data1_add_tag (data1_handle dh, data1_node *at,
106 const char *tag, NMEM nmem)
108 data1_node *partag = get_parent_tag(dh, at);
109 data1_node *res = data1_mk_node (dh, nmem);
110 data1_element *e = NULL;
113 res->which = DATA1N_tag;
114 res->u.tag.tag = data1_insert_string (dh, res, nmem, tag);
115 res->u.tag.node_selected = 0;
116 res->u.tag.make_variantlist = 0;
117 res->u.tag.no_data_requested = 0;
118 res->u.tag.get_bytes = -1;
121 e = partag->u.tag.element;
123 data1_getelementbytagname (dh, at->root->u.root.absyn,
/* the new node belongs to the same tree as its parent */
125 res->root = at->root;
/* chain the new node after the current last child, then make it the
 * last child itself */
130 assert (at->last_child);
131 at->last_child->next = res;
133 at->last_child = res;
/*
 * Find-or-create helper for a child tag of `at`: searches at's children for
 * `tag` and calls data1_add_tag to create it (the test between the two
 * calls is not visible here - presumably "if the search found nothing").
 * The node's child list is reset to empty; whether that applies to both
 * found and newly added nodes cannot be told from this excerpt.
 */
137 static data1_node *data1_make_tag (data1_handle dh, data1_node *at,
138 const char *tag, NMEM nmem)
142 node = data1_search_tag (dh, at->child, tag);
144 node = data1_add_tag (dh, at, tag, nmem);
/* drop any children so callers can rebuild the subtree from scratch */
146 node->child = node->last_child = NULL;
/*
 * Add a tagged data element `tag` under `at` whose content is the decimal
 * text of num.
 * The data node comes from data1_add_taggeddata; the number is formatted
 * into the node's own lbuf buffer and the resulting length is recorded.
 * NOTE(review): lbuf's size is not visible here - presumably large enough
 * for any "%d" rendering of an int; confirm against the data1_node
 * definition.
 */
150 static data1_node *data1_add_tagdata_int (data1_handle dh, data1_node *at,
151 const char *tag, int num,
154 data1_node *node_data;
156 node_data = data1_add_taggeddata (dh, at->root, at, tag, nmem);
159 node_data->u.data.what = DATA1I_num;
160 node_data->u.data.data = node_data->lbuf;
161 sprintf (node_data->u.data.data, "%d", num);
162 node_data->u.data.len = strlen (node_data->u.data.data);
/*
 * Add a tagged data element `tag` under `at` whose content is a copy of
 * str.
 * NOTE(review): str is copied with strcpy into the node's lbuf buffer and
 * no length check is visible in this function. lbuf's capacity is not
 * shown here; if it is a small fixed array, a long str (this helper is
 * called with database names) would overflow it - confirm, and consider
 * storing the text through data1_insert_string, as data1_add_tag does for
 * tag names.
 */
166 static data1_node *data1_add_tagdata_text (data1_handle dh, data1_node *at,
167 const char *tag, const char *str,
170 data1_node *node_data;
172 node_data = data1_add_taggeddata (dh, at->root, at, tag, nmem);
175 node_data->u.data.what = DATA1I_text;
176 node_data->u.data.data = node_data->lbuf;
177 strcpy (node_data->u.data.data, str);
178 node_data->u.data.len = strlen (node_data->u.data.data);
/* Forward declarations: zebraExplain_close calls both writers before their
 * definitions appear further down in this file. */
182 static void zebraExplain_writeDatabase (ZebraExplainInfo zei,
183 struct zebDatabaseInfoB *zdi);
184 static void zebraExplain_writeTarget (ZebraExplainInfo zei);
/*
 * Shut down an explain handle.
 * Writes every database info record and then the target record
 * (presumably only when writeFlag is set - the guarding condition is not
 * visible in this excerpt), walks the database list and each database's
 * set/use list, and finally destroys the handle's NMEM pool.
 */
186 void zebraExplain_close (ZebraExplainInfo zei, int writeFlag)
188 struct zebDatabaseInfoB *zdi, *zdi_next;
190 logf (LOG_DEBUG, "zebraExplain_close wr=%d", writeFlag);
193 /* write each database info record */
194 for (zdi = zei->databaseInfo; zdi; zdi = zdi->next)
195 zebraExplain_writeDatabase (zei, zdi);
196 zebraExplain_writeTarget (zei);
/* both loops below fetch the successor before the step expression runs,
 * so the current node need not stay valid (the statements that release
 * the nodes are not visible here) */
198 for (zdi = zei->databaseInfo; zdi; zdi = zdi_next)
200 struct zebSUInfoB *zsui, *zsui_next;
202 zdi_next = zdi->next;
203 for (zsui = zdi->SUInfo; zsui; zsui = zsui_next)
205 zsui_next = zsui->next;
210 nmem_destroy (zei->nmem);
/*
 * Open the explain information kept in `records`.
 *
 * Allocates the handle, creates its NMEM pool and fetches record 1 with
 * rec_get. Two paths are visible below: one parses an existing target
 * record (database list, ordinalSU, runNumber), the other builds a fresh
 * target tree from an inline SGML template, stores it with rec_new/rec_put
 * and registers the "IR-Explain-1" database.
 * NOTE(review): the condition that separates the two paths is not visible
 * in this excerpt - presumably whether rec_get returned a record.
 */
215 ZebraExplainInfo zebraExplain_open (Records records, data1_handle dh,
219 ZebraExplainInfo zei;
220 struct zebDatabaseInfoB **zdip;
222 logf (LOG_DEBUG, "zebraExplain_open wr=%d", writeFlag);
223 zei = xmalloc (sizeof(*zei));
225 zei->curDatabaseInfo = NULL;
226 zei->records = records;
227 zei->nmem = nmem_create ();
/* zdip always addresses the link where the next database entry goes,
 * so entries are appended in the order they are read */
230 zdip = &zei->databaseInfo;
231 trec = rec_get (records, 1);
235 data1_node *node_tgtinfo, *node_zebra, *node_list, *np;
237 zei->data1_target = read_sgml_rec (zei->dh, zei->nmem, trec);
/* dump the parsed tree to stderr */
240 data1_pr_tree (zei->dh, zei->data1_target, stderr);
/* NOTE(review): each of the three lookups below is dereferenced by the
 * next one (->child) with no NULL check visible in this excerpt */
242 node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target->child,
244 node_zebra = data1_search_tag (zei->dh, node_tgtinfo->child,
246 node_list = data1_search_tag (zei->dh, node_zebra->child,
248 for (np = node_list->child; np; np = np->next)
250 data1_node *node_name = NULL;
251 data1_node *node_id = NULL;
/* only "database" tag children are of interest */
253 if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "database"))
255 for (np2 = np->child; np2; np2 = np2->next)
257 if (np2->which != DATA1N_tag)
259 if (!strcmp (np2->u.tag.tag, "name"))
260 node_name = np2->child;
261 else if (!strcmp (np2->u.tag.tag, "id"))
262 node_id = np2->child;
/* a database entry must carry both a name and an id */
264 assert (node_id && node_name);
266 *zdip = xmalloc (sizeof(**zdip));
/* readFlag = 1: per the field's own comment, a read is needed when the
 * database is referenced */
268 (*zdip)->readFlag = 1;
270 (*zdip)->data1_database = NULL;
271 (*zdip)->recordCount = 0;
272 (*zdip)->recordBytes = 0;
273 (*zdip)->SUInfo = NULL;
/* copy the name using its explicit length and add the terminating NUL */
275 (*zdip)->databaseName = nmem_malloc (zei->nmem,
276 1+node_name->u.data.len);
277 memcpy ((*zdip)->databaseName, node_name->u.data.data,
278 node_name->u.data.len);
279 (*zdip)->databaseName[node_name->u.data.len] = '\0';
280 (*zdip)->sysno = atoi_n (node_id->u.data.data,
281 node_id->u.data.len);
282 zdip = &(*zdip)->next;
/* the two lookups below feed zei->ordinalSU and zei->runNumber; the tag
 * names are on lines not visible here */
284 np = data1_search_tag (zei->dh, node_zebra->child,
287 assert (np && np->which == DATA1N_data);
288 zei->ordinalSU = atoi_n (np->u.data.data, np->u.data.len);
290 np = data1_search_tag (zei->dh, node_zebra->child,
293 assert (np && np->which == DATA1N_data);
294 zei->runNumber = atoi_n (np->u.data.data, np->u.data.len);
/* fresh-target path: parse an inline SGML template (the assignment
 * target of this call is not visible in this excerpt) */
306 data1_read_sgml (zei->dh, zei->nmem,
307 "<explain><targetInfo>targetInfo\n"
309 "<namedResultSets>1</>\n"
310 "<multipleDBSearch>1</>\n"
311 "<nicknames><name>Zebra</></>\n"
313 /* write now because we want to be sure about the sysno */
314 trec = rec_new (records);
315 trec->info[recInfo_fileType] =
316 rec_strdup ("grs.sgml", &trec->size[recInfo_fileType]);
317 trec->info[recInfo_databaseName] =
318 rec_strdup ("IR-Explain-1", &trec->size[recInfo_databaseName]);
/* serialise the target tree and store a private copy in the record */
320 sgml_buf = data1_nodetoidsgml(dh, zei->data1_target, 0, &sgml_len);
321 trec->info[recInfo_storeData] = xmalloc (sgml_len);
322 memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len);
323 trec->size[recInfo_storeData] = sgml_len;
325 rec_put (records, &trec);
330 zebraExplain_newDatabase (zei, "IR-Explain-1");
/*
 * Load the stored details of one database entry.
 * Fetches the record zdi->sysno, parses it into zdi->data1_database and
 * extracts recordBytes, the set/use/ordinal list (appended to zdi->SUInfo)
 * and recordCount from the tree.
 */
335 static void zebraExplain_readDatabase (ZebraExplainInfo zei,
336 struct zebDatabaseInfoB *zdi)
339 data1_node *node_dbinfo, *node_zebra, *node_list, *np;
/* zsuip addresses the link where the next set/use entry is appended */
340 struct zebSUInfoB **zsuip = &zdi->SUInfo;
343 rec = rec_get (zei->records, zdi->sysno);
345 zdi->data1_database = read_sgml_rec (zei->dh, zei->nmem, rec);
347 node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child,
350 node_zebra = data1_search_tag (zei->dh, node_dbinfo->child,
/* NOTE(review): this lookup searches node_dbinfo's children and its
 * result feeds zdi->recordBytes, whereas zebraExplain_writeDatabase adds
 * "recordBytes" under the zebraInfo node. The tag name used here is not
 * visible; if it is "recordBytes" the value would not be found - confirm. */
352 np = data1_search_tag (zei->dh, node_dbinfo->child,
354 if (np && np->child && np->child->which == DATA1N_data)
356 zdi->recordBytes = atoi_n (np->child->u.data.data,
357 np->child->u.data.len);
359 node_list = data1_search_tag (zei->dh, node_zebra->child,
361 for (np = node_list->child; np; np = np->next)
363 data1_node *node_set = NULL;
364 data1_node *node_use = NULL;
365 data1_node *node_ordinal = NULL;
/* only "attr" tag children are of interest */
367 if (np->which != DATA1N_tag || strcmp (np->u.tag.tag, "attr"))
369 for (np2 = np->child; np2; np2 = np2->next)
/* consider only tags that have a data node as first child */
371 if (np2->which != DATA1N_tag || !np2->child ||
372 np2->child->which != DATA1N_data)
374 if (!strcmp (np2->u.tag.tag, "set"))
375 node_set = np2->child;
376 else if (!strcmp (np2->u.tag.tag, "use"))
377 node_use = np2->child;
378 else if (!strcmp (np2->u.tag.tag, "ordinal"))
379 node_ordinal = np2->child;
/* every attr entry must provide all three values */
381 assert (node_set && node_use && node_ordinal);
383 *zsuip = xmalloc (sizeof(**zsuip));
384 (*zsuip)->info.set = atoi_n (node_set->u.data.data,
385 node_set->u.data.len);
386 (*zsuip)->info.use = atoi_n (node_use->u.data.data,
387 node_use->u.data.len);
388 (*zsuip)->info.ordinal = atoi_n (node_ordinal->u.data.data,
389 node_ordinal->u.data.len);
390 logf (LOG_DEBUG, "set=%d use=%d ordinal=%d",
391 (*zsuip)->info.set, (*zsuip)->info.use, (*zsuip)->info.ordinal);
392 zsuip = &(*zsuip)->next;
/* NOTE(review): np->child is dereferenced in the last condition below
 * without the np->child NULL test used for the recordBytes lookup above */
396 if ((np = data1_search_tag (zei->dh, node_dbinfo->child,
398 (np = data1_search_tag (zei->dh, np->child,
399 "recordCountActual")) &&
400 np->child->which == DATA1N_data)
402 zdi->recordCount = atoi_n (np->child->u.data.data,
403 np->child->u.data.len);
/*
 * Make `database` the current database of the handle.
 * First compares against the already-current entry, then searches the
 * database list by exact name (strcmp). zebraExplain_readDatabase is called
 * for the entry (presumably only when its readFlag is set - the condition
 * is not visible) before it becomes zei->curDatabaseInfo.
 * The return statements are not visible in this excerpt.
 */
409 int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database)
411 struct zebDatabaseInfoB *zdi;
/* fast path: the requested database is already selected */
414 if (zei->curDatabaseInfo &&
415 !strcmp (zei->curDatabaseInfo->databaseName, database))
417 for (zdi = zei->databaseInfo; zdi; zdi=zdi->next)
419 if (!strcmp (zdi->databaseName, database))
425 zebraExplain_readDatabase (zei, zdi);
426 zei->curDatabaseInfo = zdi;
/*
 * Register a new database named `database` and make it current.
 * The existing list is first searched for the same name (what happens on a
 * match is on lines not visible here). A new entry is then allocated,
 * pushed on the front of the database list, given a fresh database info
 * tree parsed from an inline SGML template, and a "name" element holding
 * the database name is added to that tree.
 * The return statements are not visible in this excerpt.
 */
430 int zebraExplain_newDatabase (ZebraExplainInfo zei, const char *database)
432 struct zebDatabaseInfoB *zdi;
433 data1_node *node_dbinfo;
436 for (zdi = zei->databaseInfo; zdi; zdi=zdi->next)
438 if (!strcmp (zdi->databaseName, database))
443 /* it's new really. make it */
444 zdi = xmalloc (sizeof(*zdi));
/* link the new entry in at the head of the list */
445 zdi->next = zei->databaseInfo;
446 zei->databaseInfo = zdi;
448 zdi->recordCount = 0;
449 zdi->recordBytes = 0;
/* the name is duplicated into the handle's NMEM pool */
451 zdi->databaseName = nmem_strdup (zei->nmem, database);
457 zdi->data1_database =
458 data1_read_sgml (zei->dh, zei->nmem,
459 "<explain><databaseInfo>databaseInfo\n"
464 node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child,
466 assert (node_dbinfo);
468 data1_add_tagdata_text (zei->dh, node_dbinfo, "name",
469 database, zei->nmem);
/* dump the new tree to stderr */
472 data1_pr_tree (zei->dh, zdi->data1_database, stderr);
476 zei->curDatabaseInfo = zdi;
/*
 * Store the explain record of one database.
 * Two record sources are visible: an existing record fetched by zdi->sysno
 * whose old store data is released, or a new record whose sysno is saved
 * in zdi (the condition choosing between them is not visible in this
 * excerpt). The database tree is then updated with recordCount, the
 * zebraInfo/attrlist entries and recordBytes, serialised to SGML and
 * written with rec_put.
 */
480 static void zebraExplain_writeDatabase (ZebraExplainInfo zei,
481 struct zebDatabaseInfoB *zdi)
486 data1_node *node_dbinfo, *node_list, *node_count, *node_zebra;
487 struct zebSUInfoB *zsui;
/* existing record: drop the previous store data before replacing it */
494 drec = rec_get (zei->records, zdi->sysno);
495 xfree (drec->info[recInfo_storeData]);
/* new record: remember the sysno assigned to it */
499 drec = rec_new (zei->records);
500 zdi->sysno = drec->sysno;
502 drec->info[recInfo_fileType] =
503 rec_strdup ("grs.sgml", &drec->size[recInfo_fileType]);
504 drec->info[recInfo_databaseName] =
505 rec_strdup ("IR-Explain-1",
506 &drec->size[recInfo_databaseName]);
508 assert (zdi->data1_database);
509 node_dbinfo = data1_search_tag (zei->dh, zdi->data1_database->child,
/* data1_make_tag empties the node's child list, so each of the
 * subtrees below is rebuilt from the in-memory values */
512 node_count = data1_make_tag (zei->dh, node_dbinfo,
513 "recordCount", zei->nmem);
514 data1_add_tagdata_int (zei->dh, node_count, "recordCountActual",
515 zdi->recordCount, zei->nmem);
517 /* zebra info (private) */
518 node_zebra = data1_make_tag (zei->dh, node_dbinfo,
519 "zebraInfo", zei->nmem);
520 node_list = data1_make_tag (zei->dh, node_zebra,
521 "attrlist", zei->nmem);
/* one entry with set, use and ordinal per set/use list node */
522 for (zsui = zdi->SUInfo; zsui; zsui = zsui->next)
524 data1_node *node_attr;
525 node_attr = data1_add_tag (zei->dh, node_list,
527 data1_add_tagdata_int (zei->dh, node_attr, "set",
528 zsui->info.set, zei->nmem);
529 data1_add_tagdata_int (zei->dh, node_attr, "use",
530 zsui->info.use, zei->nmem);
531 data1_add_tagdata_int (zei->dh, node_attr, "ordinal",
532 zsui->info.ordinal, zei->nmem);
534 data1_add_tagdata_int (zei->dh, node_zebra,
535 "recordBytes", zdi->recordBytes, zei->nmem);
536 /* convert to "SGML" and write it */
538 data1_pr_tree (zei->dh, zdi->data1_database, stderr);
540 sgml_buf = data1_nodetoidsgml(zei->dh, zdi->data1_database,
/* the record gets its own copy of the serialised buffer */
542 drec->info[recInfo_storeData] = xmalloc (sgml_len);
543 memcpy (drec->info[recInfo_storeData], sgml_buf, sgml_len);
544 drec->size[recInfo_storeData] = sgml_len;
546 rec_put (zei->records, &drec);
/*
 * Register attribute set p_this in zei->attsets and recurse into
 * p_this->children.
 * Names of already registered sets are compared with strcmp; a new entry
 * (name duplicated into the NMEM pool, ordinal copied from p_this) is
 * pushed on the front of the list. The loop over registered entries and
 * the termination test for the recursion are on lines not visible in this
 * excerpt.
 */
549 static void trav_attset (data1_handle dh, ZebraExplainInfo zei,
550 data1_attset *p_this)
552 struct zebraExplainAttset *p_reg = zei->attsets;
558 if (!strcmp (p_this->name, p_reg->name))
564 p_reg = nmem_malloc (zei->nmem, sizeof (*p_reg));
565 p_reg->name = nmem_strdup (zei->nmem, p_this->name);
566 p_reg->ordinal = p_this->ordinal;
567 p_reg->next = zei->attsets;
568 zei->attsets = p_reg;
570 trav_attset (dh, zei, p_this->children);
/*
 * Log the name of abstract syntax `a` and register its attribute set via
 * trav_attset. The opaque pointer h is cast to ZebraExplainInfo; the
 * signature presumably serves as a traversal callback - its caller is not
 * visible in this excerpt.
 */
573 static void trav_absyn (data1_handle dh, void *h, data1_absyn *a)
575 logf (LOG_LOG, "absyn %s", a->name);
576 trav_attset (dh, (ZebraExplainInfo) h, a->attset);
/*
 * Store the target info record (record 1).
 * Releases the record's previous store data, rebuilds the zebraInfo subtree
 * of the target tree (version, databaseList with name and id per database,
 * ordinalSU, runNumber, attsetList), serialises the tree to SGML and writes
 * it back with rec_put.
 */
579 static void zebraExplain_writeTarget (ZebraExplainInfo zei)
581 struct zebDatabaseInfoB *zdi;
582 data1_node *node_tgtinfo, *node_list, *node_zebra;
590 trec = rec_get (zei->records, 1);
591 xfree (trec->info[recInfo_storeData]);
593 node_tgtinfo = data1_search_tag (zei->dh, zei->data1_target->child,
595 assert (node_tgtinfo);
/* data1_make_tag empties zebraInfo's child list, so everything below is
 * added to a clean node */
597 node_zebra = data1_make_tag (zei->dh, node_tgtinfo,
598 "zebraInfo", zei->nmem);
599 data1_add_tagdata_text (zei->dh, node_zebra, "version",
600 ZEBRAVER, zei->nmem);
601 node_list = data1_add_tag (zei->dh, node_zebra,
602 "databaseList", zei->nmem);
/* one "database" entry per known database */
603 for (zdi = zei->databaseInfo; zdi; zdi = zdi->next)
606 node_db = data1_add_tag (zei->dh, node_list,
607 "database", zei->nmem);
608 data1_add_tagdata_text (zei->dh, node_db, "name",
609 zdi->databaseName, zei->nmem);
610 data1_add_tagdata_int (zei->dh, node_db, "id",
611 zdi->sysno, zei->nmem);
613 data1_add_tagdata_int (zei->dh, node_zebra, "ordinalSU",
614 zei->ordinalSU, zei->nmem);
616 data1_add_tagdata_int (zei->dh, node_zebra, "runNumber",
617 zei->runNumber, zei->nmem);
/* an attsetList node is added; nothing visible in this excerpt fills it */
619 node_list = data1_add_tag (zei->dh, node_zebra,
620 "attsetList", zei->nmem);
621 /* convert to "SGML" and write it */
623 data1_pr_tree (zei->dh, zei->data1_target, stderr);
625 sgml_buf = data1_nodetoidsgml(zei->dh, zei->data1_target,
/* the record gets its own copy of the serialised buffer */
627 trec->info[recInfo_storeData] = xmalloc (sgml_len);
628 memcpy (trec->info[recInfo_storeData], sgml_buf, sgml_len);
629 trec->size[recInfo_storeData] = sgml_len;
631 rec_put (zei->records, &trec);
/*
 * Look up the ordinal assigned to the (set, use) pair in the current
 * database. A current database must have been selected (asserted).
 * Returns the stored ordinal on a match; the value returned when no entry
 * matches is on a line not visible in this excerpt.
 */
634 int zebraExplain_lookupSU (ZebraExplainInfo zei, int set, int use)
636 struct zebSUInfoB *zsui;
638 assert (zei->curDatabaseInfo);
639 for (zsui = zei->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next)
640 if (zsui->info.use == use && zsui->info.set == set)
641 return zsui->info.ordinal;
/*
 * Add a (set, use) pair to the current database and return its ordinal.
 * The existing list is scanned for the same pair first (the action taken
 * on a match is on a line not visible here). A new entry is pushed on the
 * front of the list, the database is marked dirty, and the entry receives
 * the handle's next ordinalSU value, which is then incremented.
 */
645 int zebraExplain_addSU (ZebraExplainInfo zei, int set, int use)
647 struct zebSUInfoB *zsui;
649 assert (zei->curDatabaseInfo);
650 for (zsui = zei->curDatabaseInfo->SUInfo; zsui; zsui=zsui->next)
651 if (zsui->info.use == use && zsui->info.set == set)
653 zsui = xmalloc (sizeof(*zsui));
654 zsui->next = zei->curDatabaseInfo->SUInfo;
655 zei->curDatabaseInfo->SUInfo = zsui;
656 zei->curDatabaseInfo->dirty = 1;
658 zsui->info.set = set;
659 zsui->info.use = use;
/* post-increment: this entry gets the current counter value */
660 zsui->info.ordinal = (zei->ordinalSU)++;
661 return zsui->info.ordinal;
/*
 * Adjust the byte total of the current database by adjust_num and mark the
 * database dirty. A current database must have been selected (asserted).
 */
664 void zebraExplain_recordBytesIncrement (ZebraExplainInfo zei, int adjust_num)
666 assert (zei->curDatabaseInfo);
668 zei->curDatabaseInfo->recordBytes += adjust_num;
669 zei->curDatabaseInfo->dirty = 1;
/*
 * Adjust the record count of the current database by adjust_num and mark
 * the database dirty. A current database must have been selected
 * (asserted).
 */
672 void zebraExplain_recordCountIncrement (ZebraExplainInfo zei, int adjust_num)
674 assert (zei->curDatabaseInfo);
676 zei->curDatabaseInfo->recordCount += adjust_num;
677 zei->curDatabaseInfo->dirty = 1;
/*
 * Add adjust_num to the handle's run number and return the updated value.
 */
680 int zebraExplain_runNumberIncrement (ZebraExplainInfo zei, int adjust_num)
684 return zei->runNumber += adjust_num;
/*
 * Return the RecordAttr block of rec, creating it when absent.
 * An existing rec->info[recInfo_attr] is returned as is. Otherwise a new
 * block is allocated, attached to the record together with its size, and
 * initialised with zero size/offset and the handle's current run number.
 * The end of this function may lie beyond the visible excerpt.
 */
687 RecordAttr *rec_init_attr (ZebraExplainInfo zei, Record rec)
689 RecordAttr *recordAttr;
691 if (rec->info[recInfo_attr])
692 return (RecordAttr *) rec->info[recInfo_attr];
693 recordAttr = xmalloc (sizeof(*recordAttr));
694 rec->info[recInfo_attr] = (char *) recordAttr;
695 rec->size[recInfo_attr] = sizeof(*recordAttr);
697 recordAttr->recordSize = 0;
698 recordAttr->recordOffset = 0;
699 recordAttr->runNumber = zei->runNumber;