From d9a9544ebf605f5141ced91c4cf2c2fc97b93539 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 13 May 2002 14:13:43 +0000 Subject: [PATCH] XML reader for data1 (EXPAT) --- CHANGELOG | 2 + index/zinfo.c | 45 ++++++---- recctrl/Makefile.am | 4 +- recctrl/grsread.h | 31 +------ recctrl/marcread.c | 13 +-- recctrl/recgrs.c | 249 ++------------------------------------------------- recctrl/xmlread.c | 46 ++++++++++ 7 files changed, 95 insertions(+), 295 deletions(-) create mode 100644 recctrl/xmlread.c diff --git a/CHANGELOG b/CHANGELOG index 568c80e..40ae05a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,8 @@ --- 1.2.0 2002/MM/DD +XML filter (-t grs.xml). + Multiple registers. New setting in resource 'root' that holds base directory for register(s). A group a databases may be put in separate register in directory root/reg by using db name 'reg/db1' ... 'reg/dbN'. diff --git a/index/zinfo.c b/index/zinfo.c index 4e471a6..a29f76a 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -3,7 +3,7 @@ * All rights reserved. 
* Sebastian Hammer, Adam Dickmeiss * - * $Id: zinfo.c,v 1.29 2002-05-07 11:05:19 adam Exp $ + * $Id: zinfo.c,v 1.30 2002-05-13 14:13:43 adam Exp $ */ #include @@ -622,7 +622,7 @@ int zebraExplain_curDatabase (ZebraExplainInfo zei, const char *database) static void zebraExplain_initCommonInfo (ZebraExplainInfo zei, data1_node *n) { - data1_node *c = data1_mk_tag (zei->dh, zei->nmem, "commonInfo", n); + data1_node *c = data1_mk_tag (zei->dh, zei->nmem, "commonInfo", 0, n); data1_mk_tag_data_text (zei->dh, c, "dateAdded", zei->date, zei->nmem); data1_mk_tag_data_text (zei->dh, c, "dateChanged", zei->date, zei->nmem); data1_mk_tag_data_text (zei->dh, c, "languageCode", "EN", zei->nmem); @@ -638,8 +638,8 @@ static void zebraExplain_updateCommonInfo (ZebraExplainInfo zei, data1_node *n) static void zebraExplain_initAccessInfo (ZebraExplainInfo zei, data1_node *n) { - data1_node *c = data1_mk_tag (zei->dh, zei->nmem, "accessInfo", n); - data1_node *d = data1_mk_tag (zei->dh, zei->nmem, "unitSystems", c); + data1_node *c = data1_mk_tag (zei->dh, zei->nmem, "accessInfo", 0, n); + data1_node *d = data1_mk_tag (zei->dh, zei->nmem, "unitSystems", 0, c); data1_mk_tag_data_text (zei->dh, d, "string", "ISO", zei->nmem); } @@ -781,9 +781,9 @@ static void writeAttributeValueDetails (ZebraExplainInfo zei, if (set_ordinal != zsui->info.set) continue; node_attvalue = data1_mk_tag (zei->dh, zei->nmem, "attributeValue", - node_atvs); + 0 /* attr */, node_atvs); node_value = data1_mk_tag (zei->dh, zei->nmem, "value", - node_attvalue); + 0 /* attr */, node_attvalue); data1_mk_tag_data_int (zei->dh, node_value, "numeric", zsui->info.use, zei->nmem); } @@ -822,13 +822,14 @@ static void zebraExplain_writeCategoryList (ZebraExplainInfo zei, node_ci = data1_search_tag (zei->dh, node_categoryList->child, "categoryList"); assert (node_ci); - node_ci = data1_mk_tag (zei->dh, zei->nmem, "categories", node_ci); + node_ci = data1_mk_tag (zei->dh, zei->nmem, "categories", 0 /* attr */, + node_ci); 
assert (node_ci); for (i = 0; category[i]; i++) { - data1_node *node_cat = data1_mk_tag (zei->dh, zei->nmem, - "category", node_ci); + data1_node *node_cat = data1_mk_tag (zei->dh, zei->nmem, "category", + 0 /* attr */, node_ci); data1_mk_tag_data_text (zei->dh, node_cat, "name", category[i], zei->nmem); @@ -902,7 +903,8 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, break; set_min = set_ordinal; node_asd = data1_mk_tag (zei->dh, zei->nmem, - "attributeSetDetails", node_attributesBySet); + "attributeSetDetails", + 0 /* attr */, node_attributesBySet); attset = data1_attset_search_id (zei->dh, set_ordinal); if (!attset) @@ -926,13 +928,16 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, oid, zei->nmem); node_abt = data1_mk_tag (zei->dh, zei->nmem, - "attributesByType", node_asd); + "attributesByType", + 0 /*attr */, node_asd); node_atd = data1_mk_tag (zei->dh, zei->nmem, - "attributeTypeDetails", node_abt); + "attributeTypeDetails", + 0 /* attr */, node_abt); data1_mk_tag_data_int (zei->dh, node_atd, "type", 1, zei->nmem); node_atvs = data1_mk_tag (zei->dh, zei->nmem, - "attributeValues", node_atd); + "attributeValues", + 0 /* attr */, node_atd); writeAttributeValueDetails (zei, zad, node_atvs, attset); } } @@ -948,7 +953,8 @@ static void zebraExplain_writeAttributeDetails (ZebraExplainInfo zei, int oid[OID_SIZE]; data1_node *node_attr; - node_attr = data1_mk_tag (zei->dh, zei->nmem, "attr", node_list); + node_attr = data1_mk_tag (zei->dh, zei->nmem, "attr", 0 /* attr */, + node_list); oident.proto = PROTO_Z3950; oident.oclass = CLASS_ATTSET; @@ -1044,10 +1050,11 @@ static void writeAttributeValues (ZebraExplainInfo zei, data1_node *node_value; node_value = data1_mk_tag (zei->dh, zei->nmem, "attributeValue", - node_values); + 0 /* attr */, node_values); data1_mk_tag_data_text (zei->dh, node_value, "name", atts->name, zei->nmem); - node_value = data1_mk_tag (zei->dh, zei->nmem, "value", node_value); + node_value = 
data1_mk_tag (zei->dh, zei->nmem, "value", + 0 /* attr */, node_value); data1_mk_tag_data_int (zei->dh, node_value, "numeric", atts->value, zei->nmem); } @@ -1103,7 +1110,7 @@ static void zebraExplain_writeAttributeSet (ZebraExplainInfo zei, data1_mk_tag_data_int (zei->dh, node_atttype, "type", 1, zei->nmem); node_values = data1_mk_tag (zei->dh, zei->nmem, - "attributeValues", node_atttype); + "attributeValues", 0 /* attr */, node_atttype); if (attset) writeAttributeValues (zei, node_values, attset); @@ -1154,12 +1161,12 @@ static void zebraExplain_writeTarget (ZebraExplainInfo zei, int key_flush) data1_mk_tag_data_text (zei->dh, node_zebra, "version", ZEBRAVER, zei->nmem); node_list = data1_mk_tag (zei->dh, zei->nmem, - "databaseList", node_zebra); + "databaseList", 0 /* attr */, node_zebra); for (zdi = zei->databaseInfo; zdi; zdi = zdi->next) { data1_node *node_db; node_db = data1_mk_tag (zei->dh, zei->nmem, - "database", node_list); + "database", 0 /* attr */, node_list); data1_mk_tag_data_text (zei->dh, node_db, "name", zdi->databaseName, zei->nmem); data1_mk_tag_data_int (zei->dh, node_db, "id", diff --git a/recctrl/Makefile.am b/recctrl/Makefile.am index c36d5dd..eff49b8 100644 --- a/recctrl/Makefile.am +++ b/recctrl/Makefile.am @@ -1,7 +1,7 @@ -## $Id: Makefile.am,v 1.2 2000-05-02 11:26:13 adam Exp $ +## $Id: Makefile.am,v 1.3 2002-05-13 14:13:43 adam Exp $ noinst_LIBRARIES = librecctrl.a librecctrl_a_SOURCES = recctrl.c recgrs.c sgmlread.c regxread.c \ - marcread.c rectext.c grsread.h recgrs.h rectext.h + marcread.c rectext.c grsread.h recgrs.h rectext.h xmlread.c INCLUDES = -I$(srcdir)/../include $(YAZINC) $(TCL_INCLUDE) diff --git a/recctrl/grsread.h b/recctrl/grsread.h index 0f9d05d..c68d103 100644 --- a/recctrl/grsread.h +++ b/recctrl/grsread.h @@ -1,35 +1,9 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. 
* Sebastian Hammer, Adam Dickmeiss * - * $Log: grsread.h,v $ - * Revision 1.8 1999-11-30 13:48:04 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.7 1999/05/26 07:49:14 adam - * C++ compilation. - * - * Revision 1.6 1999/05/20 12:57:18 adam - * Implemented TCL filter. Updated recctrl system. - * - * Revision 1.5 1999/02/02 14:51:26 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.4 1997/09/17 12:19:21 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.3 1997/09/04 13:54:40 adam - * Added MARC filter - type grs.marc. where syntax refers - * to abstract syntax. New method tellf in retrieve/extract method. - * - * Revision 1.2 1997/04/30 08:56:08 quinn - * null - * - * Revision 1.1 1996/10/11 10:57:23 adam - * New module recctrl. Used to manage records (extract/retrieval). - * + * $Id: grsread.h,v 1.9 2002-05-13 14:13:43 adam Exp $ */ #ifndef GRSREAD_H @@ -65,6 +39,7 @@ extern RecTypeGrs recTypeGrs_sgml; extern RecTypeGrs recTypeGrs_regx; extern RecTypeGrs recTypeGrs_tcl; extern RecTypeGrs recTypeGrs_marc; +extern RecTypeGrs recTypeGrs_xml; #ifdef __cplusplus } diff --git a/recctrl/marcread.c b/recctrl/marcread.c index 4afdd7a..c85a951 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -3,7 +3,7 @@ * All rights reserved. 
* Sebastian Hammer, Adam Dickmeiss * - * $Id: marcread.c,v 1.13 2002-05-03 13:50:25 adam Exp $ + * $Id: marcread.c,v 1.14 2002-05-13 14:13:43 adam Exp $ */ #include #include @@ -102,7 +102,7 @@ data1_node *grs_read_marc (struct grs_read_info *p) /* generate field node */ - res = data1_mk_tag_n (p->dh, p->mem, tag, 3, res_root); + res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, res_root); #if MARC_DEBUG fprintf (outf, "%s ", tag); @@ -121,7 +121,7 @@ data1_node *grs_read_marc (struct grs_read_info *p) int j; #endif res = data1_mk_tag_n (p->dh, p->mem, - buf+i, indicator_length, res); + buf+i, indicator_length, 0 /* attr */, res); #if MARC_DEBUG for (j = 0; jdh, p->mem, - buf+i+1, identifier_length-1, parent); + buf+i+1, identifier_length-1, + 0 /* attr */, parent); #if MARC_DEBUG fprintf (outf, " $"); for (j = 1; j i0) { - data1_node *res = data1_mk_tag_n (p->dh, p->mem, - "@", 1, parent); + data1_node *res = data1_mk_tag (p->dh, p->mem, "@", 0 /* attr */, + parent); data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res); } #if MARC_DEBUG diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index a779462..77ce0ae 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,242 +1,8 @@ /* - * Copyright (C) 1994-2001, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. * - * $Log: recgrs.c,v $ - * Revision 1.48 2002-05-07 11:05:20 adam - * data1 updates. Run number fix - * - * Revision 1.47 2002/05/03 13:50:25 adam - * data1 cleanup - * - * Revision 1.46 2002/04/13 18:16:43 adam - * More XPATH work; common sequence numbers for extract keys - * - * Revision 1.45 2002/04/12 14:40:42 adam - * Work on XPATH - * - * Revision 1.44 2002/04/11 20:09:47 adam - * work on string tag indexing - * - * Revision 1.43 2002/03/21 23:06:36 adam - * Source 'tag' in abs-file - * - * Revision 1.42 2002/02/20 17:30:01 adam - * Work on new API. 
Locking system re-implemented - * - * Revision 1.41 2001/05/22 21:01:47 adam - * Removed print of data1 tree on stdout so that inetd works again. - * - * Revision 1.40 2001/03/29 21:31:31 adam - * Fixed "record begin" for Tcl filter. - * - * Revision 1.39 2000/12/05 19:09:15 adam - * Fixed problem where indexer could crash if abstract syntax was undefined. - * - * Revision 1.38 2000/12/05 14:44:58 adam - * Fixed minor bug that could cause zmbol to break it data were emitted - * with not parent tags. - * - * Revision 1.37 2000/12/05 12:22:53 adam - * Termlist source implemented (so that we can index values of XML/SGML - * attributes). - * - * Revision 1.36 2000/12/05 10:01:44 adam - * Fixed bug regarding user-defined attribute sets. - * - * Revision 1.35 2000/11/29 15:21:31 adam - * Fixed problem with passwd db. - * - * Revision 1.34 2000/02/25 13:24:49 adam - * Fixed bug regarding pointer conversion that showed up on OSF V5. - * - * Revision 1.33 1999/11/30 13:48:04 adam - * Improved installation. Updated for inclusion of YAZ header files. - * - * Revision 1.32 1999/09/07 07:19:21 adam - * Work on character mapping. Implemented replace rules. - * - * Revision 1.31 1999/07/14 10:56:43 adam - * Fixed potential memory leak. - * - * Revision 1.30 1999/07/06 12:26:41 adam - * Retrieval handler obeys schema and handles XML transfer syntax. - * - * Revision 1.29 1999/05/26 07:49:14 adam - * C++ compilation. - * - * Revision 1.28 1999/05/21 12:00:17 adam - * Better diagnostics for extraction process. - * - * Revision 1.27 1999/05/20 12:57:18 adam - * Implemented TCL filter. Updated recctrl system. - * - * Revision 1.26 1999/03/02 16:15:44 quinn - * Added "tagsysno" and "tagrank" directives to zebra.cfg. - * - * Revision 1.25 1999/02/18 15:01:26 adam - * Minor changes. - * - * Revision 1.24 1999/02/02 14:51:28 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.23 1998/10/18 07:51:10 adam - * Changed one logf call. 
- * - * Revision 1.22 1998/10/16 08:14:37 adam - * Updated record control system. - * - * Revision 1.21 1998/07/01 09:16:10 adam - * Element localno only added when it's greater than 0. - * - * Revision 1.20 1998/05/20 10:12:26 adam - * Implemented automatic EXPLAIN database maintenance. - * Modified Zebra to work with ASN.1 compiled version of YAZ. - * - * Revision 1.19 1998/03/11 11:19:05 adam - * Changed the way sequence numbers are generated. - * - * Revision 1.18 1998/03/05 08:41:31 adam - * Minor changes. - * - * Revision 1.17 1998/02/10 12:03:06 adam - * Implemented Sort. - * - * Revision 1.16 1998/01/29 13:38:17 adam - * Fixed problem with mapping to record with unknown schema. - * - * Revision 1.15 1998/01/26 10:37:57 adam - * Better diagnostics. - * - * Revision 1.14 1997/11/06 11:41:01 adam - * Implemented "begin variant" for the sgml.regx filter. - * - * Revision 1.13 1997/10/31 12:35:44 adam - * Added a few log statements. - * - * Revision 1.12 1997/10/29 12:02:22 adam - * Using oid_ent_to_oid used instead of the non thread-safe oid_getoidbyent. - * - * Revision 1.11 1997/10/27 14:34:00 adam - * Work on generic character mapping depending on "structure" field - * in abstract syntax file. - * - * Revision 1.10 1997/09/18 08:59:21 adam - * Extra generic handle for the character mapping routines. - * - * Revision 1.9 1997/09/17 12:19:21 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.8 1997/09/09 13:38:14 adam - * Partial port to WIN95/NT. - * - * Revision 1.7 1997/09/05 15:30:10 adam - * Changed prototype for chr_map_input - added const. - * Added support for C++, headers uses extern "C" for public definitions. - * - * Revision 1.6 1997/09/04 13:54:40 adam - * Added MARC filter - type grs.marc. where syntax refers - * to abstract syntax. New method tellf in retrieve/extract method. 
- * - * Revision 1.5 1997/07/15 16:29:03 adam - * Initialized dummy variable to keep checker gcc happy. - * - * Revision 1.4 1997/04/30 08:56:08 quinn - * null - * - * Revision 1.2 1996/10/11 16:06:43 quinn - * Revision 1.3 1997/02/24 10:41:50 adam - * Cleanup of code and commented out the "end element-end-record" code. - * - * Revision 1.2 1996/10/11 16:06:43 quinn - * Fixed arguments to nodetogr - * - * Revision 1.1 1996/10/11 10:57:25 adam - * New module recctrl. Used to manage records (extract/retrieval). - * - * Revision 1.29 1996/10/08 10:30:21 quinn - * Fixed type mismatch - * - * Revision 1.28 1996/10/07 16:06:40 quinn - * Added SOIF support - * - * Revision 1.27 1996/06/11 10:54:12 quinn - * Relevance work - * - * Revision 1.26 1996/06/06 12:08:45 quinn - * Added showRecord function - * - * Revision 1.25 1996/06/04 14:18:53 quinn - * Charmap work - * - * Revision 1.24 1996/06/04 13:27:54 quinn - * More work on charmapping - * - * Revision 1.23 1996/06/04 10:19:01 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.22 1996/06/03 10:15:27 quinn - * Various character-mapping. - * - * Revision 1.21 1996/05/31 13:27:24 quinn - * Character-conversion in phrases, too. - * - * Revision 1.19 1996/05/16 15:31:14 quinn - * a7 - * - * Revision 1.18 1996/05/09 07:28:56 quinn - * Work towards phrases and multiple registers - * - * Revision 1.17 1996/05/01 13:46:37 adam - * First work on multiple records in one file. - * New option, -offset, to the "unread" command in the filter module. - * - * Revision 1.16 1996/01/17 14:57:54 adam - * Prototype changed for reader functions in extract/retrieve. File - * is identified by 'void *' instead of 'int. - * - * Revision 1.15 1996/01/08 19:15:47 adam - * New input filter that works! - * - * Revision 1.14 1995/12/15 12:36:11 adam - * Retrieval calls data1_read_regx when subType is specified. 
- * - * Revision 1.13 1995/12/15 12:24:43 quinn - * *** empty log message *** - * - * Revision 1.12 1995/12/15 12:20:28 quinn - * *** empty log message *** - * - * Revision 1.11 1995/12/15 12:07:57 quinn - * Changed extraction strategy. - * - * Revision 1.10 1995/12/14 11:10:48 quinn - * Explain work - * - * Revision 1.9 1995/12/13 17:14:05 quinn - * *** empty log message *** - * - * Revision 1.8 1995/12/13 15:33:18 quinn - * *** empty log message *** - * - * Revision 1.7 1995/12/13 13:45:39 quinn - * Changed data1 to use nmem. - * - * Revision 1.6 1995/12/04 14:22:30 adam - * Extra arg to recType_byName. - * Started work on new regular expression parsed input to - * structured records. - * - * Revision 1.5 1995/11/28 14:18:37 quinn - * Set output_format. - * - * Revision 1.4 1995/11/21 13:14:49 quinn - * Fixed end-of-data-field problem (maybe). - * - * Revision 1.3 1995/11/15 19:13:09 adam - * Work on record management. - * + * $Id: recgrs.c,v 1.49 2002-05-13 14:13:43 adam Exp $ */ #include @@ -299,7 +65,7 @@ static int read_grs_type (struct grs_handlers *h, static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t) { - struct grs_handler *gh = (struct grs_handler *) malloc (sizeof(*gh)); + struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh)); gh->next = h->handlers; h->handlers = gh; gh->initFlag = 0; @@ -309,7 +75,7 @@ static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t) static void *grs_init(RecType recType) { - struct grs_handlers *h = (struct grs_handlers *) malloc (sizeof(*h)); + struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h)); h->handlers = 0; grs_add_handler (h, recTypeGrs_sgml); @@ -318,6 +84,9 @@ static void *grs_init(RecType recType) grs_add_handler (h, recTypeGrs_tcl); #endif grs_add_handler (h, recTypeGrs_marc); +#if YAZ_HAVE_EXPAT + grs_add_handler (h, recTypeGrs_xml); +#endif return h; } @@ -330,10 +99,10 @@ static void grs_destroy(void *clientData) gh_next = gh->next; if (gh->initFlag) 
(*gh->type->destroy)(gh->clientData); - free (gh); + xfree (gh); gh = gh_next; } - free (h); + xfree (h); } static void index_xpath (data1_node *n, struct recExtractCtrl *p, diff --git a/recctrl/xmlread.c b/recctrl/xmlread.c new file mode 100644 index 0000000..b413e17 --- /dev/null +++ b/recctrl/xmlread.c @@ -0,0 +1,46 @@ +/* + * Copyright (C) 1994-2002, Index Data + * All rights reserved. + * + * $Id: xmlread.c,v 1.1 2002-05-13 14:13:43 adam Exp $ + */ + +#if YAZ_HAVE_EXPAT + +#include +#include + +#include "grsread.h" + +struct xml_info { + int dummy; +}; +/* Allocate the per-handler state (struct xml_info) with xmalloc and return it. */ +static void *grs_init_xml(void) +{ + struct xml_info *p = (struct xml_info *) xmalloc (sizeof(*p)); + return p; +} +/* Forward p's dh, readf, fh and mem to data1_read_xml and return its result. */ +static data1_node *grs_read_xml (struct grs_read_info *p) +{ + return data1_read_xml (p->dh, p->readf, p->fh, p->mem); +} +/* Free the handler state; clientData is expected to be the struct xml_info from grs_init_xml. */ +static void grs_destroy_xml(void *clientData) +{ + struct xml_info *p = (struct xml_info *) clientData; + + xfree (p); +} +/* Handler table for the "xml" GRS type: name, init, destroy, read. */ +static struct recTypeGrs xml_type = { + "xml", + grs_init_xml, + grs_destroy_xml, + grs_read_xml +}; + +RecTypeGrs recTypeGrs_xml = &xml_type; + +#endif -- 1.7.10.4