From 1c3797bc503c1e7a109c8887d89d3ddda93bba71 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 7 Jul 2000 12:49:20 +0000 Subject: [PATCH] Optimized resultSetInsert{Rank,Sort}. --- TODO | 10 ++++++++- configure | 4 ++-- configure.in | 4 ++-- dict/Makefile.am | 10 ++++----- dict/Makefile.in | 66 ++++++++++++++++++++++++++++++++++++++++++------------ dict/dicttest.c | 40 +++++++++++++++++++++++---------- index/zebraapi.c | 7 ++++-- index/zinfo.c | 59 +++++++++++++++++++++++++++++------------------- index/zsets.c | 32 +++++++++++++++++--------- util/passtest.c | 1 + 10 files changed, 161 insertions(+), 72 deletions(-) diff --git a/TODO b/TODO index d2bd938..34d84a2 100644 --- a/TODO +++ b/TODO @@ -1,5 +1,13 @@ Zebra TODO -$Id: TODO,v 1.8 1999-12-01 14:29:50 adam Exp $ +$Id: TODO,v 1.9 2000-07-07 12:49:20 adam Exp $ + +Browse set. + +Multiple register areas. + +Hit Vector for each term in query in search-response PDU. + +Prefix configuration for externally stored records. Size of sort entries should be configurable. diff --git a/configure b/configure index 31815c9..249f9c7 100755 --- a/configure +++ b/configure @@ -1681,8 +1681,8 @@ fi done fi -echo $ac_n "checking for large files""... $ac_c" 1>&6 -echo "configure:1686: checking for large files" >&5 +echo $ac_n "checking for LFS""... 
$ac_c" 1>&6 +echo "configure:1686: checking for LFS" >&5 if test "$cross_compiling" = yes; then bits=32 else diff --git a/configure.in b/configure.in index 62cd8ec..b8d9608 100644 --- a/configure.in +++ b/configure.in @@ -1,5 +1,5 @@ dnl Zebra, Index Data Aps, 1994-2000 -dnl $Id: configure.in,v 1.21 2000-05-15 15:32:50 adam Exp $ +dnl $Id: configure.in,v 1.22 2000-07-07 12:49:20 adam Exp $ dnl AC_INIT(include/zebraver.h) AC_MSG_CHECKING(for package) @@ -144,7 +144,7 @@ if test "$ac_cv_lib_bz2_bzCompressInit" = "yes"; then AC_CHECK_HEADERS(bzlib.h) fi dnl ------- 64 bit files -AC_MSG_CHECKING(for large files) +AC_MSG_CHECKING(for LFS) AC_TRY_RUN([#define _FILE_OFFSET_BITS 64 #include #include diff --git a/dict/Makefile.am b/dict/Makefile.am index c673404..2d91aad 100644 --- a/dict/Makefile.am +++ b/dict/Makefile.am @@ -1,15 +1,15 @@ -## $Id: Makefile.am,v 1.1 2000-04-05 09:49:35 adam Exp $ +## $Id: Makefile.am,v 1.2 2000-07-07 12:49:20 adam Exp $ noinst_LIBRARIES = libdict.a -## noinst_PROGRAMS = dicttest dictext +noinst_PROGRAMS = dicttest dictext INCLUDES = -I$(srcdir)/../include @YAZINC@ -LDADD = ../bfile/libbfile.a ../dfa/libdfa.a ../util/libutil.a @YAZLIB@ @LIBS@ +LDADD = libdict.a ../bfile/libbfile.a ../dfa/libdfa.a ../util/libutil.a @YAZLIB@ @LIBS@ libdict_a_SOURCES = scan.c dopen.c dclose.c drdwr.c open.c close.c insert.c \ lookup.c lookupec.c lookgrep.c delete.c dcompact.c -## dicttest_SOURCES = dicttest.c +dicttest_SOURCES = dicttest.c -## dictext_SOURCES = dictext.c +dictext_SOURCES = dictext.c diff --git a/dict/Makefile.in b/dict/Makefile.in index 67116c2..b264351 100644 --- a/dict/Makefile.in +++ b/dict/Makefile.in @@ -73,12 +73,17 @@ YAZLIB = @YAZLIB@ yazconfig = @yazconfig@ noinst_LIBRARIES = libdict.a +noinst_PROGRAMS = dicttest dictext INCLUDES = -I$(srcdir)/../include @YAZINC@ -LDADD = ../bfile/libbfile.a ../dfa/libdfa.a ../util/libutil.a @YAZLIB@ @LIBS@ +LDADD = libdict.a ../bfile/libbfile.a ../dfa/libdfa.a ../util/libutil.a @YAZLIB@ @LIBS@ 
libdict_a_SOURCES = scan.c dopen.c dclose.c drdwr.c open.c close.c insert.c lookup.c lookupec.c lookgrep.c delete.c dcompact.c + +dicttest_SOURCES = dicttest.c + +dictext_SOURCES = dictext.c mkinstalldirs = $(SHELL) $(top_srcdir)/mkinstalldirs CONFIG_CLEAN_FILES = LIBRARIES = $(noinst_LIBRARIES) @@ -90,6 +95,18 @@ libdict_a_LIBADD = libdict_a_OBJECTS = scan.o dopen.o dclose.o drdwr.o open.o close.o \ insert.o lookup.o lookupec.o lookgrep.o delete.o dcompact.o AR = ar +PROGRAMS = $(noinst_PROGRAMS) + +dicttest_OBJECTS = dicttest.o +dicttest_LDADD = $(LDADD) +dicttest_DEPENDENCIES = libdict.a ../bfile/libbfile.a ../dfa/libdfa.a \ +../util/libutil.a +dicttest_LDFLAGS = +dictext_OBJECTS = dictext.o +dictext_LDADD = $(LDADD) +dictext_DEPENDENCIES = libdict.a ../bfile/libbfile.a ../dfa/libdfa.a \ +../util/libutil.a +dictext_LDFLAGS = CFLAGS = @CFLAGS@ COMPILE = $(CC) $(DEFS) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) CCLD = $(CC) @@ -102,11 +119,11 @@ DISTFILES = $(DIST_COMMON) $(SOURCES) $(HEADERS) $(TEXINFOS) $(EXTRA_DIST) TAR = gtar GZIP_ENV = --best DEP_FILES = .deps/close.P .deps/dclose.P .deps/dcompact.P \ -.deps/delete.P .deps/dopen.P .deps/drdwr.P .deps/insert.P \ -.deps/lookgrep.P .deps/lookup.P .deps/lookupec.P .deps/open.P \ -.deps/scan.P -SOURCES = $(libdict_a_SOURCES) -OBJECTS = $(libdict_a_OBJECTS) +.deps/delete.P .deps/dictext.P .deps/dicttest.P .deps/dopen.P \ +.deps/drdwr.P .deps/insert.P .deps/lookgrep.P .deps/lookup.P \ +.deps/lookupec.P .deps/open.P .deps/scan.P +SOURCES = $(libdict_a_SOURCES) $(dicttest_SOURCES) $(dictext_SOURCES) +OBJECTS = $(libdict_a_OBJECTS) $(dicttest_OBJECTS) $(dictext_OBJECTS) all: all-redirect .SUFFIXES: @@ -149,6 +166,23 @@ libdict.a: $(libdict_a_OBJECTS) $(libdict_a_DEPENDENCIES) $(AR) cru libdict.a $(libdict_a_OBJECTS) $(libdict_a_LIBADD) $(RANLIB) libdict.a +mostlyclean-noinstPROGRAMS: + +clean-noinstPROGRAMS: + -test -z "$(noinst_PROGRAMS)" || rm -f $(noinst_PROGRAMS) + +distclean-noinstPROGRAMS: + 
+maintainer-clean-noinstPROGRAMS: + +dicttest: $(dicttest_OBJECTS) $(dicttest_DEPENDENCIES) + @rm -f dicttest + $(LINK) $(dicttest_LDFLAGS) $(dicttest_OBJECTS) $(dicttest_LDADD) $(LIBS) + +dictext: $(dictext_OBJECTS) $(dictext_DEPENDENCIES) + @rm -f dictext + $(LINK) $(dictext_LDFLAGS) $(dictext_OBJECTS) $(dictext_LDADD) $(LIBS) + tags: TAGS ID: $(HEADERS) $(SOURCES) $(LISP) @@ -249,7 +283,7 @@ install-am: all-am install: install-am uninstall-am: uninstall: uninstall-am -all-am: Makefile $(LIBRARIES) +all-am: Makefile $(LIBRARIES) $(PROGRAMS) all-redirect: all-am install-strip: $(MAKE) $(AM_MAKEFLAGS) AM_INSTALL_PROGRAM_FLAGS=-s install @@ -266,23 +300,25 @@ distclean-generic: maintainer-clean-generic: mostlyclean-am: mostlyclean-noinstLIBRARIES mostlyclean-compile \ - mostlyclean-tags mostlyclean-depend mostlyclean-generic + mostlyclean-noinstPROGRAMS mostlyclean-tags \ + mostlyclean-depend mostlyclean-generic mostlyclean: mostlyclean-am -clean-am: clean-noinstLIBRARIES clean-compile clean-tags clean-depend \ - clean-generic mostlyclean-am +clean-am: clean-noinstLIBRARIES clean-compile clean-noinstPROGRAMS \ + clean-tags clean-depend clean-generic mostlyclean-am clean: clean-am distclean-am: distclean-noinstLIBRARIES distclean-compile \ - distclean-tags distclean-depend distclean-generic \ - clean-am + distclean-noinstPROGRAMS distclean-tags \ + distclean-depend distclean-generic clean-am distclean: distclean-am maintainer-clean-am: maintainer-clean-noinstLIBRARIES \ - maintainer-clean-compile maintainer-clean-tags \ + maintainer-clean-compile \ + maintainer-clean-noinstPROGRAMS maintainer-clean-tags \ maintainer-clean-depend maintainer-clean-generic \ distclean-am @echo "This command is intended for maintainers to use;" @@ -293,7 +329,9 @@ maintainer-clean: maintainer-clean-am .PHONY: mostlyclean-noinstLIBRARIES distclean-noinstLIBRARIES \ clean-noinstLIBRARIES maintainer-clean-noinstLIBRARIES \ mostlyclean-compile distclean-compile clean-compile \ 
-maintainer-clean-compile tags mostlyclean-tags distclean-tags \ +maintainer-clean-compile mostlyclean-noinstPROGRAMS \ +distclean-noinstPROGRAMS clean-noinstPROGRAMS \ +maintainer-clean-noinstPROGRAMS tags mostlyclean-tags distclean-tags \ clean-tags maintainer-clean-tags distdir mostlyclean-depend \ distclean-depend clean-depend maintainer-clean-depend info-am info \ dvi-am dvi check check-am installcheck-am installcheck install-exec-am \ diff --git a/dict/dicttest.c b/dict/dicttest.c index 4199f30..5538b49 100644 --- a/dict/dicttest.c +++ b/dict/dicttest.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dicttest.c,v $ - * Revision 1.22 1999-02-02 14:50:19 adam + * Revision 1.23 2000-07-07 12:49:20 adam + * Optimized resultSetInsert{Rank,Sort}. + * + * Revision 1.22 1999/02/02 14:50:19 adam * Updated WIN32 code specific sections. Changed header. * * Revision 1.21 1996/10/29 14:00:03 adam @@ -103,9 +106,11 @@ static int grep_handle (char *name, const char *info, void *client) int main (int argc, char **argv) { + Res my_resource = 0; + BFiles bfs; const char *name = NULL; const char *inputfile = NULL; - const char *base = NULL; + const char *config = NULL; int do_delete = 0; int range = -1; int srange = 0; @@ -126,7 +131,7 @@ int main (int argc, char **argv) { fprintf (stderr, "usage:\n " " %s [-d] [-r n] [-p n] [-u] [-g pat] [-s n] [-v n] [-i f]" - " [-w] [-c n] base file\n\n", + " [-w] [-c n] config file\n\n", prog); fprintf (stderr, " -d delete instead of insert\n"); fprintf (stderr, " -r n set regular match range\n"); @@ -144,8 +149,8 @@ int main (int argc, char **argv) { if (ret == 0) { - if (!base) - base = arg; + if (!config) + config = arg; else if (!name) name = arg; else @@ -196,18 +201,24 @@ int main (int argc, char **argv) exit (1); } } - if (!base || !name) + if (!config || !name) + { + logf (LOG_FATAL, "no config and/or dictionary specified"); + exit (1); + } + my_resource = res_open (config); + if (!my_resource) { - logf (LOG_FATAL, "no 
base and/or dictionary specified"); + logf (LOG_FATAL, "cannot open resource `%s'", config); exit (1); } - common_resource = res_open (base); - if (!common_resource) + bfs = bfs_create (res_get(my_resource, "register")); + if (!bfs) { - logf (LOG_FATAL, "cannot open resource `%s'", base); + logf (LOG_FATAL, "bfs_create fail"); exit (1); } - dict = dict_open (name, cache, rw); + dict = dict_open (bfs, name, cache, rw, 0); if (!dict) { logf (LOG_FATAL, "dict_open fail of `%s'", name); @@ -292,6 +303,10 @@ int main (int argc, char **argv) no_of_misses++; } ++no_of_iterations; + if ((no_of_iterations % 10000) == 0) + { + printf ("."); fflush(stdout); + } ipf_ptr += (i-1); } } @@ -328,6 +343,7 @@ int main (int argc, char **argv) logf (LOG_LOG, "No of misses.. %d", no_of_misses); } dict_close (dict); - res_close (common_resource); + bfs_destroy (bfs); + res_close (my_resource); return 0; } diff --git a/index/zebraapi.c b/index/zebraapi.c index 69dd5a0..21784de 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -3,7 +3,10 @@ * All rights reserved. * * $Log: zebraapi.c,v $ - * Revision 1.34 2000-06-09 13:56:38 ian + * Revision 1.35 2000-07-07 12:49:20 adam + * Optimized resultSetInsert{Rank,Sort}. + * + * Revision 1.34 2000/06/09 13:56:38 ian * Added some logging on Authentication and searches. * * Revision 1.33 2000/05/18 12:01:36 adam @@ -387,7 +390,7 @@ static int zebra_register_deactivate (ZebraService zh) zebra_chdir (zh); if (zh->records) { - zebraExplain_close (zh->zei, 1); + zebraExplain_close (zh->zei, 0); dict_close (zh->dict); sortIdx_close (zh->sortIdx); if (zh->isams) diff --git a/index/zinfo.c b/index/zinfo.c index 5c1b7df..989cad8 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zinfo.c,v $ - * Revision 1.18 2000-03-20 19:08:36 adam + * Revision 1.19 2000-07-07 12:49:20 adam + * Optimized resultSetInsert{Rank,Sort}. 
+ * + * Revision 1.18 2000/03/20 19:08:36 adam * Added remote record import using Z39.50 extended services and Segment * Requests. * @@ -492,16 +495,19 @@ ZebraExplainInfo zebraExplain_open ( zdip = &zei->databaseInfo; trec = rec_get (records, 1); /* get "root" record */ + zei->ordinalSU = 1; + zei->runNumber = 0; + zebraExplain_mergeAccessInfo (zei, 0, &zei->accessInfo); if (trec) /* targetInfo already exists ... */ { data1_node *node_tgtinfo, *node_zebra, *node_list, *np; zei->data1_target = read_sgml_rec (zei->dh, zei->nmem, trec); - if (!zei->data1_target) + if (!zei->data1_target || !zei->data1_target->u.root.absyn) { - rec_rm (&trec); - nmem_destroy(zei->nmem); + logf (LOG_FATAL, "Explain schema missing. Check profilePath"); + nmem_destroy (zei->nmem); return 0; } #if ZINFO_DEBUG @@ -514,9 +520,15 @@ ZebraExplainInfo zebraExplain_open ( node_zebra = data1_search_tag (zei->dh, node_tgtinfo->child, "zebraInfo"); - node_list = data1_search_tag (zei->dh, node_zebra->child, - "databaseList"); - for (np = node_list->child; np; np = np->next) + np = 0; + if (node_zebra) + { + node_list = data1_search_tag (zei->dh, node_zebra->child, + "databaseList"); + if (node_list) + np = node_list->child; + } + for (; np; np = np->next) { data1_node *node_name = NULL; data1_node *node_id = NULL; @@ -563,25 +575,26 @@ ZebraExplainInfo zebraExplain_open ( zdip = &(*zdip)->next; } - np = data1_search_tag (zei->dh, node_zebra->child, - "ordinalSU"); - np = np->child; - assert (np && np->which == DATA1N_data); - zei->ordinalSU = atoi_n (np->u.data.data, np->u.data.len); - - np = data1_search_tag (zei->dh, node_zebra->child, - "runNumber"); - np = np->child; - assert (np && np->which == DATA1N_data); - zei->runNumber = atoi_n (np->u.data.data, np->u.data.len); - *zdip = NULL; + if (node_zebra) + { + np = data1_search_tag (zei->dh, node_zebra->child, + "ordinalSU"); + np = np->child; + assert (np && np->which == DATA1N_data); + zei->ordinalSU = atoi_n (np->u.data.data, np->u.data.len); 
+ + np = data1_search_tag (zei->dh, node_zebra->child, + "runNumber"); + np = np->child; + assert (np && np->which == DATA1N_data); + zei->runNumber = atoi_n (np->u.data.data, np->u.data.len); + *zdip = NULL; + } rec_rm (&trec); } else /* create initial targetInfo */ { data1_node *node_tgtinfo; - zei->ordinalSU = 1; - zei->runNumber = 0; *zdip = NULL; if (writeFlag) @@ -597,9 +610,9 @@ ZebraExplainInfo zebraExplain_open ( "1\n" "Zebra\n" "\n" ); - - if (!zei->data1_target) + if (!zei->data1_target || !zei->data1_target->u.root.absyn) { + logf (LOG_FATAL, "Explain schema missing. Check profilePath"); nmem_destroy (zei->nmem); return 0; } diff --git a/index/zsets.c b/index/zsets.c index 5fce7d3..d2ebf5f 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: zsets.c,v $ - * Revision 1.27 2000-04-05 09:49:36 adam + * Revision 1.28 2000-07-07 12:49:20 adam + * Optimized resultSetInsert{Rank,Sort}. + * + * Revision 1.27 2000/04/05 09:49:36 adam * On Unix, zebra/z'mbol uses automake. 
* * Revision 1.26 2000/03/20 19:08:36 adam @@ -409,20 +412,23 @@ void resultSetInsertSort (ZebraHandle zh, ZebraSet sset, break; } } - j = sort_info->max_entries-1; + ++i; + j = sort_info->max_entries; if (i == j) return; - ++i; + + if (sort_info->num_entries == j) + --j; + else + j = (sort_info->num_entries)++; new_entry = sort_info->entries[j]; while (j != i) { sort_info->entries[j] = sort_info->entries[j-1]; --j; } - sort_info->entries[j] = new_entry; + sort_info->entries[i] = new_entry; assert (new_entry); - if (sort_info->num_entries != sort_info->max_entries) - (sort_info->num_entries)++; for (i = 0; i<num_criteria; i++) memcpy (new_entry->buf[i], this_entry.buf[i], SORT_IDX_ENTRYSIZE); new_entry->sysno = sysno; @@ -453,20 +459,24 @@ void resultSetInsertRank (ZebraHandle zh, struct zset_sort_info *sort_info, break; } } - j = sort_info->max_entries-1; + ++i; + j = sort_info->max_entries; if (i == j) return; - ++i; + + if (sort_info->num_entries == j) + --j; + else + j = (sort_info->num_entries)++; + new_entry = sort_info->entries[j]; while (j != i) { sort_info->entries[j] = sort_info->entries[j-1]; --j; } - sort_info->entries[j] = new_entry; + sort_info->entries[i] = new_entry; assert (new_entry); - if (sort_info->num_entries != sort_info->max_entries) - (sort_info->num_entries)++; new_entry->sysno = sysno; new_entry->score = score; } diff --git a/util/passtest.c b/util/passtest.c index 160a9b1..6874be9 100644 --- a/util/passtest.c +++ b/util/passtest.c @@ -11,4 +11,5 @@ int main (int argc, char **argv) passwd_db_show (db); passwd_db_auth (db, "adam", "xtx9Y="); passwd_db_close (db); + return 0; } -- 1.7.10.4