Fixed an old-style declaration

[idzebra-moved-to-github.git] / recctrl / recgrs.c
diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c

index 49fdf91..3fe7f21 100644 (file)
--- a/recctrl/recgrs.c
+++ b/recctrl/recgrs.c
@@ -1,5 +1,5 @@
-/* $Id: recgrs.c,v 1.85 2003-10-07 09:18:21 adam Exp $
-   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003
+/* $Id: recgrs.c,v 1.92 2004-10-12 18:21:35 quinn Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
     Index Data Aps
  
  This file is part of the Zebra server.
@@ -25,104 +25,147 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  #include <sys/types.h>
  #ifndef WIN32
  #include <unistd.h>
+#include <dlfcn.h>
  #endif
  
  #include <yaz/log.h>
  #include <yaz/oid.h>
  
-#include <recctrl.h>
-#include "grsread.h"
+#include <d1_absyn.h>
+#include <idzebra/recgrs.h>
  
  #define GRS_MAX_WORD 512
  
-struct grs_handler {
-    RecTypeGrs type;
-    void *clientData;
-    int initFlag;
-    struct grs_handler *next;
+struct source_parser { /* lexer state used by sp_lex/sp_expr while scanning a termlist source expression */
+    int len;            /* number of characters in the current word token; 0 when the token is a delimiter or end of input */
+    const char *tok;    /* start of the current token inside the source string (not NUL-terminated at len) */
+    const char *src;    /* read cursor: first character not yet consumed */
+    int lookahead;      /* 't' for a word token, otherwise the delimiter character itself (0 at end of input) */
  };
  
-struct grs_handlers {
-    struct grs_handler *handlers;
-};
-
-static int read_grs_type (struct grs_handlers *h,
-                         struct grs_read_info *p, const char *type,
-                         data1_node **root)
+static int sp_lex(struct source_parser *sp) /* advance to the next token; updates tok/len/lookahead and returns the new lookahead */
  {
-    struct grs_handler *gh = h->handlers;
-    const char *cp = strchr (type, '.');
-
-    if (cp == NULL || cp == type)
+    while (*sp->src == ' ') /* skip leading blanks (only the space character is skipped) */
+       (sp->src)++;
+    sp->tok = sp->src;
+    sp->len = 0;
+    while (*sp->src && !strchr("<>();,-: ", *sp->src)) /* a word runs until end of string or one of these delimiters */
      {
-        cp = strlen(type) + type;
-        *p->type = 0;
+       sp->src++;
+       sp->len++;
      }
+    if (sp->len)
+       sp->lookahead = 't'; /* word token: text is tok[0..len-1] */
      else
-        strcpy (p->type, cp+1);
-    for (gh = h->handlers; gh; gh = gh->next)
      {
-        if (!memcmp (type, gh->type->type, cp-type) && 
-           gh->type->type[cp-type] == '\0')
+       sp->lookahead = *sp->src; /* delimiter token, or 0 at end of input */
+       if (*sp->src) /* never step past the terminating NUL */
+           sp->src++;
+    }
+    return sp->lookahead;
+}
+
+
+static int sp_expr(struct source_parser *sp, data1_node *n, RecWord *wrd) /* evaluate one source expression: data | tag | attr(name) | range(expr,start,len); fills wrd from node n; returns 0 on a syntax error, 1 otherwise */
+{
+    if (sp->lookahead != 't') /* an expression must begin with a word token */
+       return 0;
+    if (sp->len == 4 && !memcmp(sp->tok, "data", sp->len))
+    {
+       if (n->which == DATA1N_data) /* wrd is left untouched when n is not a data node */
         {
-           if (!gh->initFlag)
+           wrd->string = n->u.data.data;
+           wrd->length = n->u.data.len;
+       }
+       sp_lex(sp);
+    }
+    else if (sp->len == 3 && !memcmp(sp->tok, "tag", sp->len))
+    {
+       if (n->which == DATA1N_tag) /* wrd is left untouched when n is not a tag node */
+       {
+           wrd->string = n->u.tag.tag;
+           wrd->length = strlen(n->u.tag.tag);
+       }
+       sp_lex(sp);
+    }
+    else if (sp->len == 4 && !memcmp(sp->tok, "attr", sp->len))
+    {
+       sp_lex(sp);
+       if (sp->lookahead != '(')
+           return 0;
+       sp_lex(sp);
+       if (sp->lookahead != 't') /* the attribute name is the current word token */
+           return 0;
+       
+       if (n->which == DATA1N_tag)
+       {
+           data1_xattr *p = n->u.tag.attributes;
+           while (p && (strlen(p->name) != (size_t) sp->len || /* keep searching unless length AND bytes both match */
+                  memcmp (p->name, sp->tok, sp->len)))
+               p = p->next;
+           if (p)
              {
-               gh->initFlag = 1;
-               gh->clientData = (*gh->type->init)();
+               wrd->string = p->value;
+               wrd->length = strlen(p->value);
              }
-           p->clientData = gh->clientData;
-            *root = (gh->type->read)(p);
-           gh->clientData = p->clientData;
+       }
+       sp_lex(sp);
+       if (sp->lookahead != ')')
+           return 0;
+       sp_lex(sp);
+    }
+    else if (sp->len == 5 && !memcmp(sp->tok, "range", sp->len))
+    {
+       int start, len;
+       sp_lex(sp);
+       if (sp->lookahead != '(')
+           return 0;
+       
+       sp_lex(sp);
+       sp_expr(sp, n, wrd); /* inner expression; its result is not checked, a failure is normally caught by the ',' test below */
+       if (sp->lookahead != ',')
+           return 0;
+       
+       sp_lex(sp);
+       if (sp->lookahead != 't')
+           return 0;
+       start = atoi_n(sp->tok, sp->len); /* offset into the inner expression's value */
+       
+       sp_lex(sp);
+       if (sp->lookahead != ',')
+           return 0;
+       
+       sp_lex(sp);
+       if (sp->lookahead != 't')
+           return 0;
+       len = atoi_n(sp->tok, sp->len); /* maximum length of the resulting slice */
+       
+       sp_lex(sp);
+       if (sp->lookahead != ')')
             return 0;
+       
+       sp_lex(sp);
+       if (wrd->string && wrd->length) /* only slice when the inner expression produced a non-empty value */
+       {
+           wrd->string += start;
+           wrd->length -= start; /* NOTE(review): start is not bounded by wrd->length here, so length may drop below zero - confirm */
+           if (wrd->length > len)
+               wrd->length = len;
         }
      }
      return 1;
  }
  
-static void grs_add_handler (struct grs_handlers *h, RecTypeGrs t)
+static int sp_parse(data1_node *n, RecWord *wrd, const char *src) /* evaluate source expression src against node n, storing the result in wrd; returns sp_expr's result */
  {
-    struct grs_handler *gh = (struct grs_handler *) xmalloc (sizeof(*gh));
-    gh->next = h->handlers;
-    h->handlers = gh;
-    gh->initFlag = 0;
-    gh->clientData = 0;
-    gh->type = t;
-}
-
-static void *grs_init(RecType recType)
-{
-    struct grs_handlers *h = (struct grs_handlers *) xmalloc (sizeof(*h));
-    h->handlers = 0;
-
-    grs_add_handler (h, recTypeGrs_sgml);
-    grs_add_handler (h, recTypeGrs_regx);
-#if HAVE_TCL_H
-    grs_add_handler (h, recTypeGrs_tcl);
-#endif
-    grs_add_handler (h, recTypeGrs_marc);
-    grs_add_handler (h, recTypeGrs_marcxml);
-#if HAVE_EXPAT_H
-    grs_add_handler (h, recTypeGrs_xml);
-#endif
-#if HAVE_PERL
-    grs_add_handler (h, recTypeGrs_perl);
-#endif
-    return h;
-}
-
-static void grs_destroy(void *clientData)
-{
-    struct grs_handlers *h = (struct grs_handlers *) clientData;
-    struct grs_handler *gh = h->handlers, *gh_next;
-    while (gh)
-    {
-       gh_next = gh->next;
-       if (gh->initFlag)
-           (*gh->type->destroy)(gh->clientData);
-       xfree (gh);
-       gh = gh_next;
-    }
-    xfree (h);
+    struct source_parser sp; /* parser state lives on the stack for the duration of this call */
+    sp.len = 0;
+    sp.tok = 0;
+    sp.src = src;
+    sp.lookahead = 0;
+    sp_lex(&sp); /* prime the lookahead with the first token before parsing */
+
+    return sp_expr(&sp, n, wrd);
  }
  
  int d1_check_xpath_predicate(data1_node *n, struct xpath_predicate *p)
@@ -354,6 +397,8 @@ static void index_xpath (data1_node *n, struct recExtractCtrl *p,
      size_t flen = 0;
      data1_node *nn;
      int termlist_only = 1;
+    data1_termlist *tl;
+    int xpdone = 0;
  
      yaz_log(LOG_DEBUG, "index_xpath level=%d use=%d", level, use);
      if ((!n->root->u.root.absyn) ||
@@ -366,58 +411,88 @@ static void index_xpath (data1_node *n, struct recExtractCtrl *p,
      case DATA1N_data:
          wrd->string = n->u.data.data;
          wrd->length = n->u.data.len;
-        if (p->flagShowRecords)
-        {
-            printf("%*s XData:\"", (level + 1) * 4, "");
-            for (i = 0; i<wrd->length && i < 8; i++)
-                fputc (wrd->string[i], stdout);
-            printf("\"\n");
-        }  
-        else  {
-            data1_termlist *tl;
-            int xpdone = 0;
-            flen = 0;
-            
-            /* we have to fetch the whole path to the data tag */
-            for (nn = n; nn; nn = nn->parent) {
-                if (nn->which == DATA1N_tag) {
-                    size_t tlen = strlen(nn->u.tag.tag);
-                    if (tlen + flen > (sizeof(tag_path_full)-2)) return;
-                    memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
-                    flen += tlen;
-                    tag_path_full[flen++] = '/';
-                }
-                else if (nn->which == DATA1N_root)  break;
-            }
-
-            tag_path_full[flen] = 0;
+        xpdone = 0;
+        flen = 0;
              
-            /* If we have a matching termlist... */
-            if (n->root->u.root.absyn && (tl = xpath_termlist_by_tagpath(tag_path_full, n))) {
-                for (; tl; tl = tl->next) {
-                    wrd->reg_type = *tl->structure;
-                    /* this is the ! case, so structure is for the xpath index */
-                    if (!tl->att) {
-                        wrd->attrSet = VAL_IDXPATH;
-                        wrd->attrUse = use;
-                        (*p->tokenAdd)(wrd);
-                        xpdone = 1;
-                    } else {
-                        /* this is just the old fashioned attribute based index */
-                        wrd->attrSet = (int) (tl->att->parent->reference);
-                        wrd->attrUse = tl->att->locals->local;
-                        (*p->tokenAdd)(wrd);
-                    }
-                }
-            }
-            /* xpath indexing is done, if there was no termlist given, 
-               or no ! in the termlist, and default indexing is enabled... */
-            if ((!xpdone) && (!termlist_only)) {
-                wrd->attrSet = VAL_IDXPATH;
-                wrd->attrUse = use;
-                wrd->reg_type = 'w';
-                (*p->tokenAdd)(wrd);
-            }
+       /* we have to fetch the whole path to the data tag */
+       for (nn = n; nn; nn = nn->parent) {
+           if (nn->which == DATA1N_tag) {
+               size_t tlen = strlen(nn->u.tag.tag);
+               if (tlen + flen > (sizeof(tag_path_full)-2)) return;
+               memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
+               flen += tlen;
+               tag_path_full[flen++] = '/';
+           }
+           else if (nn->which == DATA1N_root)  break;
+       }
+       
+       tag_path_full[flen] = 0;
+       
+       /* If we have a matching termlist... */
+       if (n->root->u.root.absyn && 
+           (tl = xpath_termlist_by_tagpath(tag_path_full, n)))
+       {
+           for (; tl; tl = tl->next)
+           {
+               /* need to copy recword because it may be changed */
+               RecWord wrd_tl;
+               wrd->reg_type = *tl->structure;
+               /* this is the ! case, so structure is for the xpath index */
+               memcpy (&wrd_tl, wrd, sizeof(*wrd));
+               if (tl->source)
+                   sp_parse(n, &wrd_tl, tl->source);
+               if (!tl->att) {
+                   wrd_tl.attrSet = VAL_IDXPATH;
+                   wrd_tl.attrUse = use;
+                   if (p->flagShowRecords)
+                   {
+                       int i;
+                       printf("%*sXPath index", (level + 1) * 4, "");
+                       printf (" XData:\"");
+                       for (i = 0; i<wrd_tl.length && i < 40; i++)
+                           fputc (wrd_tl.string[i], stdout);
+                       fputc ('"', stdout);
+                       if (wrd_tl.length > 40)
+                           printf (" ...");
+                       fputc ('\n', stdout);
+                   }
+                   else
+                       (*p->tokenAdd)(&wrd_tl);
+                   xpdone = 1;
+               } else {
+                   /* this is just the old fashioned attribute based index */
+                   wrd_tl.attrSet = (int) (tl->att->parent->reference);
+                   wrd_tl.attrUse = tl->att->locals->local;
+                   if (p->flagShowRecords)
+                   {
+                       int i;
+                       printf("%*sIdx: [%s]", (level + 1) * 4, "",
+                              tl->structure);
+                       printf("%s:%s [%d] %s",
+                              tl->att->parent->name,
+                              tl->att->name, tl->att->value,
+                              tl->source);
+                       printf (" XData:\"");
+                       for (i = 0; i<wrd_tl.length && i < 40; i++)
+                           fputc (wrd_tl.string[i], stdout);
+                       fputc ('"', stdout);
+                       if (wrd_tl.length > 40)
+                           printf (" ...");
+                       fputc ('\n', stdout);
+                   }
+                   else
+                       (*p->tokenAdd)(&wrd_tl);
+               }
+           }
+       }
+       /* xpath indexing is done, if there was no termlist given, 
+          or no ! in the termlist, and default indexing is enabled... */
+       if (!p->flagShowRecords && !xpdone && !termlist_only)
+       {
+           wrd->attrSet = VAL_IDXPATH;
+           wrd->attrUse = use;
+           wrd->reg_type = 'w';
+           (*p->tokenAdd)(wrd);
         }
          break;
      case DATA1N_tag:
@@ -611,33 +686,11 @@ static void index_termlist (data1_node *par, data1_node *n,
      
      for (; tlist; tlist = tlist->next)
      {
-
-       char xattr[512];
         /* consider source */
         wrd->string = 0;
+       assert(tlist->source);
+       sp_parse(n, wrd, tlist->source);
  
-       if (!strcmp (tlist->source, "data") && n->which == DATA1N_data)
-       {
-           wrd->string = n->u.data.data;
-           wrd->length = n->u.data.len;
-       }
-       else if (!strcmp (tlist->source, "tag") && n->which == DATA1N_tag)
-        {
-           wrd->string = n->u.tag.tag;
-           wrd->length = strlen(n->u.tag.tag);
-       }
-       else if (sscanf (tlist->source, "attr(%511[^)])", xattr) == 1 &&
-           n->which == DATA1N_tag)
-       {
-           data1_xattr *p = n->u.tag.attributes;
-           while (p && strcmp (p->name, xattr))
-               p = p->next;
-           if (p)
-           {
-               wrd->string = p->value;
-               wrd->length = strlen(p->value);
-           }
-       }
         if (wrd->string)
         {
             if (p->flagShowRecords)
@@ -780,8 +833,9 @@ int grs_extract_tree(struct recExtractCtrl *p, data1_node *n)
      return dumpkeys(n, p, 0, &wrd);
  }
  
-static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
-                          NMEM mem)
+static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
+                          NMEM mem,
+                          data1_node *(*grs_read)(struct grs_read_info *))
  {
      data1_node *n;
      struct grs_read_info gri;
@@ -797,9 +851,9 @@ static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
      gri.offset = p->offset;
      gri.mem = mem;
      gri.dh = p->dh;
+    gri.clientData = clientData;
  
-    if (read_grs_type (h, &gri, p->subType, &n))
-       return RECCTRL_EXTRACT_ERROR_NO_SUCH_FILTER;
+    n = (*grs_read)(&gri);
      if (!n)
          return RECCTRL_EXTRACT_EOF;
      oe.proto = PROTO_Z3950;
@@ -833,13 +887,12 @@ static int grs_extract_sub(struct grs_handlers *h, struct recExtractCtrl *p,
      return RECCTRL_EXTRACT_OK;
  }
  
-static int grs_extract(void *clientData, struct recExtractCtrl *p)
+int zebra_grs_extract(void *clientData, struct recExtractCtrl *p, /* run grs_extract_sub with a temporary NMEM and return its result */
+                     data1_node *(*grs_read)(struct grs_read_info *)) /* grs_read: reader callback, passed through to grs_extract_sub */
  {
      int ret;
      NMEM mem = nmem_create ();
-    struct grs_handlers *h = (struct grs_handlers *) clientData;
-
-    ret = grs_extract_sub(h, p, mem);
+    ret = grs_extract_sub(clientData, p, mem, grs_read); /* mem is destroyed right after this call returns */
      nmem_destroy(mem);
      return ret;
  }
@@ -958,7 +1011,7 @@ static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
          data1_mk_tag_data_int (p->dh, n, "score", p->score, mem);
      }
      data1_mk_text (p->dh, mem, i4, n);
-    data1_mk_tag_data_int (p->dh, n, "localnumber", p->localno, mem);
+    data1_mk_tag_data_zint (p->dh, n, "localnumber", p->localno, mem);
      if (p->fname)
      {
          data1_mk_text (p->dh, mem, i4, n);
@@ -967,7 +1020,8 @@ static void zebra_xml_metadata (struct recRetrieveCtrl *p, data1_node *top,
      data1_mk_text (p->dh, mem, i2, n);
  }
  
-static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
+int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
+                      data1_node *(*grs_read)(struct grs_read_info *))
  {
      data1_node *node = 0, *onode = 0, *top;
      data1_node *dnew;
@@ -976,7 +1030,7 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
      NMEM mem;
      struct grs_read_info gri;
      const char *tagname;
-    struct grs_handlers *h = (struct grs_handlers *) clientData;
+
      int requested_schema = VAL_NONE;
      data1_marctab *marctab;
      int dummy;
@@ -990,14 +1044,10 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
      gri.offset = 0;
      gri.mem = mem;
      gri.dh = p->dh;
+    gri.clientData = clientData;
  
      yaz_log(LOG_DEBUG, "grs_retrieve");
-    if (read_grs_type (h, &gri, p->subType, &node))
-    {
-       p->diagnostic = 14;
-        nmem_destroy (mem);
-       return 0;
-    }
+    node = (*grs_read)(&gri);
      if (!node)
      {
         p->diagnostic = 14;
@@ -1045,9 +1095,13 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
         dnew->u.data.what = DATA1I_text;
         dnew->u.data.data = dnew->lbuf;
          
-       sprintf(dnew->u.data.data, "%d", p->localno);
+       sprintf(dnew->u.data.data, ZINT_FORMAT, p->localno);
         dnew->u.data.len = strlen(dnew->u.data.data);
      }
+
+    if (p->input_format == VAL_TEXT_XML)
+       zebra_xml_metadata (p, top, mem);
+
  #if 0
      data1_pr_tree (p->dh, node, stdout);
  #endif
@@ -1179,7 +1233,6 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
                                 p->input_format : VAL_SUTRS))
      {
      case VAL_TEXT_XML:
-        zebra_xml_metadata (p, top, mem);
  
  #if 0
          data1_pr_tree (p->dh, node, stdout);
@@ -1279,13 +1332,3 @@ static int grs_retrieve(void *clientData, struct recRetrieveCtrl *p)
      return 0;
  }
  
-static struct recType grs_type =
-{
-    "grs",
-    grs_init,
-    grs_destroy,
-    grs_extract,
-    grs_retrieve
-};
-
-RecType recTypeGrs = &grs_type;