-/* $Id: rectext.c,v 1.23 2005-01-15 19:38:32 adam Exp $
+/* $Id: rectext.c,v 1.29 2005-06-23 06:45:47 adam Exp $
Copyright (C) 1995-2005
Index Data ApS
#include <assert.h>
#include <ctype.h>
-#include <zebrautl.h>
+#include <idzebra/util.h>
#include <idzebra/recctrl.h>
/* Per-handle state of this filter (struct renamed from text_info to filter_info).
 * sep is set to 0 by the init function, replaced by a private copy of the
 * configuration argument string when one is supplied, and released by the
 * destroy function.
 * NOTE(review): the name suggests a separator string, but no visible line
 * reads sep - confirm its use against the full file. */
-struct text_info {
+struct filter_info {
char *sep;
};
/* Create the filter state (function renamed from text_init to filter_init).
 * Allocates one filter_info with xmalloc, clears its sep member and returns
 * the new object as an opaque pointer. Neither res nor recType is used by
 * the lines shown here. */
-static void *text_init (Res res, RecType recType)
+static void *filter_init (Res res, RecType recType)
{
- struct text_info *tinfo = (struct text_info *) xmalloc(sizeof(*tinfo));
+ struct filter_info *tinfo = (struct filter_info *) xmalloc(sizeof(*tinfo));
/* No argument string has been configured yet. */
tinfo->sep = 0;
return tinfo;
}
/* Store the configuration argument (function renamed from text_config to
 * filter_config; the old body was empty).
 * clientData is the filter_info handle; args is the argument string.
 * Any previously stored string is freed first, so the function can be
 * called repeatedly. A NULL or empty args leaves sep at 0; otherwise sep
 * receives a copy made with xstrdup. res is not used. */
-static void text_config(void *clientData, Res res, const char *args)
+static void filter_config(void *clientData, Res res, const char *args)
{
-
+ struct filter_info *tinfo = (struct filter_info*) clientData;
+ xfree(tinfo->sep);
+ tinfo->sep = 0;
+ if (args && *args)
+ tinfo->sep = xstrdup(args);
}
/* Release the filter state (function renamed from text_destroy to
 * filter_destroy). Frees the stored argument string and then the
 * filter_info object itself; clientData must not be used afterwards. */
-static void text_destroy (void *clientData)
+static void filter_destroy (void *clientData)
{
- struct text_info *tinfo = clientData;
+ struct filter_info *tinfo = clientData;
xfree (tinfo->sep);
xfree (tinfo);
}
int max;
};
/* Create a buf_info reader for the extract control block p. The change
 * shown here only gives the function internal linkage (static).
 * The object is allocated with xmalloc and returned to the caller, who
 * releases it with buf_close.
 * NOTE(review): this excerpt shows only the allocation and the return;
 * the member initialisation is not visible here - check the full file. */
-struct buf_info *buf_open (struct recExtractCtrl *p)
+static struct buf_info *buf_open (struct recExtractCtrl *p)
{
struct buf_info *fi = (struct buf_info *) xmalloc (sizeof(*fi));
return fi;
}
/* Fetch the next byte from fi into *dst (function renamed from buf_read
 * to buf_getchar and given internal linkage).
 * The call sites in the extract function read the byte at dst only when
 * the return value is greater than 0, and stop reading otherwise.
 * NOTE(review): the body is truncated in this excerpt; the handling of an
 * exhausted buffer (offset has reached max) is not visible here. */
-int buf_read (struct text_info *tinfo, struct buf_info *fi, char *dst)
+static int buf_getchar (struct filter_info *tinfo, struct buf_info *fi, char *dst)
{
/* Taken when every buffered byte has been consumed. */
if (fi->offset >= fi->max)
{
return 1;
}
/* Dispose of a reader: frees its data buffer and then the buf_info object.
 * The change shown here only gives the function internal linkage (static). */
-void buf_close (struct buf_info *fi)
+static void buf_close (struct buf_info *fi)
{
xfree (fi->buf);
xfree (fi);
}
/* Index one record (function renamed from text_extract to filter_extract).
 * Reads the input byte by byte, cuts it at newline or carriage return, and
 * hands every non-empty piece to the tokenAdd callback of p.
 * clientData is the filter_info handle; p is the extract control block.
 * Returns RECCTRL_EXTRACT_EOF when no term bytes were read at all,
 * RECCTRL_EXTRACT_OK otherwise. */
-static int text_extract (void *clientData, struct recExtractCtrl *p)
+static int filter_extract (void *clientData, struct recExtractCtrl *p)
{
- struct text_info *tinfo = clientData;
+ struct filter_info *tinfo = clientData;
/* Term buffer; it is never NUL terminated, the length travels separately. */
char w[512];
RecWord recWord;
int r;
struct buf_info *fi = buf_open (p);
/* Total number of term bytes emitted; stays 0 for input without any. */
+ int no_read = 0;
#if 0
- yaz_log(YLOG_LOG, "text_extract off=%ld",
+ yaz_log(YLOG_LOG, "filter_extract off=%ld",
(long) (*fi->p->tellf)(fi->p->fh));
#endif
/* The two removed lines below discarded the configured sep string on
 * every call; the new version resets sep in the config function instead. */
- xfree(tinfo->sep);
- tinfo->sep = 0;
(*p->init)(p, &recWord);
/* The explicit reg_type assignment is dropped by this change. */
- recWord.reg_type = 'w';
do
{
/* Number of bytes collected for the current piece. */
int i = 0;
- r = buf_read (tinfo, fi, w);
+ r = buf_getchar (tinfo, fi, w);
/* Collect until the read fails, a line terminator arrives, or 511 bytes
 * are held. The terminator is stored at w[i] but not counted in i.
 * NOTE(review): when i reaches 511 the byte just stored at w[511] is not
 * part of the emitted term and the next pass starts again at w[0], so
 * that byte looks dropped - confirm against buf_getchar. */
while (r > 0 && i < 511 && w[i] != '\n' && w[i] != '\r')
{
i++;
- r = buf_read (tinfo, fi, w + i);
+ r = buf_getchar (tinfo, fi, w + i);
}
/* Empty pieces (for example consecutive terminators) are not emitted. */
if (i)
{
- recWord.string = w;
- recWord.length = i;
+ no_read += i;
+ recWord.term_buf = w;
+ recWord.term_len = i;
(*p->tokenAdd)(&recWord);
}
} while (r > 0);
buf_close (fi);
/* New in this change: report end of input when nothing was read. */
+ if (no_read == 0)
+ return RECCTRL_EXTRACT_EOF;
return RECCTRL_EXTRACT_OK;
}
/* Build the retrieval output for one record (function renamed from
 * text_retrieve to filter_retrieve; locals renamed from text_* to filter_*).
 * Assembles an optional header (rank, local number, file name) followed by
 * the record body read through the readf callback of p, then publishes the
 * result through output_format, rec_buf and rec_len of p. Returns 0.
 * NOTE(review): several lines of this function are elided in this excerpt
 * (element set handling, the declaration of no_lines, parts of the line
 * limiting loop); the comments below cover only what is visible. */
-static int text_retrieve (void *clientData, struct recRetrieveCtrl *p)
+static int filter_retrieve (void *clientData, struct recRetrieveCtrl *p)
{
- int r, text_ptr = 0;
- static char *text_buf = NULL;
- static int text_size = 0;
/* filter_ptr is the number of bytes written so far. The buffer and its
 * size are static, so they persist and are reused across calls.
 * NOTE(review): a shared static buffer looks non-reentrant - confirm that
 * callers never run this concurrently or keep rec_buf across calls. */
+ int r, filter_ptr = 0;
+ static char *filter_buf = NULL;
+ static int filter_size = 0;
int make_header = 1;
int make_body = 1;
const char *elementSetName = NULL;
}
while (1)
{
/* Keep at least 4096 free bytes ahead of the write position, since each
 * read below may deliver up to 4096 bytes. */
- if (text_ptr + 4096 >= text_size)
+ if (filter_ptr + 4096 >= filter_size)
{
char *nb;
/* Grow to twice the old size plus 8192 and carry over the old contents. */
- text_size = 2*text_size + 8192;
- nb = (char *) xmalloc (text_size);
- if (text_buf)
+ filter_size = 2*filter_size + 8192;
+ nb = (char *) xmalloc (filter_size);
+ if (filter_buf)
{
- memcpy (nb, text_buf, text_ptr);
- xfree (text_buf);
+ memcpy (nb, filter_buf, filter_ptr);
+ xfree (filter_buf);
}
- text_buf = nb;
+ filter_buf = nb;
}
/* The header is written once, while the buffer is still empty.
 * NOTE(review): the sprintf calls are unbounded; a very long fname could
 * exceed the free space - confirm the maximum file name length. */
- if (make_header && text_ptr == 0)
+ if (make_header && filter_ptr == 0)
{
/* A negative score suppresses the Rank line. */
if (p->score >= 0)
{
- sprintf (text_buf, "Rank: %d\n", p->score);
- text_ptr = strlen(text_buf);
+ sprintf (filter_buf, "Rank: %d\n", p->score);
+ filter_ptr = strlen(filter_buf);
}
- sprintf (text_buf + text_ptr, "Local Number: " ZINT_FORMAT "\n",
+ sprintf (filter_buf + filter_ptr, "Local Number: " ZINT_FORMAT "\n",
p->localno);
- text_ptr = strlen(text_buf);
+ filter_ptr = strlen(filter_buf);
if (p->fname)
{
- sprintf (text_buf + text_ptr, "Filename: %s\n", p->fname);
- text_ptr = strlen(text_buf);
+ sprintf (filter_buf + filter_ptr, "Filename: %s\n", p->fname);
+ filter_ptr = strlen(filter_buf);
}
/* An empty line ends the header. */
- strcpy(text_buf+text_ptr++, "\n");
+ strcpy(filter_buf+filter_ptr++, "\n");
}
if (!make_body)
break;
/* Append the next chunk of the record; a result of 0 or less ends the loop. */
- r = (*p->readf)(p->fh, text_buf + text_ptr, 4096);
+ r = (*p->readf)(p->fh, filter_buf + filter_ptr, 4096);
if (r <= 0)
break;
- text_ptr += r;
+ filter_ptr += r;
}
- text_buf[text_ptr] = '\0';
+ filter_buf[filter_ptr] = '\0';
if (elementSetName)
{
if (!strcmp (elementSetName, "B"))
}
/* Optional truncation of the output to the first no_lines lines. */
if (no_lines)
{
/* This local p shadows the parameter p inside this block. */
- char *p = text_buf;
+ char *p = filter_buf;
int i = 0;
while (++i <= no_lines && (p = strchr (p, '\n')))
if (p)
{
/* Cut directly after the newline that p points at.
 * NOTE(review): the new length is the offset of that newline, so the
 * newline itself is kept in the buffer but not counted - confirm. */
p[1] = '\0';
- text_ptr = p-text_buf;
+ filter_ptr = p-filter_buf;
}
}
/* Hand the static buffer to the caller as a SUTRS record. */
p->output_format = VAL_SUTRS;
- p->rec_buf = text_buf;
- p->rec_len = text_ptr;
+ p->rec_buf = filter_buf;
+ p->rec_len = filter_ptr;
return 0;
}
/* Descriptor that registers this filter under the name text (object renamed
 * from text_type to filter_type). It lists the init, config, destroy,
 * extract and retrieve callbacks in that order.
 * NOTE(review): this change inserts a leading 0 member ahead of the name;
 * its meaning is defined by struct recType, which is not shown here. */
-static struct recType text_type = {
+static struct recType filter_type = {
+ 0,
"text",
- text_init,
- text_config,
- text_destroy,
- text_extract,
- text_retrieve
+ filter_init,
+ filter_config,
+ filter_destroy,
+ filter_extract,
+ filter_retrieve
};
/* Table of the record types provided by this file, ended by a 0 entry.
 * Its only visible member is the address of the descriptor above.
 * NOTE(review): the array name and the preprocessor lines that select it
 * are elided in this excerpt - check the full file. */
RecType
#endif
[] = {
- &text_type,
+ &filter_type,
0,
};