/*
- * Copyright (C) 1994-1996, Index Data I/S
+ * Copyright (C) 1994-1998, Index Data I/S
* All rights reserved.
* Sebastian Hammer, Adam Dickmeiss
*
* $Log: invstat.c,v $
- * Revision 1.2 1996-05-22 08:25:56 adam
+ * Revision 1.7 1998-03-13 15:30:50 adam
+ * New functions isc_block_used and isc_block_size. Fixed 'leak'
+ * in isc_alloc_block.
+ *
+ * Revision 1.6 1998/03/06 13:54:02 adam
+ * Fixed two nasty bugs in isc_merge.
+ *
+ * Revision 1.5 1997/09/17 12:19:13 adam
+ * Zebra version corresponds to YAZ version 1.4.
+ * Changed Zebra server so that it doesn't depend on global common_resource.
+ *
+ * Revision 1.4 1996/11/08 11:10:21 adam
+ * Buffers used during file match got bigger.
+ * Compressed ISAM support everywhere.
+ * Bug fixes regarding masking characters in queries.
+ * Redesigned Regexp-2 queries.
+ *
+ * Revision 1.3 1996/06/04 10:18:58 adam
+ * Minor changes - removed include of ctype.h.
+ *
+ * Revision 1.2 1996/05/22 08:25:56 adam
* Minor change.
*
* Revision 1.1 1996/05/14 14:04:34 adam
#include <stdio.h>
#include <assert.h>
#include <string.h>
-#include <ctype.h>
#include "index.h"
#include "recindex.h"
struct inv_stat_info {
ISAM isam;
+ ISAMC isamc;
+ int no_isam_entries[8];
int no_dict_entries;
int no_dict_bytes;
int isam_bounds[20];
static int inv_stat_handle (char *name, const char *info, int pos,
void *client)
{
- int occur;
+ int occur = 0;
int i = 0;
struct inv_stat_info *stat_info = (struct inv_stat_info*) client;
- ISPT ispt;
ISAM_P isam_p;
stat_info->no_dict_entries++;
assert (*info == sizeof(ISAM_P));
memcpy (&isam_p, info+1, sizeof(ISAM_P));
- ispt = is_position (stat_info->isam, isam_p);
-
- occur = is_numkeys (ispt);
+ if (stat_info->isam)
+ {
+ ISPT ispt;
- is_pt_free (ispt);
+ ispt = is_position (stat_info->isam, isam_p);
+ occur = is_numkeys (ispt);
+ is_pt_free (ispt);
+ }
+ if (stat_info->isamc)
+ {
+ ISAMC_PP pp;
+ int occurx = 0;
+ char buf[128];
+
+ pp = isc_pp_open (stat_info->isamc, isam_p);
+ occur = isc_pp_num (pp);
+ while (isc_pp_read(pp, buf))
+ occurx++;
+ assert (occurx == occur);
+ stat_info->no_isam_entries[isc_type(isam_p)] += occur;
+ isc_pp_close (pp);
+ }
while (occur > stat_info->isam_bounds[i] && stat_info->isam_bounds[i])
i++;
++(stat_info->isam_occurrences[i]);
-
return 0;
}
-void inv_prstat (const char *dict_fname, const char *isam_fname)
+void inv_prstat (BFiles bfs)
{
Dict dict;
- ISAM isam;
+ ISAM isam = NULL;
+ ISAMC isamc = NULL;
Records records;
int i, prev;
int before = 0;
term_dict[0] = 1;
term_dict[1] = 0;
- dict = dict_open (dict_fname, 100, 0);
+ dict = dict_open (bfs, FNAME_DICT, 100, 0);
if (!dict)
{
- logf (LOG_FATAL, "dict_open fail of `%s'", dict_fname);
+ logf (LOG_FATAL, "dict_open fail");
exit (1);
}
- isam = is_open (isam_fname, key_compare, 0, sizeof(struct it_key));
- if (!isam)
+ if (!res_get_match (common_resource, "isam", "i", NULL))
{
- logf (LOG_FATAL, "is_open fail of `%s'", isam_fname);
- exit (1);
+ isamc = isc_open (bfs, FNAME_ISAMC, 0, key_isamc_m (common_resource));
+ if (!isamc)
+ {
+ logf (LOG_FATAL, "isc_open fail");
+ exit (1);
+ }
}
- records = rec_open (0);
+ else
+ {
+ isam = is_open (bfs, FNAME_ISAM, key_compare, 0,
+ sizeof(struct it_key), common_resource);
+ if (!isam)
+ {
+ logf (LOG_FATAL, "is_open fail");
+ exit (1);
+ }
+ }
+ records = rec_open (bfs, 0);
+ for (i = 0; i<8; i++)
+ stat_info.no_isam_entries[i] = 0;
stat_info.no_dict_entries = 0;
stat_info.no_dict_bytes = 0;
stat_info.isam = isam;
+ stat_info.isamc = isamc;
stat_info.isam_bounds[0] = 1;
stat_info.isam_bounds[1] = 2;
stat_info.isam_bounds[2] = 3;
- stat_info.isam_bounds[3] = 5;
+ stat_info.isam_bounds[3] = 6;
stat_info.isam_bounds[4] = 10;
stat_info.isam_bounds[5] = 20;
stat_info.isam_bounds[6] = 30;
dict_scan (dict, term_dict, &before, &after, &stat_info, inv_stat_handle);
- rec_close (&records);
- dict_close (dict);
- is_close (isam);
-
- fprintf (stderr, "%d dictionary entries. %d bytes for strings\n",
+ if (isamc)
+ {
+ fprintf (stderr, " Blocks Occur Size KB Bytes/Entry\n");
+ for (i = 0; isc_block_used (isamc, i) >= 0; i++)
+ {
+ fprintf (stderr, " %8d %8d", isc_block_used (isamc, i),
+ stat_info.no_isam_entries[i]);
+
+ if (stat_info.no_isam_entries[i])
+ fprintf (stderr, " %8d %f",
+ (int) ((1023.0 + (double) isc_block_used(isamc, i) *
+ isc_block_size(isamc,i))/1024),
+ ((double) isc_block_used(isamc, i) *
+ isc_block_size(isamc,i))/
+ stat_info.no_isam_entries[i]);
+ fprintf (stderr, "\n");
+ }
+ }
+
+ fprintf (stderr, "\n%d words using %d bytes\n",
stat_info.no_dict_entries, stat_info.no_dict_bytes);
- fprintf (stderr, " size occurrences\n");
+ fprintf (stderr, " Occurrences Words\n");
prev = 1;
for (i = 0; stat_info.isam_bounds[i]; i++)
{
}
fprintf (stderr, "%7d- %7d\n",
prev, stat_info.isam_occurrences[i]);
+
+ rec_close (&records);
+ dict_close (dict);
+
+ if (isam)
+ is_close (isam);
+ if (isamc)
+ isc_close (isamc);
+
}