2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.11 1995-09-04 12:33:31 adam
8 * Various cleanup. YAZ util used instead.
10 * Revision 1.10 1994/10/05 12:16:48 adam
11 * Pagesize is a resource now.
13 * Revision 1.9 1994/09/16 15:39:13 adam
14 * Initial code of lookup - not tested yet.
16 * Revision 1.8 1994/09/16 12:35:01 adam
17 * New version of split_page which use clean_page for splitting.
19 * Revision 1.7 1994/09/12 08:06:42 adam
20 * Futher development of insert.c
22 * Revision 1.6 1994/09/06 13:05:15 adam
23 * Further development of insertion. Some special cases are
24 * not properly handled yet! assert(0) are put here. The
25 * binary search in each page definitely reduce usr CPU.
27 * Revision 1.5 1994/09/01 17:49:39 adam
28 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
30 * Revision 1.4 1994/09/01 17:44:09 adam
31 * depend include change.
33 * Revision 1.3 1994/08/18 12:40:56 adam
34 * Some development of dictionary. Not finished at all!
36 * Revision 1.2 1994/08/17 13:32:19 adam
37 * Use cache in dict - not in bfile.
39 * Revision 1.1 1994/08/16 16:26:48 adam
53 static int dict_ins (Dict dict, const Dict_char *str,
54 Dict_ptr back_ptr, int userlen, void *userinfo);
55 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
56 Dict_ptr subptr, char *userinfo);
59 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
62 Dict_ptr ptr = dict->head.free_list;
63 if (dict->head.free_list == dict->head.last)
65 dict->head.free_list++;
66 dict->head.last = dict->head.free_list;
67 dict_bf_newp (dict->dbf, ptr, &p);
71 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
72 dict->head.free_list = DICT_nextptr(p);
73 if (dict->head.free_list == 0)
74 dict->head.free_list = dict->head.last;
78 DICT_backptr(p) = back_ptr;
81 DICT_size(p) = DICT_infoffset;
87 static int split_page (Dict dict, Dict_ptr ptr, void *p)
93 short *indxp, *best_indxp = NULL;
94 Dict_char best_char = 0;
95 Dict_char prev_char = 0;
96 int best_no = -1, no_current = 1;
98 /* determine splitting char... */
99 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
100 for (i = DICT_nodir (p); --i >= 0; --indxp)
102 if (*indxp > 0) /* tail string here! */
106 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
108 { /* first entry met */
109 best_char = prev_char = dc;
112 else if (prev_char == dc)
113 { /* same char prefix. update */
114 if (++no_current > best_no)
115 { /* best entry so far */
116 best_no = no_current;
122 { /* new char prefix. restore */
128 if (best_no < 0) /* we didn't find any tail string entry at all! */
131 subptr = new_page (dict, ptr, &subp);
132 /* scan entries to see if there is a string with */
133 /* length 1. info_here indicates if such entry exist */
135 for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
142 info = (char*) p + *indxp; /* entry start */
143 assert (*info == best_char);
144 slen = dict_strlen(info);
150 info_here = info+(slen+1)*sizeof(Dict_char);
154 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
155 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
156 dict_bf_readp (dict->dbf, ptr, &p);
159 /* now clean the page ... */
160 clean_page (dict, ptr, p, &best_char, subptr, info_here);
164 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
165 Dict_ptr subptr, char *userinfo)
167 char *np = xmalloc (dict->head.page_size);
169 short *indxp1, *indxp2;
172 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
173 indxp2 = (short*) ((char*) np+DICT_pagesize(dict));
174 info2 = (char*) np + DICT_infoffset;
175 for (i = DICT_nodir (p); --i >= 0; --indxp1)
177 if (*indxp1 > 0) /* tail string here! */
179 /* string (Dict_char *) DICT_EOS terminated */
180 /* unsigned char length of information */
181 /* char * information */
183 info1 = (char*) p + *indxp1;
184 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
188 *--indxp2 = -(info2 - np);
189 memcpy (info2, &subptr, sizeof(Dict_ptr));
190 info2 += sizeof(Dict_ptr);
191 memcpy (info2, out, sizeof(Dict_char));
192 info2 += sizeof(Dict_char);
195 memcpy (info2, userinfo, *userinfo+1);
196 info2 += *userinfo + 1;
204 *--indxp2 = info2 - np;
205 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
206 memcpy (info2, info1, slen);
212 /* Dict_ptr subptr */
213 /* Dict_char sub char */
214 /* unsigned char length of information */
215 /* char * information */
217 assert (*indxp1 < 0);
218 *--indxp2 = -(info2 - np);
219 info1 = (char*) p - *indxp1;
220 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
221 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
222 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
225 memcpy (info2, info1, slen);
229 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
230 DICT_pagesize(dict)-DICT_infoffset);
231 DICT_size(p) = info2 - np;
235 dict_bf_touch (dict->dbf, ptr);
240 /* return 0 if new */
241 /* return 1 if before but change of info */
242 /* return 2 if same as before */
244 static int dict_ins (Dict dict, const Dict_char *str,
245 Dict_ptr back_ptr, int userlen, void *userinfo)
247 int hi, lo, mid, slen, cmp = 1;
248 Dict_ptr ptr = back_ptr;
254 ptr = new_page (dict, back_ptr, &p);
256 dict_bf_readp (dict->dbf, ptr, &p);
262 hi = DICT_nodir(p)-1;
263 indxp = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
269 /* string (Dict_char *) DICT_EOS terminated */
270 /* unsigned char length of information */
271 /* char * information */
272 info = (char*)p + indxp[-mid];
273 cmp = dict_strcmp((Dict_char*) info, str);
276 info += (dict_strlen(info)+1)*sizeof(Dict_char);
277 /* consider change of userinfo length... */
278 if (*info == userlen)
280 if (memcmp (info+1, userinfo, userlen))
282 dict_bf_touch (dict->dbf, ptr);
283 memcpy (info+1, userinfo, userlen);
288 else if (*info > userlen)
292 dict_bf_touch (dict->dbf, ptr);
293 memcpy (info+1, userinfo, userlen);
304 /* Dict_ptr subptr */
305 /* Dict_char sub char */
306 /* unsigned char length of information */
307 /* char * information */
308 info = (char*)p - indxp[-mid];
309 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
313 memcpy (&subptr, info, sizeof(Dict_ptr));
314 if (*++str == DICT_EOS)
318 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
321 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
324 dict_bf_touch (dict->dbf, ptr);
325 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
331 else if (xlen > userlen)
334 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
335 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
337 dict_bf_touch (dict->dbf, ptr);
340 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
342 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short))
344 if (DICT_type(p) == 1)
346 clean_page (dict, ptr, p, NULL, 0, NULL);
347 return dict_ins (dict, str-1, ptr,
350 if (split_page (dict, ptr, p))
352 logf (LOG_FATAL, "Unable to split page %d\n", ptr);
355 return dict_ins (dict, str-1, ptr, userlen, userinfo);
359 info = (char*)p + DICT_size(p);
360 memcpy (info, &subptr, sizeof(subptr));
361 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
362 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
363 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
365 indxp[-mid] = -DICT_size(p);
366 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
369 dict_bf_touch (dict->dbf, ptr);
379 subptr = new_page (dict, ptr, NULL);
380 memcpy (info, &subptr, sizeof(subptr));
381 dict_bf_touch (dict->dbf, ptr);
383 return dict_ins (dict, str, subptr, userlen, userinfo);
393 if (lo>hi && cmp < 0)
395 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
396 if (DICT_size(p)+slen+userlen >=
397 DICT_pagesize(dict) - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
399 split_page (dict, ptr, p);
400 return dict_ins (dict, str, ptr, userlen, userinfo);
406 indxp1 = (short*)((char*) p + DICT_pagesize(dict)
407 - DICT_nodir(p)*sizeof(short));
408 for (; indxp1 != indxp; indxp1++)
409 indxp1[0] = indxp1[1];
411 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
412 for (i = DICT_nodir (p); --i >= 0; --indxp1)
416 info = (char*)p - *indxp1;
417 assert (info[sizeof(Dict_ptr)] > ' ');
424 info = (char*)p + DICT_size(p);
425 memcpy (info, str, slen);
428 memcpy (info, userinfo, userlen);
431 *indxp = DICT_size(p);
432 DICT_size(p) = info- (char*) p;
433 dict_bf_touch (dict->dbf, ptr);
439 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
441 assert (dict->head.last > 0);
442 if (dict->head.last == 1)
443 return dict_ins (dict, str, 0, userlen, userinfo);
445 return dict_ins (dict, str, 1, userlen, userinfo);