2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.8 1994-09-16 12:35:01 adam
8 * New version of split_page which use clean_page for splitting.
10 * Revision 1.7 1994/09/12 08:06:42 adam
11 * Futher development of insert.c
13 * Revision 1.6 1994/09/06 13:05:15 adam
14 * Further development of insertion. Some special cases are
15 * not properly handled yet! assert(0) are put here. The
16 * binary search in each page definitely reduce usr CPU.
18 * Revision 1.5 1994/09/01 17:49:39 adam
19 * Removed stupid line. Work on insertion in dictionary. Not finished yet.
21 * Revision 1.4 1994/09/01 17:44:09 adam
22 * depend include change.
24 * Revision 1.3 1994/08/18 12:40:56 adam
25 * Some development of dictionary. Not finished at all!
27 * Revision 1.2 1994/08/17 13:32:19 adam
28 * Use cache in dict - not in bfile.
30 * Revision 1.1 1994/08/16 16:26:48 adam
44 static int dict_ins (Dict dict, const Dict_char *str,
45 Dict_ptr back_ptr, int userlen, void *userinfo);
46 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
47 Dict_ptr subptr, char *userinfo);
50 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
53 Dict_ptr ptr = dict->head.free_list;
54 if (dict->head.free_list == dict->head.last)
56 dict->head.free_list++;
57 dict->head.last = dict->head.free_list;
58 dict_bf_newp (dict->dbf, ptr, &p);
62 dict_bf_readp (dict->dbf, dict->head.free_list, &p);
63 dict->head.free_list = DICT_nextptr(p);
64 if (dict->head.free_list == 0)
65 dict->head.free_list = dict->head.last;
69 DICT_backptr(p) = back_ptr;
72 DICT_size(p) = DICT_infoffset;
78 static int split_page (Dict dict, Dict_ptr ptr, void *p)
84 short *indxp, *best_indxp;
85 Dict_char best_char = 0;
86 Dict_char prev_char = 0;
87 int best_no = -1, no_current = 1;
89 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
90 for (i = DICT_nodir (p); --i >= 0; --indxp)
92 if (*indxp > 0) /* tail string here! */
96 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
98 { /* first entry met */
99 best_char = prev_char = dc;
102 else if (prev_char == dc)
103 { /* same char prefix. update */
104 if (++no_current > best_no)
105 { /* best entry so far */
106 best_no = no_current;
112 { /* new char prefix. restore */
118 if (best_no < 0) /* we didn't find any tail string entry at all! */
121 subptr = new_page (dict, ptr, &subp);
122 /* scan entries to see if there is a string with */
123 /* length 1. info_here indicates if such entry exist */
125 for (indxp=best_indxp, i=0; i<best_no; i++, indxp++)
132 info = (char*) p + *indxp; /* entry start */
133 assert (*info == best_char);
134 slen = dict_strlen(info);
140 info_here = info+(slen+1)*sizeof(Dict_char);
143 /* calculate the amount of bytes needed for this entry when */
144 /* transformed to a sub entry */
145 need = sizeof(Dict_char)+sizeof(Dict_ptr)+1;
150 /* now loop on all entries with string length > 1 i.e. all */
151 /* those entries which contribute to a sub page */
153 for (i=0; i<best_no; i++, indxp++)
160 info = (char*) p + *indxp; /* entry start */
161 assert (*info == best_char);
162 slen = dict_strlen(info);
166 info1 = info+(1+slen)*sizeof(Dict_char); /* info start */
168 if (need <= (1+slen)*sizeof(Dict_char) + 1 + *info1)
169 best_indxp = indxp; /* space for entry */
170 dict_ins (dict, info+sizeof(Dict_char), subptr, *info1, info1+1);
171 dict_bf_readp (dict->dbf, ptr, &p);
175 /* now clean the page ... */
176 clean_page (dict, ptr, p, &best_char, subptr, info_here);
180 { /* there was a hole big enough for a sub entry */
181 char *info = (char*) p + *best_indxp;
184 *--indxp = - *best_indxp;
186 DICT_nodir (p) -= (best_no-1);
187 indxp1 = (short*)((char*)p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
188 while (indxp != indxp1)
191 *indxp = indxp[1-best_no];
193 memcpy (info, &subptr, sizeof(Dict_ptr)); /* store subptr */
194 info += sizeof(Dict_ptr);
195 memcpy (info, &best_char, sizeof(Dict_char)); /* store sub char */
196 info += sizeof(Dict_char);
198 memcpy (info, info_here, *info_here+1); /* with information */
200 *info = 0; /* without info */
204 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
205 for (i = DICT_nodir (p); --i >= 0; --indxp)
207 if (*indxp > 0) /* tail string here! */
211 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
212 assert (dc != best_char);
213 assert (dc >= prev_char);
219 memcpy (&dc, (char*)p - *indxp+sizeof(Dict_ptr),
221 assert (dc > prev_char);
224 assert (best_indxp == NULL);
235 short *indxp1, *indxp2;
238 DICT_nodir(p) -= best_no;
240 indxp1 = (short*)((char*) p+DICT_PAGESIZE-DICT_nodir(p)*sizeof(short));
244 indxp2[0] = indxp2[-best_no];
245 } while (indxp2 != indxp1);
250 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
251 Dict_ptr subptr, char *userinfo)
253 char *np = xmalloc (dict->head.page_size);
255 short *indxp1, *indxp2;
258 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
259 indxp2 = (short*) ((char*) np+DICT_PAGESIZE);
260 info2 = (char*) np + DICT_infoffset;
261 for (i = DICT_nodir (p); --i >= 0; --indxp1)
263 if (*indxp1 > 0) /* tail string here! */
265 /* string (Dict_char *) DICT_EOS terminated */
266 /* unsigned char length of information */
267 /* char * information */
269 info1 = (char*) p + *indxp1;
270 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
274 *--indxp2 = -(info2 - np);
275 memcpy (info2, &subptr, sizeof(Dict_ptr));
276 info2 += sizeof(Dict_ptr);
277 memcpy (info2, out, sizeof(Dict_char));
278 info2 += sizeof(Dict_char);
281 memcpy (info2, userinfo, *userinfo+1);
282 info2 += *userinfo + 1;
290 *--indxp2 = info2 - np;
291 slen = (dict_strlen(info1)+1)*sizeof(Dict_char);
292 memcpy (info2, info1, slen);
298 /* Dict_ptr subptr */
299 /* Dict_char sub char */
300 /* unsigned char length of information */
301 /* char * information */
303 assert (*indxp1 < 0);
304 *--indxp2 = -(info2 - np);
305 info1 = (char*) p - *indxp1;
306 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
307 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
308 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
311 memcpy (info2, info1, slen);
315 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
316 DICT_PAGESIZE-DICT_infoffset);
317 DICT_size(p) = info2 - np;
325 /* return 0 if new */
326 /* return 1 if before but change of info */
327 /* return 2 if same as before */
329 static int dict_ins (Dict dict, const Dict_char *str,
330 Dict_ptr back_ptr, int userlen, void *userinfo)
332 int hi, lo, mid, i, slen, cmp = 1;
333 Dict_ptr ptr = back_ptr;
339 ptr = new_page (dict, back_ptr, &p);
341 dict_bf_readp (dict->dbf, ptr, &p);
347 hi = DICT_nodir(p)-1;
348 indxp = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
354 /* string (Dict_char *) DICT_EOS terminated */
355 /* unsigned char length of information */
356 /* char * information */
357 info = (char*)p + indxp[-mid];
358 cmp = dict_strcmp((Dict_char*) info, str);
361 info += (dict_strlen(info)+1)*sizeof(Dict_char);
362 /* consider change of userinfo length... */
363 if (*info == userlen)
365 if (memcmp (info+1, userinfo, userlen))
367 dict_bf_touch (dict->dbf, ptr);
368 memcpy (info+1, userinfo, userlen);
373 else if (*info > userlen)
377 dict_bf_touch (dict->dbf, ptr);
378 memcpy (info+1, userinfo, userlen);
389 /* Dict_ptr subptr */
390 /* Dict_char sub char */
391 /* unsigned char length of information */
392 /* char * information */
393 info = (char*)p - indxp[-mid];
394 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
398 memcpy (&subptr, info, sizeof(Dict_ptr));
399 if (*++str == DICT_EOS)
403 xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
406 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
409 dict_bf_touch (dict->dbf, ptr);
410 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
416 else if (xlen > userlen)
419 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
420 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
422 dict_bf_touch (dict->dbf, ptr);
425 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
427 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
429 if (DICT_type(p) == 1)
431 clean_page (dict, ptr, p, NULL, 0, NULL);
432 dict_bf_touch (dict->dbf, ptr);
433 return dict_ins (dict, str-1, ptr,
436 if (split_page (dict, ptr, p))
438 log (LOG_FATAL, "Unable to split page %d\n", ptr);
441 return dict_ins (dict, str-1, ptr, userlen, userinfo);
445 info = (char*)p + DICT_size(p);
446 memcpy (info, &subptr, sizeof(subptr));
447 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
448 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
449 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
451 indxp[-mid] = -DICT_size(p);
452 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
455 dict_bf_touch (dict->dbf, ptr);
465 subptr = new_page (dict, ptr, NULL);
466 memcpy (info, &subptr, sizeof(subptr));
467 dict_bf_touch (dict->dbf, ptr);
469 return dict_ins (dict, str, subptr, userlen, userinfo);
479 if (lo>hi && cmp < 0)
481 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
482 if (DICT_size(p)+slen+userlen >=
483 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short)) /* overflow? */
485 if (DICT_type(p) == 1)
487 clean_page (dict, ptr, p, NULL, 0, NULL);
488 dict_bf_touch (dict->dbf, ptr);
489 return dict_ins (dict, str, ptr, userlen, userinfo);
495 if (split_page (dict, ptr, p))
497 log (LOG_FATAL, "Unable to split page %d\n", ptr);
500 if (DICT_size(p)+slen+userlen <
501 DICT_PAGESIZE - (1+DICT_nodir(p))*sizeof(short))
504 clean_page (dict, ptr, p, NULL, 0, NULL);
505 } while (DICT_size(p)+slen+userlen > DICT_PAGESIZE -
506 (1+DICT_nodir(p))*sizeof(short));
507 return dict_ins (dict, str, ptr, userlen, userinfo);
513 indxp1 = (short*)((char*) p + DICT_PAGESIZE
514 - DICT_nodir(p)*sizeof(short));
515 for (; indxp1 != indxp; indxp1++)
516 indxp1[0] = indxp1[1];
518 indxp1 = (short*) ((char*) p+DICT_PAGESIZE-sizeof(short));
519 for (i = DICT_nodir (p); --i >= 0; --indxp1)
523 info = (char*)p - *indxp1;
524 assert (info[sizeof(Dict_ptr)] > ' ');
531 info = (char*)p + DICT_size(p);
532 memcpy (info, str, slen);
535 memcpy (info, userinfo, userlen);
538 *indxp = DICT_size(p);
539 DICT_size(p) = info- (char*) p;
540 dict_bf_touch (dict->dbf, ptr);
546 int dict_insert (Dict dict, const Dict_char *str, int userlen, void *userinfo)
548 assert (dict->head.last > 0);
549 if (dict->head.last == 1)
550 return dict_ins (dict, str, 0, userlen, userinfo);
552 return dict_ins (dict, str, 1, userlen, userinfo);