1 /* $Id: insert.c,v 1.25 2005-01-15 19:38:21 adam Exp $
2 Copyright (C) 1995-2005
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
34 static int dict_ins (Dict dict, const Dict_char *str,
35 Dict_ptr back_ptr, int userlen, void *userinfo);
36 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
37 Dict_ptr subptr, char *userinfo);
40 static Dict_ptr new_page (Dict dict, Dict_ptr back_ptr, void **pp)
43 Dict_ptr ptr = dict->head.last;
44 if (!dict->head.freelist)
46 dict_bf_newp (dict->dbf, dict->head.last, &p, dict->head.page_size);
51 ptr = dict->head.freelist;
52 dict_bf_readp (dict->dbf, ptr, &p);
53 dict->head.freelist = DICT_backptr(p);
57 DICT_backptr(p) = back_ptr;
59 DICT_size(p) = DICT_infoffset;
60 DICT_bsize(p) = dict->head.page_size;
66 static int split_page (Dict dict, Dict_ptr ptr, void *p)
72 short *indxp, *best_indxp = NULL;
73 Dict_char best_char = 0;
74 Dict_char prev_char = 0;
75 int best_no = -1, no_current = 1;
77 /* determine splitting char... */
78 indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
79 for (i = DICT_nodir (p); --i >= 0; --indxp)
81 if (*indxp > 0) /* tail string here! */
85 memcpy (&dc, (char*) p + *indxp, sizeof(dc));
87 { /* first entry met */
88 best_char = prev_char = dc;
92 else if (prev_char == dc)
93 { /* same char prefix. update */
94 if (++no_current > best_no)
95 { /* best entry so far */
102 { /* new char prefix. restore */
108 if (best_no < 0) /* we didn't find any tail string entry at all! */
111 j = best_indxp - (short*) p;
112 subptr = new_page (dict, ptr, &subp);
113 /* scan entries to see if there is a string with */
114 /* length 1. info_here indicates if such entry exist */
116 for (i=0; i<best_no; i++, j++)
122 info = (char*) p + ((short*) p)[j];
124 memcpy (&dc, info, sizeof(dc));
125 assert (dc == best_char);
126 slen = 1+dict_strlen((Dict_char*) info);
132 info_here = info+slen*sizeof(Dict_char);
136 info1 = info+slen*sizeof(Dict_char); /* info start */
137 dict_ins (dict, (Dict_char*) (info+sizeof(Dict_char)),
138 subptr, *info1, info1+1);
139 dict_bf_readp (dict->dbf, ptr, &p);
142 /* now clean the page ... */
143 clean_page (dict, ptr, p, &best_char, subptr, info_here);
147 static void clean_page (Dict dict, Dict_ptr ptr, void *p, Dict_char *out,
148 Dict_ptr subptr, char *userinfo)
150 char *np = (char *) xmalloc (dict->head.page_size);
152 short *indxp1, *indxp2;
155 DICT_bsize(np) = dict->head.page_size;
156 indxp1 = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
157 indxp2 = (short*) ((char*) np+DICT_bsize(np));
158 info2 = (char*) np + DICT_infoffset;
159 for (i = DICT_nodir (p); --i >= 0; --indxp1)
161 if (*indxp1 > 0) /* tail string here! */
163 /* string (Dict_char *) DICT_EOS terminated */
164 /* unsigned char length of information */
165 /* char * information */
167 info1 = (char*) p + *indxp1;
168 if (out && memcmp (out, info1, sizeof(Dict_char)) == 0)
172 *--indxp2 = -(info2 - np);
173 memcpy (info2, &subptr, sizeof(Dict_ptr));
174 info2 += sizeof(Dict_ptr);
175 memcpy (info2, out, sizeof(Dict_char));
176 info2 += sizeof(Dict_char);
179 memcpy (info2, userinfo, *userinfo+1);
180 info2 += *userinfo + 1;
188 *--indxp2 = info2 - np;
189 slen = (dict_strlen((Dict_char*) info1)+1)*sizeof(Dict_char);
190 memcpy (info2, info1, slen);
196 /* Dict_ptr subptr */
197 /* Dict_char sub char */
198 /* unsigned char length of information */
199 /* char * information */
201 assert (*indxp1 < 0);
202 *--indxp2 = -(info2 - np);
203 info1 = (char*) p - *indxp1;
204 memcpy (info2, info1, sizeof(Dict_ptr)+sizeof(Dict_char));
205 info1 += sizeof(Dict_ptr)+sizeof(Dict_char);
206 info2 += sizeof(Dict_ptr)+sizeof(Dict_char);
209 memcpy (info2, info1, slen);
214 memcpy ((char*)p+DICT_infoffset,
215 (char*)np+DICT_infoffset,
216 info2 - ((char*)np+DICT_infoffset));
217 memcpy ((char*)p + ((char*)indxp2 - (char*)np),
219 ((char*) np+DICT_bsize(p)) - (char*)indxp2);
221 memcpy ((char*)p+DICT_infoffset, (char*)np+DICT_infoffset,
222 DICT_pagesize(dict)-DICT_infoffset);
224 DICT_size(p) = info2 - np;
228 dict_bf_touch (dict->dbf, ptr);
233 /* return 0 if new */
234 /* return 1 if before but change of info */
235 /* return 2 if same as before */
237 static int dict_ins (Dict dict, const Dict_char *str,
238 Dict_ptr ptr, int userlen, void *userinfo)
240 int hi, lo, mid, slen, cmp = 1;
245 dict_bf_readp (dict->dbf, ptr, &p);
251 hi = DICT_nodir(p)-1;
252 indxp = (short*) ((char*) p+DICT_bsize(p)-sizeof(short));
258 /* string (Dict_char *) DICT_EOS terminated */
259 /* unsigned char length of information */
260 /* char * information */
261 info = (char*)p + indxp[-mid];
262 cmp = dict_strcmp((Dict_char*) info, str);
265 info += (dict_strlen((Dict_char*) info)+1)*sizeof(Dict_char);
266 /* consider change of userinfo length... */
267 if (*info == userlen)
269 /* change of userinfo ? */
270 if (memcmp (info+1, userinfo, userlen))
272 dict_bf_touch (dict->dbf, ptr);
273 memcpy (info+1, userinfo, userlen);
279 else if (*info > userlen)
281 /* room for new userinfo */
284 dict_bf_touch (dict->dbf, ptr);
285 memcpy (info+1, userinfo, userlen);
296 /* Dict_ptr subptr */
297 /* Dict_char sub char */
298 /* unsigned char length of information */
299 /* char * information */
300 info = (char*)p - indxp[-mid];
301 memcpy (&dc, info+sizeof(Dict_ptr), sizeof(Dict_char));
305 memcpy (&subptr, info, sizeof(Dict_ptr));
306 if (*++str == DICT_EOS)
308 /* finish of string. Store userinfo here... */
310 int xlen = info[sizeof(Dict_ptr)+sizeof(Dict_char)];
313 if (memcmp (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
316 dict_bf_touch (dict->dbf, ptr);
317 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
323 else if (xlen > userlen)
326 info[sizeof(Dict_ptr)+sizeof(Dict_char)] = userlen;
327 memcpy (info+sizeof(Dict_ptr)+sizeof(Dict_char)+1,
329 dict_bf_touch (dict->dbf, ptr);
332 /* xlen < userlen, expanding needed ... */
333 if (DICT_size(p)+sizeof(Dict_char)+sizeof(Dict_ptr)+
335 DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short))
337 /* not enough room - split needed ... */
338 if (DICT_type(p) == 1)
340 clean_page (dict, ptr, p, NULL, 0, NULL);
341 return dict_ins (dict, str-1, ptr,
344 if (split_page (dict, ptr, p))
346 yaz_log (YLOG_FATAL, "Unable to split page %d\n", ptr);
349 return dict_ins (dict, str-1, ptr, userlen, userinfo);
352 { /* enough room - no split needed ... */
353 info = (char*)p + DICT_size(p);
354 memcpy (info, &subptr, sizeof(subptr));
355 memcpy (info+sizeof(Dict_ptr), &dc, sizeof(Dict_char));
356 info[sizeof(Dict_char)+sizeof(Dict_ptr)] = userlen;
357 memcpy (info+sizeof(Dict_char)+sizeof(Dict_ptr)+1,
359 indxp[-mid] = -DICT_size(p);
360 DICT_size(p) += sizeof(Dict_char)+sizeof(Dict_ptr)
363 dict_bf_touch (dict->dbf, ptr);
373 subptr = new_page (dict, ptr, NULL);
374 memcpy (info, &subptr, sizeof(subptr));
375 dict_bf_touch (dict->dbf, ptr);
377 return dict_ins (dict, str, subptr, userlen, userinfo);
387 if (lo>hi && cmp < 0)
389 slen = (dict_strlen(str)+1)*sizeof(Dict_char);
390 if (DICT_size(p)+slen+userlen >=
391 (int)(DICT_bsize(p) - (1+DICT_nodir(p))*sizeof(short)))/* overflow? */
395 clean_page (dict, ptr, p, NULL, 0, NULL);
396 return dict_ins (dict, str, ptr, userlen, userinfo);
398 split_page (dict, ptr, p);
399 return dict_ins (dict, str, ptr, userlen, userinfo);
405 indxp1 = (short*)((char*) p + DICT_bsize(p)
406 - DICT_nodir(p)*sizeof(short));
407 for (; indxp1 != indxp; indxp1++)
408 indxp1[0] = indxp1[1];
410 indxp1 = (short*) ((char*) p+DICT_pagesize(dict)-sizeof(short));
411 for (i = DICT_nodir (p); --i >= 0; --indxp1)
415 info = (char*)p - *indxp1;
416 assert (info[sizeof(Dict_ptr)] > ' ');
423 info = (char*)p + DICT_size(p);
424 memcpy (info, str, slen);
427 memcpy (info, userinfo, userlen);
430 *indxp = DICT_size(p);
431 DICT_size(p) = info- (char*) p;
432 dict_bf_touch (dict->dbf, ptr);
438 int dict_insert (Dict dict, const char *str, int userlen, void *userinfo)
440 if (!dict->head.root)
444 dict->head.root = new_page (dict, 0, &p);
445 if (!dict->head.root)
448 return dict_ins (dict, (const Dict_char *) str, dict->head.root,