+ICU is used for tokenization and normalization of the following: mergekey,
+sorting, relevance terms.
+
+Debian package now enables ICU tokenization and normalization by default.
+
--- 1.0.2 2007/08/22
Exposed user setting values (i.e. non-pz: names) to the record systems in two
+pazpar2 (1.0.2-9) unstable; urgency=low
+
+ * ICU enabled by default for the Debian package.
+ * ICU for sorting.
+
+ -- Adam Dickmeiss <adam@indexdata.dk> Mon, 10 Sep 2007 10:44:03 +0200
+
pazpar2 (1.0.2-8) unstable; urgency=low
* Bug fixes #1395, #1507.
<!ENTITY % idcommon SYSTEM "common/common.ent">
%idcommon;
]>
-<!-- $Id: pazpar2_conf.xml,v 1.30 2007-08-01 11:48:26 quinn Exp $ -->
+<!-- $Id: pazpar2_conf.xml,v 1.31 2007-09-10 16:25:49 adam Exp $ -->
<refentry id="pazpar2_conf">
<refentryinfo>
<productname>Pazpar2</productname>
</varlistentry>
<varlistentry>
- <term>icu_chain</term>
+ <term>relevance</term>
<listitem>
<para>
- Definition of ICU tokenization and normalization rules
- are used if ICU support is compiled in. The 'id'
+ Specifies ICU tokenization and normalization rules
+ for tokens that are used in Pazpar2's relevance ranking. The 'id'
attribute is currently not used, and the 'locale'
attribute must be set to one of the locale strings
defined in ICU. The child elements listed below can be
</variablelist>
</listitem>
</varlistentry>
+
+ <varlistentry>
+ <term>sort</term>
+ <listitem>
+ <para>
+ Specifies ICU tokenization and normalization rules
+ for tokens that are used in Pazpar2's sorting. The contents
+ are similar to those of <literal>relevance</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
+
+ <varlistentry>
+ <term>mergekey</term>
+ <listitem>
+ <para>
+ Specifies ICU tokenization and normalization rules
+ for tokens that are used in Pazpar2's mergekey. The contents
+ are similar to those of <literal>relevance</literal>.
+ </para>
+ </listitem>
+ </varlistentry>
<varlistentry>
<term>service</term>
<settings target="*">
<!-- This file introduces default settings for pazpar2 -->
- <!-- $Id: pazpar2_conf.xml,v 1.30 2007-08-01 11:48:26 quinn Exp $ -->
+ <!-- $Id: pazpar2_conf.xml,v 1.31 2007-09-10 16:25:49 adam Exp $ -->
<!-- mapping for unqualified search -->
<set name="pz:cclmap:term" value="u=1016 t=l,r s=al"/>
<?xml version="1.0" encoding="UTF-8"?>
-<!-- $Id: pazpar2.cfg.dist,v 1.18 2007-08-17 11:29:36 jakub Exp $ -->
+<!-- $Id: pazpar2.cfg.dist,v 1.19 2007-09-10 16:25:50 adam Exp $ -->
<pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
<server>
<listen port="9004"/>
<proxy host="localhost:80"/>
<settings src="../etc/settings"/>
+
+ <relevance>
+ <icu_chain id="relevance" locale="el">
+ <normalize rule="[:Control:] Any-Remove"/>
+ <tokenize rule="l"/>
+ <normalize rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
+ <casemap rule="l"/>
+ <index/>
+ </icu_chain>
+ </relevance>
+
+ <sort>
+ <icu_chain id="sort" locale="el">
+ <normalize rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+ <casemap rule="l"/>
+ <sortkey/>
+ </icu_chain>
+ </sort>
+
+ <mergekey>
+ <icu_chain id="mergekey" locale="el">
+ <tokenize rule="l"/>
+ <normalize rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+ <casemap rule="l"/>
+ <index/>
+ </icu_chain>
+ </mergekey>
<service>
<!-- we try to keep same order as in marc21.xsl -->
+++ /dev/null
-<?xml version="1.0" encoding="UTF-8"?>
-<pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
-
-<server>
- <listen port="9004"/>
- <proxy host="localhost:80"/>
- <settings src="../etc/settings"/>
-
- <icu_chain id="el:word" locale="el">
- <normalize rule="[:Control:] Any-Remove"/>
- <tokenize rule="l"/>
- <normalize rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
- <!-- <display/> -->
- <casemap rule="l"/>
- <!-- <normalize rule="Lower"/> -->
- <index/>
- </icu_chain>
-
- <service>
- <metadata name="url" merge="unique"/>
- <metadata name="title" brief="yes" sortkey="skiparticle" merge="longest" rank="6"/>
- <metadata name="isbn"/>
- <metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
- termlist="yes"/>
- <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2"/>
- <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
- <metadata name="id"/>
- <metadata name="lccn" merge="unique"/>
- <metadata name="description" merge="longest" rank="3"/>
- </service>
-</server>
-
-<targetprofiles type="local" src="../zeerex/records/"/>
-
-</pazpar2>
-/* $Id: charsets.c,v 1.5 2007-05-25 10:32:55 marc Exp $
+/* $Id: charsets.c,v 1.6 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
#include <assert.h>
#include "charsets.h"
-//#include "config.h"
-//#include "parameters.h"
+#include "normalize7bit.h"
#ifdef HAVE_ICU
#include "icu_I18N.h"
/* charset handle */
struct pp2_charset_s {
const char *(*token_next_handler)(pp2_relevance_token_t prt);
- /* other handlers will come as we see fit */
+ const char *(*get_sort_handler)(pp2_relevance_token_t prt, int skip);
#ifdef HAVE_ICU
struct icu_chain * icu_chn;
UErrorCode icu_sts;
};
static const char *pp2_relevance_token_a_to_z(pp2_relevance_token_t prt);
+static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt, int skip_article);
#ifdef HAVE_ICU
static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt);
+static const char *pp2_get_sort_icu(pp2_relevance_token_t prt, int skip_article);
#endif // HAVE_ICU
/* tokenzier handle */
struct pp2_relevance_token_s {
const char *cp; /* unnormalized buffer we're tokenizing */
+ const char *last_cp; /* pointer to last token we're dealing with */
pp2_charset_t pct; /* our main charset handle (type+config) */
WRBUF norm_str; /* normized string we return (temporarily) */
+ WRBUF sort_str; /* sort string we return (temporarily) */
};
+
+/*
+ * Creates a charset handle from an XML configuration node list.
+ *
+ * xml_node is the head of a sibling list (the caller passes the children
+ * of a <relevance>, <sort> or <mergekey> element); the first element node
+ * in that list is taken as the ICU chain definition.
+ *
+ * Returns the new charset handle on success.  Returns 0 and logs a fatal
+ * message if the list holds no element node, if the ICU chain cannot be
+ * parsed, or if this binary was compiled without ICU support.
+ */
+pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node)
+{
+#ifdef HAVE_ICU
+    UErrorCode status = U_ZERO_ERROR;
+    struct icu_chain *chain = 0;
+#endif // HAVE_ICU
+
+    /* skip text and comment nodes in front of the chain element; done for
+       both build variants so that the log messages name the element */
+    while (xml_node && xml_node->type != XML_ELEMENT_NODE)
+        xml_node = xml_node->next;
+    if (!xml_node)
+    {
+        /* nothing to parse; also avoids dereferencing a null node below */
+        yaz_log(YLOG_FATAL, "Missing ICU chain configuration element");
+        return 0;
+    }
+
+#ifdef HAVE_ICU
+    chain = icu_chain_xml_config(xml_node, &status);
+    if (!chain || U_FAILURE(status))
+    {
+        yaz_log(YLOG_FATAL, "Could not parse ICU chain config:\n"
+                "<%s>\n ... \n</%s>",
+                xml_node->name, xml_node->name);
+        return 0;
+    }
+    return pp2_charset_create(chain);
+#else // HAVE_ICU
+    yaz_log(YLOG_FATAL, "Error: ICU support requested with element:\n"
+            "<%s>\n ... \n</%s>",
+            xml_node->name, xml_node->name);
+    yaz_log(YLOG_FATAL,
+            "But no ICU support compiled into pazpar2 server.");
+    yaz_log(YLOG_FATAL,
+            "Please install libicu36-dev and icu-doc or similar, "
+            "re-configure and re-compile");
+    return 0;
+#endif // HAVE_ICU
+}
+
+
pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn)
{
pp2_charset_t pct = xmalloc(sizeof(*pct));
pct->token_next_handler = pp2_relevance_token_a_to_z;
+ pct->get_sort_handler = pp2_get_sort_ascii;
#ifdef HAVE_ICU
pct->icu_chn = 0;
- if (icu_chn){
+ if (icu_chn)
+ {
pct->icu_chn = icu_chn;
pct->icu_sts = U_ZERO_ERROR;
pct->token_next_handler = pp2_relevance_token_icu;
+ pct->get_sort_handler = pp2_get_sort_icu;
}
- #endif // HAVE_ICU
+#endif // HAVE_ICU
return pct;
}
assert(pct);
prt->norm_str = wrbuf_alloc();
+ prt->sort_str = wrbuf_alloc();
prt->cp = buf;
+ prt->last_cp = 0;
prt->pct = pct;
#ifdef HAVE_ICU
ok = icu_chain_assign_cstr(pct->icu_chn, buf, &pct->icu_sts);
//printf("\nfield ok: %d '%s'\n", ok, buf);
prt->pct = pct;
- prt->norm_str = 0;
}
#endif // HAVE_ICU
return prt;
assert(prt);
if(prt->norm_str)
wrbuf_destroy(prt->norm_str);
+ if(prt->sort_str)
+ wrbuf_destroy(prt->sort_str);
xfree(prt);
}
return (prt->pct->token_next_handler)(prt);
}
-#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) - 'a' + 1 : -1)
+/* Dispatches to the sort handler installed in the charset handle that
+   prt refers to and returns that handler's result. */
+const char *pp2_get_sort(pp2_relevance_token_t prt, int skip)
+{
+    pp2_charset_t pct = prt->pct;
+
+    return (pct->get_sort_handler)(prt, skip);
+}
+
+#define raw_char(c) (((c) >= 'a' && (c) <= 'z') ? (c) : -1)
/* original tokenizer with our tokenize interface, but we
add +1 to ensure no '\0' are in our string (except for EOF)
*/
if (*cp == '\0')
{
prt->cp = cp;
+ prt->last_cp = 0;
return 0;
}
/* now read the term itself */
+
+ prt->last_cp = cp;
wrbuf_rewind(prt->norm_str);
while (*cp && (c = raw_char(tolower(*cp))) >= 0)
{
return wrbuf_cstr(prt->norm_str);
}
+/*
+ * 7-bit sort key handler.
+ *
+ * Returns 0 when no token start is recorded (last_cp is 0).  Otherwise
+ * the text beginning at last_cp is copied, passed through
+ * normalize7bit_mergekey() together with skip_article, and the outcome is
+ * stored in, and returned from, the tokenizer's sort_str buffer.
+ */
+static const char *pp2_get_sort_ascii(pp2_relevance_token_t prt,
+                                      int skip_article)
+{
+    char *copy;
+
+    if (!prt->last_cp)
+        return 0;
+
+    /* normalize a private duplicate; last_cp points into a const buffer */
+    copy = xstrdup(prt->last_cp);
+
+    wrbuf_rewind(prt->sort_str);
+    wrbuf_puts(prt->sort_str, normalize7bit_mergekey(copy, skip_article));
+    xfree(copy);
+
+    return wrbuf_cstr(prt->sort_str);
+}
+
#ifdef HAVE_ICU
static const char *pp2_relevance_token_icu(pp2_relevance_token_t prt)
{
- //&& U_SUCCESS(pct->icu_sts))
- if (icu_chain_next_token(prt->pct->icu_chn, &prt->pct->icu_sts)){
- //printf("'%s' ", icu_chain_get_norm(prt->pct->icu_chn));
+ if (icu_chain_next_token(prt->pct->icu_chn, &prt->pct->icu_sts))
+ {
if (U_FAILURE(prt->pct->icu_sts))
{
- //printf("ICU status failure\n ");
return 0;
}
-
return icu_chain_get_norm(prt->pct->icu_chn);
}
- //printf ("EOF\n");
return 0;
-};
+}
+
+/* ICU sort key handler: returns icu_chain_get_sort() for the chain held
+   by the charset handle.  skip_article is not consulted here. */
+static const char *pp2_get_sort_icu(pp2_relevance_token_t prt,
+                                    int skip_article)
+{
+    struct icu_chain *chain = prt->pct->icu_chn;
+
+    return icu_chain_get_sort(chain);
+}
+
#endif // HAVE_ICU
-/* $Id: charsets.h,v 1.2 2007-05-23 14:44:18 marc Exp $
+/* $Id: charsets.h,v 1.3 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
#ifndef PAZPAR_CHARSETS_H
#define PAZPAR_CHARSETS_H
+#include <yaz/wrbuf.h>
+#include <yaz/xmltypes.h>
struct icu_chain;
typedef struct pp2_charset_s *pp2_charset_t;
typedef struct pp2_relevance_token_s *pp2_relevance_token_t;
+pp2_charset_t pp2_charset_create_xml(xmlNode *xml_node);
pp2_charset_t pp2_charset_create(struct icu_chain * icu_chn);
void pp2_charset_destroy(pp2_charset_t pct);
const char *buf);
void pp2_relevance_token_destroy(pp2_relevance_token_t prt);
const char *pp2_relevance_token_next(pp2_relevance_token_t prt);
+const char *pp2_get_sort(pp2_relevance_token_t prt, int skip_article);
+
+#if 0
+typedef int pp2_charset_normalize_t(pp2_charset_t pct,
+ const char *buf,
+ WRBUF norm_str, WRBUF sort_str,
+ int skip_article);
+
+pp2_charset_normalize_t pp2_charset_metadata_norm;
+#endif
#endif
-/* $Id: client.c,v 1.19 2007-09-05 08:40:12 adam Exp $
+/* $Id: client.c,v 1.20 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
// Initialize relevance structure with query terms
char *p[512];
extract_terms(se->nmem, cn, p);
- se->relevance = relevance_create(client_get_database(cl)->pct,
- se->nmem, (const char **) p,
- se->expected_maxrecs);
+ se->relevance = relevance_create(
+ global_parameters.server->relevance_pct,
+ se->nmem, (const char **) p,
+ se->expected_maxrecs);
}
ccl_rpn_delete(cn);
-/* $Id: config.c,v 1.40 2007-07-30 23:16:33 quinn Exp $
+/* $Id: config.c,v 1.41 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
02111-1307, USA.
*/
-/* $Id: config.c,v 1.40 2007-07-30 23:16:33 quinn Exp $ */
+/* $Id: config.c,v 1.41 2007-09-10 16:25:50 adam Exp $ */
#include <string.h>
server->service = 0;
server->next = 0;
server->settings = 0;
-
-#ifdef HAVE_ICU
- server->icu_chn = 0;
-#endif // HAVE_ICU
-
+ server->relevance_pct = 0;
+ server->sort_pct = 0;
+ server->mergekey_pct = 0;
for (n = node->children; n; n = n->next)
{
if (!(server->settings = parse_settings(n)))
return 0;
}
- else if (!strcmp((const char *) n->name, "icu_chain"))
+ else if (!strcmp((const char *) n->name, "relevance"))
{
-#ifdef HAVE_ICU
- UErrorCode status = U_ZERO_ERROR;
- struct icu_chain *chain = icu_chain_xml_config(n, &status);
- if (!chain || U_FAILURE(status)){
- //xmlDocPtr icu_doc = 0;
- //xmlChar *xmlstr = 0;
- //int size = 0;
- //xmlDocDumpMemory(icu_doc, size);
-
- yaz_log(YLOG_FATAL, "Could not parse ICU chain config:\n"
- "<%s>\n ... \n</%s>",
- n->name, n->name);
- return 0;
- }
- server->icu_chn = chain;
-#else // HAVE_ICU
- yaz_log(YLOG_FATAL, "Error: ICU support requested with element:\n"
- "<%s>\n ... \n</%s>",
- n->name, n->name);
- yaz_log(YLOG_FATAL,
- "But no ICU support compiled into pazpar2 server.");
- yaz_log(YLOG_FATAL,
- "Please install libicu36-dev and icu-doc or similar, "
- "re-configure and re-compile");
- return 0;
-#endif // HAVE_ICU
+ server->relevance_pct = pp2_charset_create_xml(n->children);
+ }
+ else if (!strcmp((const char *) n->name, "sort"))
+ {
+ server->sort_pct = pp2_charset_create_xml(n->children);
+ }
+ else if (!strcmp((const char *) n->name, "mergekey"))
+ {
+ server->mergekey_pct = pp2_charset_create_xml(n->children);
}
else if (!strcmp((const char *) n->name, "service"))
{
return 0;
}
}
+ if (!server->relevance_pct)
+ server->relevance_pct = pp2_charset_create(0);
+ if (!server->sort_pct)
+ server->sort_pct = pp2_charset_create(0);
+ if (!server->mergekey_pct)
+ server->mergekey_pct = pp2_charset_create(0);
return server;
}
-/* $Id: config.h,v 1.26 2007-07-30 11:52:08 quinn Exp $
+/* $Id: config.h,v 1.27 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
#include <libxslt/xsltutils.h>
#include <yaz/nmem.h>
-
-#ifdef HAVE_ICU
-#include "icu_I18N.h"
-#endif // HAVE_ICU
-
+#include "charsets.h"
enum conf_metadata_type {
Metadata_type_generic, // Generic text field
char *myurl;
char *settings;
-#ifdef HAVE_ICU
- struct icu_chain * icu_chn;
-#endif // HAVE_ICU
+ pp2_charset_t relevance_pct;
+ pp2_charset_t sort_pct;
+ pp2_charset_t mergekey_pct;
struct conf_service *service;
struct conf_server *next;
-/* $Id: http_command.c,v 1.61 2007-09-05 09:13:32 adam Exp $
+/* $Id: http_command.c,v 1.62 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
*/
/*
- * $Id: http_command.c,v 1.61 2007-09-05 09:13:32 adam Exp $
+ * $Id: http_command.c,v 1.62 2007-09-10 16:25:50 adam Exp $
*/
#include <stdio.h>
switch (cmd->type)
{
case Metadata_type_generic:
- wrbuf_xmlputs(w, md->data.text);
+ wrbuf_xmlputs(w, md->data.text.disp);
break;
case Metadata_type_year:
wrbuf_printf(w, "%d", md->data.number.min);
-/* $Id: logic.c,v 1.65 2007-09-07 10:27:14 adam Exp $
+/* $Id: logic.c,v 1.66 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
new->database = db;
new->yaz_marc = 0;
-#ifdef HAVE_ICU
- if (global_parameters.server && global_parameters.server->icu_chn)
- new->pct = pp2_charset_create(global_parameters.server->icu_chn);
- else
- new->pct = pp2_charset_create(0);
-#else // HAVE_ICU
- new->pct = pp2_charset_create(0);
-#endif // HAVE_ICU
-
new->map = 0;
new->settings
= nmem_malloc(se->session_nmem, sizeof(struct settings *) * num);
xsltFreeStylesheet(m->stylesheet);
if (sdb->yaz_marc)
yaz_marc_destroy(sdb->yaz_marc);
- if (sdb->pct)
- pp2_charset_destroy(sdb->pct);
}
// Initialize session_database list -- this represents this session's view
session->watchlist[i].data = 0;
session->watchlist[i].fun = 0;
}
-
return session;
}
char * p = value;
p = normalize7bit_generic(p, " ,/.:([");
- rec_md->data.text = nmem_strdup(nmem, p);
+ rec_md->data.text.disp = nmem_strdup(nmem, p);
+ rec_md->data.text.sort = 0;
}
else if (type == Metadata_type_year)
{
xmlChar *type = 0;
xmlChar *value = 0;
struct conf_service *service = global_parameters.server->service;
+ const char *norm_str = 0;
+ pp2_relevance_token_t prt = 0;
+ WRBUF norm_wr = 0;
if (!xdoc)
return 0;
xmlFreeDoc(xdoc);
return 0;
}
-
+
record = record_create(se->nmem,
service->num_metadata, service->num_sortkeys, cl,
record_no);
- mergekey_norm = (xmlChar *) nmem_strdup(se->nmem, (char*) mergekey);
- xmlFree(mergekey);
- normalize7bit_mergekey((char *) mergekey_norm, 0);
+ prt = pp2_relevance_tokenize(
+ global_parameters.server->mergekey_pct, (const char *) mergekey);
+
+ norm_wr = wrbuf_alloc();
+
+ while ((norm_str = pp2_relevance_token_next(prt)))
+ {
+ if (*norm_str)
+ {
+ if (wrbuf_len(norm_wr))
+ wrbuf_puts(norm_wr, " ");
+ wrbuf_puts(norm_wr, norm_str);
+ }
+ }
+
+ mergekey_norm = (xmlChar *)nmem_strdup(se->nmem, wrbuf_cstr(norm_wr));
+ wrbuf_destroy(norm_wr);
+
+ pp2_relevance_token_destroy(prt);
+
+ xmlFree(mergekey);
+
cluster = reclist_insert(se->reclist,
global_parameters.server->service,
record, (char *) mergekey_norm,
return 0;
}
relevance_newrec(se->relevance, cluster);
-
-
- // now parsing XML record and adding data to cluster or record metadata
- for (n = root->children; n; n = n->next)
- {
- if (type)
- xmlFree(type);
- if (value)
- xmlFree(value);
- type = value = 0;
-
- if (n->type != XML_ELEMENT_NODE)
- continue;
- if (!strcmp((const char *) n->name, "metadata"))
- {
- struct conf_metadata *ser_md = 0;
- struct conf_sortkey *ser_sk = 0;
- struct record_metadata **wheretoput = 0;
- struct record_metadata *rec_md = 0;
- int md_field_id = -1;
- int sk_field_id = -1;
-
- type = xmlGetProp(n, (xmlChar *) "type");
- value = xmlNodeListGetString(xdoc, n->children, 1);
-
- if (!type || !value || !*value)
- continue;
-
- md_field_id
- = conf_service_metadata_field_id(service, (const char *) type);
- if (md_field_id < 0)
- {
- yaz_log(YLOG_WARN,
- "Ignoring unknown metadata element: %s", type);
- continue;
- }
-
- ser_md = &service->metadata[md_field_id];
-
- if (ser_md->sortkey_offset >= 0){
- sk_field_id = ser_md->sortkey_offset;
- ser_sk = &service->sortkeys[sk_field_id];
- }
-
- // non-merged metadata
- rec_md = record_metadata_init(se->nmem, (char *) value,
- ser_md->type);
- if (!rec_md)
- {
- yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'",
- value, type);
- continue;
- }
- rec_md->next = record->metadata[md_field_id];
- record->metadata[md_field_id] = rec_md;
-
- // merged metadata
- rec_md = record_metadata_init(se->nmem, (char *) value,
- ser_md->type);
- wheretoput = &cluster->metadata[md_field_id];
-
- // and polulate with data:
- // assign cluster or record based on merge action
- if (ser_md->merge == Metadata_merge_unique)
- {
- struct record_metadata *mnode;
- for (mnode = *wheretoput; mnode; mnode = mnode->next)
- if (!strcmp((const char *) mnode->data.text,
- rec_md->data.text))
- break;
- if (!mnode)
- {
- rec_md->next = *wheretoput;
- *wheretoput = rec_md;
- }
- }
- else if (ser_md->merge == Metadata_merge_longest)
- {
- if (!*wheretoput
- || strlen(rec_md->data.text)
- > strlen((*wheretoput)->data.text))
- {
- *wheretoput = rec_md;
- if (ser_sk)
- {
- char *s = nmem_strdup(se->nmem, rec_md->data.text);
- if (!cluster->sortkeys[sk_field_id])
- cluster->sortkeys[sk_field_id] =
- nmem_malloc(se->nmem,
- sizeof(union data_types));
- normalize7bit_mergekey(s,
- (ser_sk->type == Metadata_sortkey_skiparticle));
- cluster->sortkeys[sk_field_id]->text = s;
+
+
+ // now parsing XML record and adding data to cluster or record metadata
+ for (n = root->children; n; n = n->next)
+ {
+ if (type)
+ xmlFree(type);
+ if (value)
+ xmlFree(value);
+ type = value = 0;
+
+ if (n->type != XML_ELEMENT_NODE)
+ continue;
+ if (!strcmp((const char *) n->name, "metadata"))
+ {
+ struct conf_metadata *ser_md = 0;
+ struct conf_sortkey *ser_sk = 0;
+ struct record_metadata **wheretoput = 0;
+ struct record_metadata *rec_md = 0;
+ int md_field_id = -1;
+ int sk_field_id = -1;
+
+ type = xmlGetProp(n, (xmlChar *) "type");
+ value = xmlNodeListGetString(xdoc, n->children, 1);
+
+ if (!type || !value || !*value)
+ continue;
+
+ md_field_id
+ = conf_service_metadata_field_id(service, (const char *) type);
+ if (md_field_id < 0)
+ {
+ yaz_log(YLOG_WARN,
+ "Ignoring unknown metadata element: %s", type);
+ continue;
+ }
+
+ ser_md = &service->metadata[md_field_id];
+
+ if (ser_md->sortkey_offset >= 0){
+ sk_field_id = ser_md->sortkey_offset;
+ ser_sk = &service->sortkeys[sk_field_id];
+ }
+
+ // non-merged metadata
+ rec_md = record_metadata_init(se->nmem, (char *) value,
+ ser_md->type);
+ if (!rec_md)
+ {
+ yaz_log(YLOG_WARN, "bad metadata data '%s' for element '%s'",
+ value, type);
+ continue;
+ }
+ rec_md->next = record->metadata[md_field_id];
+ record->metadata[md_field_id] = rec_md;
+
+ // merged metadata
+ rec_md = record_metadata_init(se->nmem, (char *) value,
+ ser_md->type);
+ wheretoput = &cluster->metadata[md_field_id];
+
+ // and polulate with data:
+ // assign cluster or record based on merge action
+ if (ser_md->merge == Metadata_merge_unique)
+ {
+ struct record_metadata *mnode;
+ for (mnode = *wheretoput; mnode; mnode = mnode->next)
+ if (!strcmp((const char *) mnode->data.text.disp,
+ rec_md->data.text.disp))
+ break;
+ if (!mnode)
+ {
+ rec_md->next = *wheretoput;
+ *wheretoput = rec_md;
+ }
+ }
+ else if (ser_md->merge == Metadata_merge_longest)
+ {
+ if (!*wheretoput
+ || strlen(rec_md->data.text.disp)
+ > strlen((*wheretoput)->data.text.disp))
+ {
+ *wheretoput = rec_md;
+ if (ser_sk)
+ {
+ const char *sort_str = 0;
+ int skip_article =
+ ser_sk->type == Metadata_sortkey_skiparticle;
+
+ if (!cluster->sortkeys[sk_field_id])
+ cluster->sortkeys[sk_field_id] =
+ nmem_malloc(se->nmem,
+ sizeof(union data_types));
+
+ prt = pp2_relevance_tokenize(
+ global_parameters.server->sort_pct,
+ rec_md->data.text.disp);
+
+ pp2_relevance_token_next(prt);
+
+ sort_str = pp2_get_sort(prt, skip_article);
+
+ cluster->sortkeys[sk_field_id]->text.disp =
+ rec_md->data.text.disp;
+ cluster->sortkeys[sk_field_id]->text.sort =
+ nmem_strdup(se->nmem, sort_str);
+#if 0
+ yaz_log(YLOG_LOG, "text disp=%s",
+ cluster->sortkeys[sk_field_id]->text.disp);
+ yaz_log(YLOG_LOG, "text sort=%s",
+ cluster->sortkeys[sk_field_id]->text.sort);
+#endif
+ pp2_relevance_token_destroy(prt);
}
}
}
-/* $Id: pazpar2.h,v 1.49 2007-09-05 08:40:12 adam Exp $
+/* $Id: pazpar2.h,v 1.50 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
// for that session
struct session_database
{
- pp2_charset_t pct;
struct database *database;
struct setting **settings;
yaz_marc_t yaz_marc;
-/* $Id: reclists.c,v 1.22 2007-08-28 21:11:21 quinn Exp $
+/* $Id: reclists.c,v 1.23 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
union data_types *ut2 = r2->sortkeys[s->offset];
switch (s->type)
{
- char *s1, *s2;
+ const char *s1, *s2;
case Metadata_sortkey_relevance:
res = r2->relevance - r1->relevance;
break;
case Metadata_sortkey_string:
- s1 = ut1 ? ut1->text : "";
- s2 = ut2 ? ut2->text : "";
+ s1 = ut1 ? ut1->text.sort : "";
+ s2 = ut2 ? ut2->text.sort : "";
res = strcmp(s2, s1);
if (res)
{
-/* $Id: record.h,v 1.11 2007-07-16 17:01:46 adam Exp $
+/* $Id: record.h,v 1.12 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
struct conf_service;
union data_types {
- char *text;
+ struct {
+ const char *disp;
+ const char *sort;
+ } text;
struct {
int min;
int max;
-/* $Id: test_record.c,v 1.8 2007-07-30 23:16:33 quinn Exp $
+/* $Id: test_record.c,v 1.9 2007-09-10 16:25:50 adam Exp $
Copyright (c) 2006-2007, Index Data.
This file is part of Pazpar2.
struct client *client = 0;
char * bla = "blabla";
union data_types data_text;
- data_text.text = bla;
+ data_text.text.disp = bla;
+ data_text.text.sort = bla;
union data_types data_num;
-# $Id: Makefile.am,v 1.4 2007-09-10 08:18:19 adam Exp $
+# $Id: Makefile.am,v 1.5 2007-09-10 16:25:51 adam Exp $
-check_SCRIPTS = test_http.sh
+check_SCRIPTS = test_http.sh test_icu.sh
EXTRA_DIST = run_pazpar2.sh marc21.xsl test_http.xml test_http.cfg \
- test_http_urls $(check_SCRIPTS)
+ test_http_urls test_icu_urls $(check_SCRIPTS)
TESTS = $(check_SCRIPTS)
dist-hook:
cp ${srcdir}/test_http_*.res $(distdir)
+ cp ${srcdir}/test_icu_*.res $(distdir)
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
+ <!-- $Id: test_icu.cfg,v 1.1 2007-09-10 16:25:51 adam Exp $ -->
+ <!-- Used by test_icu.sh -->
+ <server>
+ <listen port="9763"/>
+ <proxy host="localhost"/>
+
+ <relevance>
+ <icu_chain id="relevance" locale="el">
+ <normalize rule="[:Control:] Any-Remove"/>
+ <tokenize rule="l"/>
+ <normalize rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
+ <casemap rule="l"/>
+ <index/>
+ </icu_chain>
+ </relevance>
+
+ <sort>
+ <icu_chain id="sort" locale="el">
+ <normalize rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+ <casemap rule="l"/>
+ <sortkey/>
+ </icu_chain>
+ </sort>
+
+ <mergekey>
+ <icu_chain id="mergekey" locale="el">
+ <tokenize rule="l"/>
+ <normalize rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+ <casemap rule="l"/>
+ <index/>
+ </icu_chain>
+ </mergekey>
+
+ <service>
+ <metadata name="url" merge="unique"/>
+ <metadata name="title" brief="yes" sortkey="skiparticle" merge="longest" rank="6"/>
+ <metadata name="title-remainder" brief="yes" merge="longest" rank="5"/>
+ <metadata name="isbn"/>
+ <metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
+ termlist="yes"/>
+ <metadata name="author" brief="yes" termlist="yes" merge="longest" rank="2"/>
+ <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
+ <metadata name="id"/>
+ <metadata name="lccn" merge="unique"/>
+ <metadata name="description" brief="yes" merge="longest" rank="3"/>
+
+ <metadata name="test-usersetting" brief="yes" setting="postproc"/>
+ <metadata name="test" setting="parameter"/>
+ <metadata name="test_usersetting_2" brief="yes"/>
+ </service>
+ </server>
+
+ <targetprofiles type="local" src="../zeerex/records/"/>
+
+</pazpar2>
+<!-- Keep this comment at the end of the file
+ Local variables:
+ mode: nxml
+ End:
+-->
--- /dev/null
+#!/bin/sh
+# $Id: test_icu.sh,v 1.1 2007-09-10 16:25:51 adam Exp $
+#
+# Runs the ICU HTTP test suite, but only when a pazpar2 binary exists and
+# reports ICU support in its -V output; otherwise exits 0 without testing.
+
+# srcdir might be set by make
+srcdir=${srcdir:-"."}
+
+# quote the script path so a source directory containing spaces still works
+if test -x ../src/pazpar2 && ../src/pazpar2 -V | grep icu: >/dev/null; then
+    exec "${srcdir}/run_pazpar2.sh" test_icu
+fi
+exit 0
+# Local Variables:
+# mode:shell-script
+# sh-indentation: 2
+# sh-basic-offset: 4
+# End:
--- /dev/null
+<init><status>OK</status><session>1</session><protocol>1</protocol></init>
\ No newline at end of file
--- /dev/null
+<stat><activeclients>0</activeclients>
+<hits>0</hits>
+<records>0</records>
+<clients>0</clients>
+<unconnected>0</unconnected>
+<connecting>0</connecting>
+<initializing>0</initializing>
+<searching>0</searching>
+<presenting>0</presenting>
+<idle>0</idle>
+<failed>0</failed>
+<error>0</error>
+</stat>
\ No newline at end of file
--- /dev/null
+<search><status>OK</status></search>
\ No newline at end of file
--- /dev/null
+<show>
+<status>OK</status>
+<activeclients>0</activeclients>
+<merged>9</merged>
+<total>10</total>
+<start>0</start>
+<num>9</num>
+<hit>
+
+<md-title>Washington metropolitan area rail computer feasibility study;</md-title>
+<md-title-remainder>final report</md-title-remainder>
+<md-date>1971</md-date>
+<md-author>Englund, Carl R</md-author>
+<md-description>"Contract DOT-UT-10003."</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title washington metropolitan area rail computer feasibility study author englund carl r medium book</recid>
+</hit>
+<hit>
+
+<md-title>The use of passwords for controlled access to computer resources</md-title>
+<md-date>1977</md-date>
+<md-author>Wood, Helen M</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the use of passwords for controlled access to computer resources author wood helen m medium book</recid>
+</hit>
+<hit>
+
+<md-title>The Puget Sound Region</md-title>
+<md-title-remainder>a portfolio of thematic computer maps</md-title-remainder>
+<md-date>1974</md-date>
+<md-author>Mairs, John W</md-author>
+<md-description>Scale of maps ca. 1:1,000,000</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the puget sound region author mairs john w medium book</recid>
+</hit>
+<hit>
+
+<md-title>The Computer Bible</md-title>
+<md-date>1973-1980</md-date>
+<md-description>Vols. 2, 8: Missoula, Mont. : Published by Scholars Press for Biblical Research Associates</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the computer bible author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Reconstruction tomography in diagnostic radiology and nuclear medicine</md-title>
+<md-title-remainder>proceedings of the workshop</md-title-remainder>
+<md-date>1977</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book</recid>
+</hit>
+<hit>
+
+<md-title>How to program a computer</md-title>
+<md-author>Jack Collins</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<count>2</count>
+<recid>title how to program a computer author jack collins medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer science & technology</md-title>
+<md-title-remainder>proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976</md-title-remainder>
+<md-date>1977</md-date><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer science technology author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer processing of dynamic images from an Anger scintillation camera</md-title>
+<md-title-remainder>the proceedings of a workshop</md-title-remainder>
+<md-date>1974</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer processing of dynamic images from an anger scintillation camera author medium book</recid>
+</hit>
+<hit>
+
+<md-title>A plan for community college computer development</md-title>
+<md-date>1971</md-date>
+<md-description>Cover title</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title a plan for community college computer development author medium book</recid>
+</hit>
+</show>
--- /dev/null
+<show>
+<status>OK</status>
+<activeclients>0</activeclients>
+<merged>9</merged>
+<total>10</total>
+<start>0</start>
+<num>9</num>
+<hit>
+
+<md-title>A plan for community college computer development</md-title>
+<md-date>1971</md-date>
+<md-description>Cover title</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title a plan for community college computer development author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer processing of dynamic images from an Anger scintillation camera</md-title>
+<md-title-remainder>the proceedings of a workshop</md-title-remainder>
+<md-date>1974</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer processing of dynamic images from an anger scintillation camera author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer science & technology</md-title>
+<md-title-remainder>proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976</md-title-remainder>
+<md-date>1977</md-date><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer science technology author medium book</recid>
+</hit>
+<hit>
+
+<md-title>How to program a computer</md-title>
+<md-author>Jack Collins</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<count>2</count>
+<recid>title how to program a computer author jack collins medium book</recid>
+</hit>
+<hit>
+
+<md-title>Reconstruction tomography in diagnostic radiology and nuclear medicine</md-title>
+<md-title-remainder>proceedings of the workshop</md-title-remainder>
+<md-date>1977</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book</recid>
+</hit>
+<hit>
+
+<md-title>The Computer Bible</md-title>
+<md-date>1973-1980</md-date>
+<md-description>Vols. 2, 8: Missoula, Mont. : Published by Scholars Press for Biblical Research Associates</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the computer bible author medium book</recid>
+</hit>
+<hit>
+
+<md-title>The Puget Sound Region</md-title>
+<md-title-remainder>a portfolio of thematic computer maps</md-title-remainder>
+<md-date>1974</md-date>
+<md-author>Mairs, John W</md-author>
+<md-description>Scale of maps ca. 1:1,000,000</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the puget sound region author mairs john w medium book</recid>
+</hit>
+<hit>
+
+<md-title>The use of passwords for controlled access to computer resources</md-title>
+<md-date>1977</md-date>
+<md-author>Wood, Helen M</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the use of passwords for controlled access to computer resources author wood helen m medium book</recid>
+</hit>
+<hit>
+
+<md-title>Washington metropolitan area rail computer feasibility study;</md-title>
+<md-title-remainder>final report</md-title-remainder>
+<md-date>1971</md-date>
+<md-author>Englund, Carl R</md-author>
+<md-description>"Contract DOT-UT-10003."</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title washington metropolitan area rail computer feasibility study author englund carl r medium book</recid>
+</hit>
+</show>
--- /dev/null
+<show>
+<status>OK</status>
+<activeclients>0</activeclients>
+<merged>9</merged>
+<total>10</total>
+<start>0</start>
+<num>9</num>
+<hit>
+
+<md-title>The Computer Bible</md-title>
+<md-date>1973-1980</md-date>
+<md-description>Vols. 2, 8: Missoula, Mont. : Published by Scholars Press for Biblical Research Associates</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the computer bible author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer science & technology</md-title>
+<md-title-remainder>proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976</md-title-remainder>
+<md-date>1977</md-date><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer science technology author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Reconstruction tomography in diagnostic radiology and nuclear medicine</md-title>
+<md-title-remainder>proceedings of the workshop</md-title-remainder>
+<md-date>1977</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book</recid>
+</hit>
+<hit>
+
+<md-title>The use of passwords for controlled access to computer resources</md-title>
+<md-date>1977</md-date>
+<md-author>Wood, Helen M</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the use of passwords for controlled access to computer resources author wood helen m medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer processing of dynamic images from an Anger scintillation camera</md-title>
+<md-title-remainder>the proceedings of a workshop</md-title-remainder>
+<md-date>1974</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer processing of dynamic images from an anger scintillation camera author medium book</recid>
+</hit>
+<hit>
+
+<md-title>The Puget Sound Region</md-title>
+<md-title-remainder>a portfolio of thematic computer maps</md-title-remainder>
+<md-date>1974</md-date>
+<md-author>Mairs, John W</md-author>
+<md-description>Scale of maps ca. 1:1,000,000</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the puget sound region author mairs john w medium book</recid>
+</hit>
+<hit>
+
+<md-title>A plan for community college computer development</md-title>
+<md-date>1971</md-date>
+<md-description>Cover title</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title a plan for community college computer development author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Washington metropolitan area rail computer feasibility study;</md-title>
+<md-title-remainder>final report</md-title-remainder>
+<md-date>1971</md-date>
+<md-author>Englund, Carl R</md-author>
+<md-description>"Contract DOT-UT-10003."</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title washington metropolitan area rail computer feasibility study author englund carl r medium book</recid>
+</hit>
+<hit>
+
+<md-title>How to program a computer</md-title>
+<md-author>Jack Collins</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<count>2</count>
+<recid>title how to program a computer author jack collins medium book</recid>
+</hit>
+</show>
--- /dev/null
+<show>
+<status>OK</status>
+<activeclients>0</activeclients>
+<merged>9</merged>
+<total>10</total>
+<start>0</start>
+<num>9</num>
+<hit>
+
+<md-title>A plan for community college computer development</md-title>
+<md-date>1971</md-date>
+<md-description>Cover title</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title a plan for community college computer development author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Washington metropolitan area rail computer feasibility study;</md-title>
+<md-title-remainder>final report</md-title-remainder>
+<md-date>1971</md-date>
+<md-author>Englund, Carl R</md-author>
+<md-description>"Contract DOT-UT-10003."</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title washington metropolitan area rail computer feasibility study author englund carl r medium book</recid>
+</hit>
+<hit>
+
+<md-title>The Computer Bible</md-title>
+<md-date>1973-1980</md-date>
+<md-description>Vols. 2, 8: Missoula, Mont. : Published by Scholars Press for Biblical Research Associates</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the computer bible author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer processing of dynamic images from an Anger scintillation camera</md-title>
+<md-title-remainder>the proceedings of a workshop</md-title-remainder>
+<md-date>1974</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer processing of dynamic images from an anger scintillation camera author medium book</recid>
+</hit>
+<hit>
+
+<md-title>The Puget Sound Region</md-title>
+<md-title-remainder>a portfolio of thematic computer maps</md-title-remainder>
+<md-date>1974</md-date>
+<md-author>Mairs, John W</md-author>
+<md-description>Scale of maps ca. 1:1,000,000</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the puget sound region author mairs john w medium book</recid>
+</hit>
+<hit>
+
+<md-title>Computer science & technology</md-title>
+<md-title-remainder>proceedings of a workshop held at the National Bureau of Standards, Gaithersburg, MD, June 3-4, 1976</md-title-remainder>
+<md-date>1977</md-date><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title computer science technology author medium book</recid>
+</hit>
+<hit>
+
+<md-title>Reconstruction tomography in diagnostic radiology and nuclear medicine</md-title>
+<md-title-remainder>proceedings of the workshop</md-title-remainder>
+<md-date>1977</md-date>
+<md-description>Includes bibliographical references and index</md-description><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title reconstruction tomography in diagnostic radiology and nuclear medicine author medium book</recid>
+</hit>
+<hit>
+
+<md-title>The use of passwords for controlled access to computer resources</md-title>
+<md-date>1977</md-date>
+<md-author>Wood, Helen M</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<recid>title the use of passwords for controlled access to computer resources author wood helen m medium book</recid>
+</hit>
+<hit>
+
+<md-title>How to program a computer</md-title>
+<md-author>Jack Collins</md-author><location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<location id="z3950.indexdata.com/marc" name="Local Test"></location>
+<count>2</count>
+<recid>title how to program a computer author jack collins medium book</recid>
+</hit>
+</show>