ZEBRA_MODULE(alvis,[$def], [ --enable-mod-alvis ALVIS filter (Requires libxslt)])
ZEBRA_MODULE(safari,shared,[ --enable-mod-safari Safari filter (DBC)])
+
+AC_CHECK_HEADERS([mysql/mysql.h], [def="shared"], [def="disabled"], [])
+ZEBRA_MODULE(indexplugin_mysql,[$def],[ --enable-mod-indexplugin-mysql indexing plugin])
+
dnl ------ ANSI C Header files
AC_STDC_HEADERS
if test "$ac_cv_header_stdc" = "no"; then
YAZ_EXPORT
ZEBRA_RES zebra_repository_index(ZebraHandle zh, const char *path,
- enum zebra_recctrl_action_t action);
+ enum zebra_recctrl_action_t action, char *useIndexDriver);
YAZ_EXPORT
ZEBRA_RES zebra_repository_update(ZebraHandle zh, const char *path);
mod_text_la_LADD =
mod_text_la_LIBADD = $(zebralib) $(mod_text_la_LADD)
+mod_indexplugin_mysql_la_SOURCES = mod_indexplugin_mysql.c
+mod_indexplugin_mysql_la_LDFLAGS = -rpath $(modlibdir) -module -avoid-version
+mod_indexplugin_mysql_la_LADD =
+mod_indexplugin_mysql_la_LIBADD = $(zebralib) $(mod_indexplugin_mysql_la_LADD) -lmysqlclient
+
modlib_LTLIBRARIES = $(SHARED_MODULE_LA)
EXTRA_LTLIBRARIES = \
mod-grs-regx.la \
mod-safari.la \
mod-alvis.la \
mod-dom.la \
- mod-text.la
+ mod-text.la \
+ mod-indexplugin_mysql.la
EXTRA_libidzebra_2_0_la_SOURCES = \
$(mod_grs_regx_la_SOURCES) \
$(mod_safari_la_SOURCES) \
$(mod_alvis_la_SOURCES) \
$(mod_dom_la_SOURCES) \
- $(mod_text_la_SOURCES)
+ $(mod_text_la_SOURCES) \
+ $(mod_indexplugin_mysql_la_SOURCES)
lib_LTLIBRARIES = $(zebralib)
rpnscan.c rpnsearch.c sortidx.c stream.c \
update_path.c update_file.c trunc.c untrans.c isam_methods.c \
zaptterm.c zebraapi.c zinfo.c zinfo.h zsets.c key_block.c key_block.h \
- check_res.c rset_isam.c
+ check_res.c rset_isam.c \
+ update_driver.c
bin_PROGRAMS = zebraidx zebrasrv
-DDEFAULT_MODULE_PATH=\"$(modlibdir)\" \
$(TCL_INCLUDE)
-LDADD = $(zebralib) $(YAZLALIB)
+LDADD = $(zebralib) $(YAZLALIB) -ldl
zebrash_LDADD= $(LDADD) $(READLINE_LIBS)
res_add(v, "facetNumRecs", "");
res_add(v, "facetMaxChunks", "");
+ /* Accept the indexplugin settings (the setting names are provisional and may change) */
+ res_add(v, "indexplugin", "s");
+
errors = res_check(res, v);
res_close(v);
ZEBRA_RES zebra_update_file_match(ZebraHandle zh, const char *path);
ZEBRA_RES zebra_update_from_path(ZebraHandle zh, const char *path,
enum zebra_recctrl_action_t action);
+ZEBRA_RES zebra_update_from_driver(ZebraHandle zh, const char *path,
+ enum zebra_recctrl_action_t action, char *useIndexDriver);
ZEBRA_RES zebra_remove_file_match(ZebraHandle zh);
struct rpn_char_map_info
--- /dev/null
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2009 Index Data
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#ifndef ZEBRA_INDEXPLUGINH
+#define ZEBRA_INDEXPLUGINH
+
+#include "index.h"
+#include <assert.h>
+
+/* Signature of an index driver callback.  zebra_update_from_driver() calls
+   it with the Zebra handle, the driver argument taken from the command line
+   and the requested record action, and returns its result to the caller. */
+typedef int (*indexList)(ZebraHandle zh, const char *driverArg, enum zebra_recctrl_action_t action);
+
+/* Holds the driver callback that a loaded plugin installs through
+   addDriverFunction(). */
+typedef struct
+{
+ indexList idxList;
+} zebra_index_plugin_object;
+
+/* Install the driver callback in the current plugin object; meant to be
+   called from a plugin's indexPluginRegister(). */
+void addDriverFunction(indexList);
+/* Index one in-memory record: data/dataLength are the record contents,
+   action is the record action to perform, and name is forwarded as the
+   record's file name (presumably its unique identifier - confirm against
+   zebra_buffer_extract_record). */
+void zebraIndexBuffer(ZebraHandle zh, char *data, int dataLength, enum zebra_recctrl_action_t action, char *name);
+
+#endif
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2009 Index Data
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#include "indexplugin.h"
+#include <mysql/mysql.h>
+#include <stdint.h>
+
+MYSQL mCon;
+
+/*
+ * Initialise the module-wide handle mCon and connect it to a MySQL server.
+ *
+ * username, password, database and hostname are handed to
+ * mysql_real_connect() unchanged.  Returns ZEBRA_OK once connected and
+ * ZEBRA_FAIL (after logging the MySQL error) otherwise.  The handle is not
+ * closed here; that is left to the caller.
+ */
+static int mysqlConnect(const char *username, const char *password, const char *database, const char *hostname)
+{
+    mysql_init(&mCon);
+
+    /* Request UTF-8 so that Zebra does not complain that the XML
+       conflicts with its encoding */
+    mysql_options(&mCon, MYSQL_SET_CHARSET_NAME, "utf8");
+
+    /* Also read the [indexplugin_mysql] group of the MySQL option files */
+    mysql_options(&mCon, MYSQL_READ_DEFAULT_GROUP, "indexplugin_mysql");
+
+    if (mysql_real_connect(&mCon, hostname, username, password, database, 0, NULL, 0))
+    {
+        yaz_log(YLOG_LOG, "Connected to Mysql Database");
+        return ZEBRA_OK;
+    }
+
+    yaz_log(YLOG_FATAL, "Failed to connect to database: %s\n", mysql_error(&mCon));
+    return ZEBRA_FAIL;
+}
+
+
+/*
+ * Index driver callback: run a MySQL query and index every row it returns.
+ *
+ * driverCommand has the form "<config>[:<sql>]".  <config> is the suffix used
+ * to look up the indexplugin.mysql_* settings in zh->session_res; <sql> is
+ * the query to run.  Without ":<sql>" the query is read from
+ * indexplugin.mysql_defaultsql.<config>.  driverCommand itself is never
+ * modified.
+ *
+ * The result set must contain the columns named by the mysql_idfield and
+ * mysql_datafield settings; for each row the data column is indexed under
+ * the name found in the id column.  Rows where either cell is SQL NULL are
+ * skipped with a warning.
+ *
+ * Returns ZEBRA_OK on success and ZEBRA_FAIL on configuration, connection
+ * or query errors.
+ */
+static int repositoryExtract(ZebraHandle zh, const char *driverCommand, enum zebra_recctrl_action_t action)
+{
+    enum { CONFIG_NAME_MAX = 256 };
+    char configName[CONFIG_NAME_MAX];
+    const char *separator;
+    const char *sqlQuery;
+    const char *username;
+    const char *password;
+    const char *hostname;
+    const char *database;
+    const char *idfield;
+    const char *datafield;
+    size_t nameLen;
+    int idColumn = -1;      /* -1 means "column not found" */
+    int dataColumn = -1;
+    int column = 0;
+    int ret;
+    MYSQL_RES *result;
+    MYSQL_FIELD *field;
+    MYSQL_ROW row;
+
+    if (!driverCommand)
+    {
+        yaz_log(YLOG_FATAL, "Database configuration incomplete or missing");
+        return ZEBRA_FAIL;
+    }
+
+    /* Split "<config>:<sql>" into a private copy of the configuration name
+       and a pointer to the query, leaving the caller's string untouched */
+    separator = strchr(driverCommand, ':');
+    nameLen = separator ? (size_t) (separator - driverCommand) : strlen(driverCommand);
+    if (nameLen >= sizeof(configName))
+    {
+        yaz_log(YLOG_FATAL, "Database configuration name too long");
+        return ZEBRA_FAIL;
+    }
+    memcpy(configName, driverCommand, nameLen);
+    configName[nameLen] = '\0';
+
+    if (separator)
+        sqlQuery = separator + 1;
+    else
+    {
+        yaz_log(YLOG_LOG, "No MySQL Query given, falling back on config default");
+        sqlQuery = res_get_named(zh->session_res, "indexplugin.mysql_defaultsql", configName);
+    }
+
+    yaz_log(YLOG_LOG, "Database configuration selected: %s", configName);
+
+    /* Connection settings for the selected configuration */
+    username = res_get_named(zh->session_res, "indexplugin.mysql_username", configName);
+    password = res_get_named(zh->session_res, "indexplugin.mysql_password", configName);
+    hostname = res_get_named(zh->session_res, "indexplugin.mysql_hostname", configName);
+    database = res_get_named(zh->session_res, "indexplugin.mysql_database", configName);
+
+    /* Names of the result columns holding the record id and the record data */
+    idfield = res_get_named(zh->session_res, "indexplugin.mysql_idfield", configName);
+    datafield = res_get_named(zh->session_res, "indexplugin.mysql_datafield", configName);
+
+    /* idfield and datafield are compared with strcmp() below, so they are
+       just as mandatory as the user name */
+    if (!username || !idfield || !datafield)
+    {
+        yaz_log(YLOG_FATAL, "Database configuration incomplete or missing");
+        return ZEBRA_FAIL;
+    }
+
+    if (!sqlQuery)
+    {
+        yaz_log(YLOG_FATAL, "No valid MySQL query");
+        return ZEBRA_FAIL;
+    }
+
+    yaz_log(YLOG_LOG, "MySQL Query: %s", sqlQuery);
+
+    ret = mysqlConnect(username, password, database, hostname);
+    if (ret != ZEBRA_OK)
+        goto done;
+
+    if (mysql_real_query(&mCon, sqlQuery, (unsigned long) strlen(sqlQuery)) != 0)
+    {
+        yaz_log(YLOG_FATAL, "Failed to run query: %s", mysql_error(&mCon));
+        ret = ZEBRA_FAIL;
+        goto done;
+    }
+
+    result = mysql_store_result(&mCon);
+    if (!result)
+    {
+        /* Either an error occurred or the statement produced no result set;
+           in both cases there is nothing to index */
+        yaz_log(YLOG_FATAL, "Query returned no result set: %s", mysql_error(&mCon));
+        ret = ZEBRA_FAIL;
+        goto done;
+    }
+
+    /* Locate the id and data columns by name */
+    while ((field = mysql_fetch_field(result)) != NULL)
+    {
+        if (strcmp(field->name, idfield) == 0)
+            idColumn = column;
+        if (strcmp(field->name, datafield) == 0)
+            dataColumn = column;
+        column++;
+    }
+
+    if (idColumn < 0 || dataColumn < 0)
+    {
+        yaz_log(YLOG_FATAL, "Query did not reveal all/any binding columns");
+        ret = ZEBRA_FAIL;
+    }
+    else
+    {
+        yaz_log(YLOG_LOG, "Successfully found all binding columns");
+
+        while ((row = mysql_fetch_row(result)) != NULL)
+        {
+            unsigned long *lengths = mysql_fetch_lengths(result);
+
+            if (!lengths || !row[idColumn] || !row[dataColumn])
+            {
+                yaz_log(YLOG_WARN, "Skipping row with NULL id or data column");
+                continue;
+            }
+
+            /* This is the call that actually indexes the data.
+               Args: Zebra handle, data, data length, action, unique name */
+            zebraIndexBuffer(zh, row[dataColumn], (int) lengths[dataColumn], action, row[idColumn]);
+        }
+    }
+    mysql_free_result(result);
+
+done:
+    /* Drop the MySQL connection (also releases the handle state set up by
+       mysqlConnect when the connection attempt itself failed) */
+    mysql_close(&mCon);
+
+    return ret;
+}
+
+/* Plugin entry point.  zebra_update_from_driver() looks this symbol up by
+   name with dlsym() and calls it once after loading the module. */
+void indexPluginRegister(void)
+{
+ /* install repositoryExtract as the driver callback that performs the indexing */
+ addDriverFunction(repositoryExtract);
+}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
--- /dev/null
+/* This file is part of the Zebra server.
+ Copyright (C) 1994-2009 Index Data
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+
+*/
+
+#include <stdio.h>
+#include <assert.h>
+#include <sys/types.h>
+#ifdef WIN32
+#include <io.h>
+#define S_ISREG(x) (x & _S_IFREG)
+#define S_ISDIR(x) (x & _S_IFDIR)
+#endif
+#if HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+#include <direntz.h>
+#include <fcntl.h>
+#include <time.h>
+
+#include "index.h"
+
+/* plugin includes */
+#include <sys/stat.h>
+#include "indexplugin.h"
+#include <stdlib.h>
+#include <dlfcn.h>
+
+
+
+
+
+zebra_index_plugin_object *pluginObj = NULL;
+
+/*
+ * Allocate a plugin object with no driver callback installed yet.
+ *
+ * Returns the new object, or NULL when the allocation fails.  idxList starts
+ * out as NULL so that a plugin which never calls addDriverFunction() can be
+ * detected instead of jumping through an uninitialised pointer.
+ */
+static zebra_index_plugin_object *newZebraPlugin(void)
+{
+    zebra_index_plugin_object *newPlugin = malloc(sizeof *newPlugin);
+
+    if (newPlugin)
+        newPlugin->idxList = NULL;
+    return newPlugin;
+}
+
+/* Release a plugin object created by newZebraPlugin(); the pointer is
+   passed straight to free(). */
+static void destroyZebraPlugin(zebra_index_plugin_object *zebraIdxPlugin)
+{
+ free(zebraIdxPlugin);
+}
+
+/*
+ * Install the driver callback in the current plugin object (pluginObj).
+ *
+ * Intended to be called by a plugin from its indexPluginRegister().  When no
+ * plugin object exists the call is ignored with a warning rather than
+ * dereferencing a NULL pointer.
+ */
+void addDriverFunction(indexList function)
+{
+    if (!pluginObj)
+    {
+        yaz_log(YLOG_WARN, "Index plugin registered a driver while no plugin object exists");
+        return;
+    }
+
+    /* Assign the function to the object */
+    pluginObj->idxList = function;
+}
+
+
+/*
+ * Helper for index drivers: forward one in-memory record to
+ * zebra_buffer_extract_record().
+ *
+ * data/dataLength are the record contents and their length, action is passed
+ * on unchanged, and name is passed as the last argument (the MySQL driver
+ * supplies the row's id column here).  The record type is taken from
+ * zh->m_record_type; the two remaining arguments are passed as NULL.
+ *
+ * NOTE(review): any result of zebra_buffer_extract_record() is discarded, so
+ * a driver cannot tell whether a record failed to index - confirm this is
+ * intended.
+ */
+void zebraIndexBuffer(ZebraHandle zh, char *data, int dataLength, enum zebra_recctrl_action_t action, char *name)
+{
+ zebra_buffer_extract_record(zh, data, dataLength, action, zh->m_record_type, NULL, NULL, name);
+}
+
+
+/*
+ * Log the path of every entry that dirs_read() yields for the directory
+ * `path` in the file-match dictionary (FMATCH_DICT).
+ *
+ * The path is copied into a fixed 1024-byte buffer and given a trailing '/'
+ * if it lacks one; overlong paths are truncated so that both the '/' and the
+ * terminating NUL always fit.  Failure to open the dictionary is logged and
+ * the function returns without doing anything.
+ */
+void repositoryShowDriver(ZebraHandle zh, const char *path)
+{
+    char src[1024];
+    size_t src_len;
+    struct dirs_entry *dst;
+    Dict dict;
+    struct dirs_info *di;
+
+    if (!(dict = dict_open_res(zh->reg->bfs, FMATCH_DICT, 50, 0, 0, zh->res)))
+    {
+        yaz_log(YLOG_FATAL, "dict_open fail of %s", FMATCH_DICT);
+        return;
+    }
+
+    /* Keep two bytes in reserve: one for an appended '/' and one for the
+       terminator (the old limit of sizeof(src)-1 overflowed by one byte) */
+    src_len = strlen(path);
+    if (src_len > sizeof(src) - 2)
+        src_len = sizeof(src) - 2;
+    memcpy(src, path, src_len);
+    src[src_len] = '\0';
+
+    if (src_len && src[src_len-1] != '/')
+    {
+        src[src_len] = '/';
+        src[++src_len] = '\0';
+    }
+
+    di = dirs_open(dict, src, zh->m_flag_rw);
+
+    while ((dst = dirs_read(di)))
+        yaz_log(YLOG_LOG, "%s", dst->path);
+    dirs_free(&di);
+    dict_close(dict);
+}
+
+
+/*
+ * Load the index driver plugin "mod-<useIndexDriver>.so" and let it index.
+ *
+ * The module is opened with dlopen(), its indexPluginRegister() symbol is
+ * called so that the plugin can install its driver callback via
+ * addDriverFunction(), and that callback is then invoked with zh, path and
+ * action.  path is not interpreted here; it is handed to the driver as its
+ * argument.
+ *
+ * Returns the driver's result, or ZEBRA_FAIL when the driver name is missing
+ * or too long, the module or its entry point cannot be loaded, memory runs
+ * out, or the plugin does not register a callback.
+ *
+ * NOTE(review): the module is opened by bare file name, so it is found only
+ * through the dynamic loader's search path - confirm whether the configured
+ * modulePath should be used instead.
+ */
+ZEBRA_RES zebra_update_from_driver(ZebraHandle zh, const char *path,
+                                   enum zebra_recctrl_action_t action, char *useIndexDriver)
+{
+    /* entry point resolved from the plugin */
+    void (*idxPluginRegister)(void);
+    const char *dlError;
+    void *libHandle;
+    int pluginReturn = ZEBRA_FAIL;
+    char driverName[100];
+    int nameLen;
+
+    if (!useIndexDriver)
+    {
+        yaz_log(YLOG_FATAL, "No index plugin name given");
+        return ZEBRA_FAIL;
+    }
+
+    /* The driver name comes from the command line: never let it overflow */
+    nameLen = snprintf(driverName, sizeof(driverName), "mod-%s.so", useIndexDriver);
+    if (nameLen < 0 || (size_t) nameLen >= sizeof(driverName))
+    {
+        yaz_log(YLOG_FATAL, "Index plugin name too long: %s", useIndexDriver);
+        return ZEBRA_FAIL;
+    }
+
+    yaz_log(YLOG_LOG, "Loading driver %s", useIndexDriver);
+
+    libHandle = dlopen(driverName, RTLD_LAZY);
+    if (!libHandle)
+    {
+        yaz_log(YLOG_FATAL, "Unable to load index plugin %s", dlerror());
+        return ZEBRA_FAIL;
+    }
+    /* clear the error buffer */
+    dlerror();
+
+    idxPluginRegister = dlsym(libHandle, "indexPluginRegister");
+
+    if ((dlError = dlerror()) != NULL || !idxPluginRegister)
+    {
+        yaz_log(YLOG_FATAL, "Index plugin error: %s",
+                dlError ? dlError : "indexPluginRegister not found");
+        dlclose(libHandle);
+        return ZEBRA_FAIL;
+    }
+
+    pluginObj = newZebraPlugin();
+    if (!pluginObj)
+    {
+        yaz_log(YLOG_FATAL, "Out of memory while loading index plugin %s", useIndexDriver);
+        dlclose(libHandle);
+        return ZEBRA_FAIL;
+    }
+    /* Start without a callback so a plugin that registers nothing is detectable */
+    pluginObj->idxList = NULL;
+
+    /* invoke the plugin starter */
+    idxPluginRegister();
+
+    if (pluginObj->idxList)
+        pluginReturn = pluginObj->idxList(zh, path, action);
+    else
+        yaz_log(YLOG_FATAL, "Index plugin %s did not register a driver function", useIndexDriver);
+
+    destroyZebraPlugin(pluginObj);
+    pluginObj = NULL;   /* do not leave a dangling global behind */
+
+    /* close the plugin handle */
+    dlclose(libHandle);
+
+    return pluginReturn;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
ZEBRA_RES zebra_repository_update(ZebraHandle zh, const char *path)
{
- return zebra_repository_index(zh, path, action_update);
+ /* No index driver: passing NULL selects the regular file-based indexing path */
+ return zebra_repository_index(zh, path, action_update, NULL);
}
ZEBRA_RES zebra_repository_delete(ZebraHandle zh, const char *path)
{
- return zebra_repository_index(zh, path, action_delete);
+ /* No index driver: passing NULL selects the regular file-based indexing path */
+ return zebra_repository_index(zh, path, action_delete, NULL);
}
ZEBRA_RES zebra_repository_index(ZebraHandle zh, const char *path,
- enum zebra_recctrl_action_t action)
+ enum zebra_recctrl_action_t action, char *useIndexDriver)
{
ASSERTZH;
assert(path);
else
yaz_log(log_level, "update action=%d", (int) action);
- if (zh->m_record_id && !strcmp(zh->m_record_id, "file"))
- return zebra_update_file_match(zh, path);
+ if(!useIndexDriver)
+ {
+ if (zh->m_record_id && !strcmp(zh->m_record_id, "file"))
+ return zebra_update_file_match(zh, path);
+ else
+ return zebra_update_from_path(zh, path, action);
+ }
else
- return zebra_update_from_path(zh, path, action);
+ {
+     /* An index driver was selected: index through the plugin rather than
+        any of the file input systems, and hand its result back to the
+        caller (previously control fell off the end of this non-void
+        function) */
+     return zebra_update_from_driver(zh, path, action, useIndexDriver);
+ }
+
}
ZEBRA_RES zebra_repository_show(ZebraHandle zh, const char *path)
sprintf(nbuf, "%.40s(%ld)", *argv, (long) getpid());
yaz_log_init_prefix(nbuf);
#endif
+ /* For indexing driver support */
+ char *useIndexDriver = NULL;
prog = *argv;
if (argc < 2)
{
" -l <file> Write log to <file>.\n"
" -L Don't follow symbolic links.\n"
" -f <n> Display information for the first <n> records.\n"
+ " -i <driver> Select which index driver to use.\n"
+ " Note: when using a driver, the <dir> gets passed\n"
+ " to the driver as an argument.\n"
+ " Current drivers available:\n"
+ " - indexplugin_mysql\n\n"
" -V Show version.\n", *argv
);
exit(1);
}
res_set(default_res, "profilePath", DEFAULT_PROFILE_PATH);
res_set(default_res, "modulePath", DEFAULT_MODULE_PATH);
- while ((ret = options("sVt:c:g:d:m:v:nf:l:L", argv, argc, &arg)) != -2)
+ /* -L stays a plain flag (no ':' after it); only the new -i takes an argument */
+ while ((ret = options("sVt:c:g:d:m:v:nf:l:Li:", argv, argc, &arg)) != -2)
{
if (ret == 0)
{
switch (cmd)
{
case 'u':
- res = zebra_repository_index(zh, arg, action_update);
+ res = zebra_repository_index(zh, arg, action_update, useIndexDriver);
break;
case 'd':
- res = zebra_repository_index(zh, arg, action_delete);
+ res = zebra_repository_index(zh, arg, action_delete, useIndexDriver);
break;
case 'a':
- res = zebra_repository_index(zh, arg, action_a_delete);
+ res = zebra_repository_index(zh, arg, action_a_delete, useIndexDriver);
break;
case 's':
res = zebra_repository_show(zh, arg);
}
else if (ret == 'v')
yaz_log_init_level(yaz_log_mask_str(arg));
+ else if (ret == 'i')
+ useIndexDriver = arg;
else if (ret == 'l')
yaz_log_init_file(arg);
else if (ret == 'm')
return 0;
}
+/*
+ * Look up the resource called "<resName>.<name>" in r.
+ *
+ * The two parts are joined with a '.' in a fixed-size local buffer and the
+ * result of res_get() for that key is returned.  Returns NULL when resName
+ * or name is NULL, or when the combined key does not fit the buffer; the
+ * buffer is bounded because name may come straight from the command line.
+ */
+const char *res_get_named(Res r, const char *resName, const char *name)
+{
+    char wholeName[512];
+    size_t resNameLen;
+    size_t nameLen;
+
+    if (!resName || !name)
+        return NULL;
+
+    resNameLen = strlen(resName);
+    nameLen = strlen(name);
+
+    /* resName + '.' + name + terminating NUL must fit */
+    if (resNameLen > sizeof(wholeName) - 2
+        || nameLen > sizeof(wholeName) - 2 - resNameLen)
+        return NULL;
+
+    memcpy(wholeName, resName, resNameLen);
+    wholeName[resNameLen] = '.';
+    memcpy(wholeName + resNameLen + 1, name, nameLen + 1);
+
+    return res_get(r, wholeName);
+}
+
void res_set(Res r, const char *name, const char *value)
{
struct res_entry *re;