2 # ============================================================================
3 # Zebra perl API header
4 # =============================================================================
6 # ============================================================================
7 package IDZebra::Session;
9 use IDZebra::Logger qw(:flags :calls);
10 #use IDZebra::Repository;
11 use IDZebra::Resultset;
15 our @ISA = qw(IDZebra::Logger);
18 # -----------------------------------------------------------------------------
19 # Class constructors, destructor
20 # -----------------------------------------------------------------------------
22 my ($proto, %args) = @_;
23 my $class = ref($proto) || $proto;
25 $self->{args} = \%args;
27 bless ($self, $class);
28 $self->{cql_ct} = undef;
31 $self->{databases} = {};
35 my ($self, %args) = @_;
38 unless (defined($self->{zs})) {
39 if (defined($args{'configFile'})) {
40 $self->{zs} = IDZebra::start($args{'configFile'});
42 $self->{zs} = IDZebra::start("zebra.cfg");
49 if (defined($self->{zs})) {
50 IDZebra::stop($self->{zs}) if ($self->{zs});
57 my ($proto,%args) = @_;
60 if (ref($proto)) { $self = $proto; } else {
61 $self = $proto->new(%args);
65 %args = %{$self->{args}};
68 $self->start_service(%args);
70 unless (defined($self->{zs})) {
71 croak ("Falied to open zebra service");
74 unless (defined($self->{zh})) {
75 $self->{zh}=IDZebra::open($self->{zs});
78 # Reset result set counter
81 # This is needed in order to somehow initialize the service
82 $self->select_databases("Default");
84 # Load the default configuration
87 $self->{odr_input} = IDZebra::odr_createmem($IDZebra::ODR_DECODE);
88 $self->{odr_output} = IDZebra::odr_createmem($IDZebra::ODR_ENCODE);
97 while (IDZebra::trans_no($self->{zh}) > 0) {
98 logf (LOG_WARN,"Explicitly closing transaction with session");
102 IDZebra::close($self->{zh});
106 if ($self->{odr_input}) {
107 IDZebra::odr_reset($self->{odr_input});
108 IDZebra::odr_destroy($self->{odr_input});
109 $self->{odr_input} = undef;
112 if ($self->{odr_output}) {
113 IDZebra::odr_reset($self->{odr_output});
114 IDZebra::odr_destroy($self->{odr_output});
115 $self->{odr_output} = undef;
123 logf (LOG_LOG,"DESTROY $self");
126 if (defined ($self->{cql_ct})) {
127 IDZebra::cql_transform_close($self->{cql_ct});
130 # -----------------------------------------------------------------------------
131 # Record group selection. This is a bit nasty... but used in many places
132 # -----------------------------------------------------------------------------
134 my ($self,%args) = @_;
136 $self->{rg} = $self->_makeRecordGroup(%args);
137 $self->_selectRecordGroup($self->{rg});
# Select the record group named $groupName as the session's current group.
# The group is looked up with _getRecordGroup, remembered in $self->{rg},
# and then activated through _selectRecordGroup.
142 sub selectRecordGroup {
143 my ($self, $groupName) = @_;
144 $self->{rg} = $self->_getRecordGroup($groupName);
145 $self->_selectRecordGroup($self->{rg});
# Debugging aid: dump the record group $rg to STDERR.
# Prints a "-----" separator line, then one "key:value" line for each key
# named in the qw list (only the first key, groupName, is visible here).
# NOTE(review): "foreach my $key qw (...)" relies on qw acting as the loop's
# parentheses; Perl 5.18+ rejects this - wrap the list as "(qw(...))".
148 sub _displayRecordGroup {
149 my ($self, $rg) = @_;
150 print STDERR "-----\n";
151 foreach my $key qw (groupName
162 print STDERR "$key:",$rg->{$key},"\n";
# Build a fresh IDZebra::recordGroup and copy the listed settings of $orig
# into it (only the first key of the list, groupName, is visible here).
# A setting is copied only when its value in $orig is true, so 0 and ""
# values are not carried over to the copy.
# NOTE(review): "foreach my $key qw (...)" relies on qw acting as the loop's
# parentheses; Perl 5.18+ rejects this - wrap the list as "(qw(...))".
166 sub _cloneRecordGroup {
167 my ($self, $orig) = @_;
168 my $rg = IDZebra::recordGroup->new();
169 my $r = IDZebra::init_recordGroup($rg);
170 foreach my $key qw (groupName
182 $rg->{$key} = $orig->{$key} if ($orig->{$key});
# Create a new record group and fill it through
# IDZebra::res_get_recordGroup for the session handle $self->{zh}.
#   $groupName - stored in the group's groupName field unless it is ""
#   $ext       - passed on to res_get_recordGroup; a false value becomes ""
# NOTE(review): $r is declared with "my" twice in the same scope; the second
# declaration masks the first and triggers a warning under "use warnings".
187 sub _getRecordGroup {
188 my ($self, $groupName, $ext) = @_;
189 my $rg = IDZebra::recordGroup->new();
190 my $r = IDZebra::init_recordGroup($rg);
191 $rg->{groupName} = $groupName if ($groupName ne "");
192 $ext = "" unless ($ext);
193 my $r = IDZebra::res_get_recordGroup($self->{zh}, $rg, $ext);
# Produce the record group to use for a call, based on the options in %args.
#   - With no options at all, the session's current group ($self->{rg}) is
#     returned as is.
#   - With a groupName option, that group is loaded via _getRecordGroup.
#   - The clone of $self->{rg} is presumably the fallback when no groupName
#     is given (the branch keyword is not visible here - TODO confirm).
# The options are then applied to the group with _setRecordGroupOptions.
197 sub _makeRecordGroup {
198 my ($self, %args) = @_;
201 my @keys = keys(%args);
202 unless ($#keys >= 0) {
203 return ($self->{rg});
206 if ($args{groupName}) {
207 $rg = $self->_getRecordGroup($args{groupName});
209 $rg = $self->_cloneRecordGroup($self->{rg});
211 $self->_setRecordGroupOptions($rg, %args);
# Copy option values from %args into the record group $rg.
# For each key named in the qw list (only databaseName is visible here),
# the value is stored in $rg when it is defined in %args; undefined or
# absent options leave the group's existing value untouched.
# NOTE(review): "foreach my $key qw (...)" relies on qw acting as the loop's
# parentheses; Perl 5.18+ rejects this - wrap the list as "(qw(...))".
215 sub _setRecordGroupOptions {
216 my ($self, $rg, %args) = @_;
218 foreach my $key qw (databaseName
229 if (defined ($args{$key})) {
230 $rg->{$key} = $args{$key};
# Make $rg the active record group of the Zebra handle $self->{zh} via
# IDZebra::set_group, then select the group's database.
# The database name is taken from $rg->{databaseName}; what happens when it
# is empty is not visible here.
# Croaks when select_databases returns a true value for that database.
234 sub _selectRecordGroup {
235 my ($self, $rg) = @_;
236 my $r = IDZebra::set_group($self->{zh}, $rg);
238 unless ($dbName = $rg->{databaseName}) {
241 if ($self->select_databases($dbName)) {
242 croak("Fatal error selecting database $dbName");
245 # -----------------------------------------------------------------------------
246 # Selecting databases for search (and also for updating - internally)
247 # -----------------------------------------------------------------------------
# Select the given database names on the Zebra handle $self->{zh}.
# The first loop checks each requested name against the set recorded in
# $self->{databases}; presumably this detects whether the selection has
# changed (the rest of that loop is not visible here - TODO confirm).
# The recorded set is then discarded and rebuilt from @databases before
# IDZebra::select_databases is called. A true result from that call is
# treated as failure and reported with the error code; otherwise the
# selected names are logged at LOG_LOG level.
248 sub select_databases {
249 my ($self, @databases) = @_;
252 foreach my $db (@databases) {
253 next if ($self->{databases}{$db});
259 delete ($self->{databases});
260 foreach my $db (@databases) {
261 $self->{databases}{$db}++;
264 if (my $res = IDZebra::select_databases($self->{zh},
268 "Could not select database(s) %s errCode=%d",
269 join(",",@databases),
273 logf(LOG_LOG,"Database(s) selected: %s",join(",",@databases));
279 # -----------------------------------------------------------------------------
281 # -----------------------------------------------------------------------------
284 return(IDZebra::errCode($self->{zh}));
289 return(IDZebra::errString($self->{zh}));
294 return(IDZebra::errAdd($self->{zh}));
297 # -----------------------------------------------------------------------------
299 # -----------------------------------------------------------------------------
302 IDZebra::begin_trans($self->{zh});
307 my $stat = IDZebra::ZebraTransactionStatus->new();
308 IDZebra::end_trans($self->{zh}, $stat);
314 return(IDZebra::begin_read($self->{zh}));
319 IDZebra::end_read($self->{zh});
323 my ($self, $value) = @_;
324 if ($#_ > 0) { IDZebra::set_shadow_enable($self->{zh},$value); }
325 return (IDZebra::get_shadow_enable($self->{zh}));
330 if ($self->shadow_enable) {
331 return(IDZebra::commit($self->{zh}));
335 # -----------------------------------------------------------------------------
336 # We don't really need that...
337 # -----------------------------------------------------------------------------
339 my ($self, $name) = @_;
340 if ($name !~/^(input|output)$/) {
341 croak("Undefined ODR '$name'");
343 IDZebra::odr_reset($self->{"odr_$name"});
346 # -----------------------------------------------------------------------------
348 # -----------------------------------------------------------------------------
351 return(IDZebra::init($self->{zh}));
356 return(IDZebra::compact($self->{zh}));
360 my ($self, %args) = @_;
361 my $rg = $self->_update_args(%args);
362 $self->_selectRecordGroup($rg);
364 IDZebra::repository_update($self->{zh});
365 $self->_selectRecordGroup($self->{rg});
370 my ($self, %args) = @_;
371 my $rg = $self->_update_args(%args);
372 $self->_selectRecordGroup($rg);
374 IDZebra::repository_delete($self->{zh});
375 $self->_selectRecordGroup($self->{rg});
380 my ($self, %args) = @_;
381 my $rg = $self->_update_args(%args);
382 $self->_selectRecordGroup($rg);
384 IDZebra::repository_show($self->{zh});
385 $self->_selectRecordGroup($self->{rg});
390 my ($self, %args) = @_;
391 my $rg = $self->_makeRecordGroup(%args);
392 $self->_selectRecordGroup($rg);
396 # -----------------------------------------------------------------------------
398 # -----------------------------------------------------------------------------
401 my ($self, %args) = @_;
402 return(IDZebra::update_record($self->{zh},
403 $self->_record_update_args(%args)));
407 my ($self, %args) = @_;
408 return(IDZebra::delete_record($self->{zh},
409 $self->_record_update_args(%args)));
# Translate the named options of update_record / delete_record into the
# positional argument list handed to the IDZebra record functions.
# Options read from %args:
#   sysno      - defaults to 0 when false
#   match      - defaults to "" when false
#   recordType - defaults to "" when false
#   file       - defaults to "<no file>" when false
#   data       - removed from %args below; how it fills the buffer is not
#                visible here
# Returns the list ($rg, $rectype, $sysno, $match, $fname, $buff, $len).
411 sub _record_update_args {
412 my ($self, %args) = @_;
414 my $sysno = $args{sysno} ? $args{sysno} : 0;
415 my $match = $args{match} ? $args{match} : "";
416 my $rectype = $args{recordType} ? $args{recordType} : "";
417 my $fname = $args{file} ? $args{file} : "<no file>";
# Read the whole file into the record buffer.
# NOTE(review): two-argument open on a bareword handle; a failure only
# warns and execution continues.
424 elsif ($args{file}) {
425 open (F, $args{file}) || warn ("Cannot open $args{file}");
426 $buff = join('',(<F>));
429 my $len = length($buff);
# Strip the record-level options so that only record group options are
# left for _makeRecordGroup.
431 delete ($args{sysno});
432 delete ($args{match});
433 delete ($args{recordType});
434 delete ($args{file});
435 delete ($args{data});
437 my $rg = $self->_makeRecordGroup(%args);
439 # If no record type is given, then try to find it out from the
# The file name's extension is used to look up a record group, whose
# recordType then becomes the record type.
442 if (my ($ext) = $fname =~ /\.(\w+)$/) {
443 my $rg2 = $self->_getRecordGroup($rg->{groupName},$ext);
444 $rectype = $rg2->{recordType};
# Fall back to the database named "Default" when the group has none.
448 $rg->{databaseName} = "Default" unless ($rg->{databaseName});
450 # print STDERR "$rectype,$sysno,$match,$fname,$len\n";
454 return ($rg, $rectype, $sysno, $match, $fname, $buff, $len);
457 # -----------------------------------------------------------------------------
460 my ($self,$mapfile) = @_;
462 if ($self->{cql_mapfile} ne $mapfile) {
463 unless (-f $mapfile) {
464 croak("Cannot find $mapfile");
466 if (defined ($self->{cql_ct})) {
467 IDZebra::cql_transform_close($self->{cql_ct});
469 $self->{cql_ct} = IDZebra::cql_transform_open_fname($mapfile);
470 $self->{cql_mapfile} = $mapfile;
473 return ($self->{cql_mapfile});
477 my ($self, $cqlquery) = @_;
478 unless (defined($self->{cql_ct})) {
479 croak("CQL map file is not specified yet.");
481 my $res = "\0" x 2048;
482 my $r = IDZebra::cql2pqf($self->{cql_ct}, $cqlquery, $res, 2048);
483 unless ($r) {return (undef)};
489 # -----------------------------------------------------------------------------
491 # -----------------------------------------------------------------------------
493 my ($self, %args) = @_;
495 if ($args{cqlmap}) { $self->cqlmap($args{cqlmap}); }
502 unless ($query = $self->cql2pqf($args{cql})) {
503 croak ("Invalid CQL query: '$args{cql}'");
507 croak ("No query given to search");
510 my $rsname = $args{rsname} ? $args{rsname} : $self->_new_setname;
512 return ($self->_search_pqf($query, $rsname));
517 return ("set_".$self->{rscount}++);
521 my ($self, $query, $setname) = @_;
523 my $hits = IDZebra::search_PQF($self->{zh},
529 my $rs = IDZebra::Resultset->new($self,
531 recordCount => $hits,
532 errCode => $self->errCode,
533 errString => $self->errString);
538 my ($self, $query, $transfile) = @_;
543 my ($self, $query, $transfile) = @_;
546 # -----------------------------------------------------------------------------
549 # Sorting of multiple result sets is not supported by zebra...
550 # -----------------------------------------------------------------------------
553 my ($self, $sortspec, $setname, @sets) = @_;
557 foreach my $rs (@sets) {
558 push (@setnames, $rs->{name});
559 $count += $rs->{recordCount}; # is this really sure ??? It doesn't
563 my $status = IDZebra::sort($self->{zh},
569 my $errCode = $self->errCode;
570 my $errString = $self->errString;
572 if ($status || $errCode) {$count = 0;}
574 my $rs = IDZebra::Resultset->new($self,
576 recordCount => $count,
578 errString => $errString);
588 IDZebra::Session - A Zebra database server session for update and retrieval
592 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
595 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
601 Zebra is a high-performance, general-purpose structured text indexing and retrieval engine. It reads structured records in a variety of input formats (eg. email, XML, MARC) and allows access to them through exact boolean search expressions and relevance-ranked free-text queries.
603 Zebra supports large databases (more than ten gigabytes of data, tens of millions of records). It supports incremental, safe database updates on live systems. You can access data stored in Zebra using a variety of Index Data tools (eg. YAZ and PHP/YAZ) as well as commercial and freeware Z39.50 clients and toolkits.
605 =head1 OPENING AND CLOSING ZEBRA SESSIONS
607 For the time being, only local database services are supported, the same way as calling zebraidx or zebrasrv from the command shell. In order to open a local Zebra database with a specific configuration file, use
609 $sess = IDZebra::Session->new(configFile => 'demo/zebra.cfg');
614 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg');
616 where $sess is going to be the object representing a Zebra Session. Whenever this variable goes out of scope, the session is closed, together with all active transactions, etc. If you'd like to close the session explicitly, just say:
621 - close all transactions
622 - destroy all result sets
625 In the future different database access methods are going to be available,
628 $sess = IDZebra::Session->open(server => 'ostrich.technomat.hu:9999');
630 You can also use the B<record group> arguments described below directly when calling the constructor, or the open method:
632 $sess = IDZebra::Session->open(configFile => 'demo/zebra.cfg',
633 groupName => 'demo');
638 If you manage different sets of records that share common characteristics, you can organize the configuration settings for each type into "groups". See the Zebra manual on the configuration file (zebra.cfg).
640 For each open session a default record group is assigned. You can configure it in the constructor, or by the B<group> method:
642 $sess->group(groupName => ..., ...)
644 The following options are available:
650 This will select the named record group, and load the corresponding settings from the configuration file. All subsequent values will overwrite those...
652 =item B<databaseName>
654 The name of the (logical) database the updated records will belong to.
658 This path is used for directory updates (B<update>, B<delete> methods);
662 This option determines how to identify your records. See I<Zebra manual: Locating Records>
666 The record type used for indexing.
668 =item B<flagStoreData>
670 Specifies whether the records should be stored internally in the Zebra system files. If you want to maintain the raw records yourself, this option should be false (0). If you want Zebra to take care of the records for you, it should be true(1).
672 =item B<flagStoreKeys>
674 Specifies whether key information should be saved for a given group of records. If you plan to update/delete this type of records later this should be specified as 1; otherwise it should be 0 (default), to save register space.
680 =item B<fileVerboseLimit>
682 When doing a directory update, skip log messages after the specified number of files have been processed.
684 =item B<databaseNamePath>
688 =item B<explainDatabase>
690 The name of the explain database to be used
694 Follow links when doing directory update.
698 You can use the same parameters calling all update methods.
700 =head1 TRANSACTIONS (WRITE LOCKS)
702 A transaction is a block of record update (insert / modify / delete) procedures. Every call to such a function will implicitly start a transaction, unless one has already been started by
706 For multiple per-record updates it is more efficient to start transactions explicitly; otherwise registers (system files, vocabularies, etc.) are updated one by one. After finishing all requested updates, use
708 $stat = $sess->end_trans;
710 The return value is a ZebraTransactionStatus object, containing the following members as a hash reference:
712 $stat->{processed} # Number of records processed
713 $stat->{updated} # Number of records updated
714 $stat->{deleted} # Number of records deleted
715 $stat->{inserted} # Number of records inserted
716 $stat->{stime} # System time used
717 $stat->{utime} # User time used
721 There are two ways to update data in a Zebra database using the perl API. You can update an entire directory structure just the way it's done by zebraidx:
723 $sess->update(path => 'lib');
725 This will update the database with the files in directory "lib", according to the current record group settings.
729 This will update the database with the files specified by the default record group setting. I<path> has to be specified there...
731 $sess->update(groupName => 'demo1',
734 Update the database with files in "lib" according to the settings of group "demo1"
736 $sess->delete(groupName => 'demo1',
739 Delete the records derived from the files in directory "lib", according to the "demo1" group settings. Sounds complex? Read zebra documentation about identifying records.
741 You can also update records one by one, even directly from memory:
743 $sysno = $sess->update_record(data => $rec1,
744 recordType => 'grs.perl.pod',
745 groupName => "demo1");
747 This will update the database with the given record buffer. Note that in this case recordType is explicitly specified, as there is no filename given, and for the demo1 group no default record type is specified. The return value is the system-assigned id of the record.
749 You can also index a single file:
751 $sysno = $sess->update_record(file => "lib/IDZebra/Data1.pm");
753 Or provide a buffer and a filename (where the filename will only be used to identify the record, if configured that way, and possibly to find out its record type):
755 $sysno = $sess->update_record(data => $rec1,
756 file => "lib/IDZebra/Data1.pm");
758 And some crazy stuff:
760 $sysno = $sess->delete_record(sysno => $sysno);
762 where sysno in itself is sufficient to identify the record
764 $sysno = $sess->delete_record(data => $rec1,
765 recordType => 'grs.perl.pod',
766 groupName => "demo1");
768 In this case the record is extracted and, if it already exists, located in the database and then deleted...
770 $sysno = $sess->delete_record(data => $rec1,
772 recordType => 'grs.perl.pod',
773 groupName => "demo1");
775 Don't try this at home! In this case, the record identifier string (which is normally generated according to the rules set in the recordId directive of zebra.cfg) is provided directly...
778 B<Important:> Note that one record can be updated only once within a transaction - all subsequent updates are skipped.
789 Peter Popovics, pop@technomat.hu
793 IDZebra, IDZebra::Data1, Zebra documentation