started re-modelling documentation to be less Z39.50- centric, GRS-centric and BIB1...

[idzebra-moved-to-github.git] / doc / administration.xml
diff --git a/doc/administration.xml b/doc/administration.xml

index b8c1f04..e5cdfcd 100644 (file)
--- a/doc/administration.xml
+++ b/doc/administration.xml
@@ -1,7 +1,13 @@
  <chapter id="administration">
- <!-- $Id: administration.xml,v 1.3 2002-04-09 13:26:26 adam Exp $ -->
+ <!-- $Id: administration.xml,v 1.20 2006-01-18 14:00:54 marc Exp $ -->
   <title>Administrating Zebra</title>
- 
+ <!-- ### It's a bit daft that this chapter (which describes half of
+          the configuration-file formats) is separated from
+          "recordmodel.xml" (which describes the other half) by the
+          instructions on running zebraidx and zebrasrv.  Some careful
+          re-ordering is required here.
+ -->
+
   <para>
    Unlike many simpler retrieval systems, Zebra supports safe, incremental
    updates to an existing index.
@@ -79,7 +85,7 @@
     Indexing is a per-record process, in which either insert/modify/delete
     will occur. Before a record is indexed search keys are extracted from
     whatever might be the layout the original record (sgml,html,text, etc..).
-   The Zebra system currently supports two fundamantal types of records:
+   The Zebra system currently supports two fundamental types of records:
     structured and simple text.
     To specify a particular extraction process, use either the
     command line option <literal>-t</literal> or specify a
@@ -99,8 +105,8 @@
    
    <para>
     You can edit the configuration file with a normal text editor.
-   parameter names and values are seperated by colons in the file. Lines
-   starting with a hash sign (<literal>&num;</literal>) are
+   Parameter names and values are separated by colons in the file. Lines
+   starting with a hash sign (<literal>#</literal>) are
     treated as comments.
    </para>
    
@@ -146,13 +152,17 @@
     explained further in the following sections.
    </para>
    
+  <!--
+   FIXME - Didn't Adam make something to have multiple databases in multiple dirs...
+  -->
+  
    <para>
     <variablelist>
      
      <varlistentry>
       <term>
        <emphasis>group</emphasis>
-      .recordType&lsqb;<emphasis>.name</emphasis>&rsqb;:
+      .recordType[<emphasis>.name</emphasis>]:
        <replaceable>type</replaceable>
       </term>
       <listitem>
@@ -186,6 +196,7 @@
       <listitem>
        <para>
         Specifies the Z39.50 database name.
+       <!-- FIXME - now we can have multiple databases in one server. -H -->
        </para>
       </listitem>
      </varlistentry>
@@ -198,6 +209,7 @@
         group of records. If you plan to update/delete this type of
         records later this should be specified as 1; otherwise it
         should be 0 (default), to save register space.
+       <!-- ### this is the first mention of "register" -->
         See <xref linkend="file-ids"/>.
        </para>
       </listitem>
@@ -217,6 +229,7 @@
       </listitem>
      </varlistentry>
      <varlistentry>
+     <!-- ### probably a better place to define "register" -->
       <term>register: <replaceable>register-location</replaceable></term>
       <listitem>
        <para>
@@ -248,7 +261,7 @@
       <term>keyTmpDir: <replaceable>directory</replaceable></term>
       <listitem>
        <para>
-       Directory in which temporary files used during zebraidx' update
+       Directory in which temporary files used during zebraidx's update
         phase are stored. 
        </para>
       </listitem>
@@ -263,7 +276,7 @@
       </listitem>
      </varlistentry>
      <varlistentry>
-     <term>profilePath: <literal>path</literal></term>
+     <term>profilePath: <replaceable>path</replaceable></term>
       <listitem>
        <para>
         Specifies a path of profile specification files. 
@@ -292,6 +305,19 @@
         Specifies <replaceable>size</replaceable> of internal memory
         to use for the zebraidx program.
         The amount is given in megabytes - default is 4 (4 MB).
+       The more memory, the faster large updates happen, up to about
+       half the free memory available on the computer.
+      </para>
+     </listitem>
+    </varlistentry>
+    <varlistentry>
+     <term>tempfiles: <replaceable>Yes/Auto/No</replaceable></term>
+     <listitem>
+      <para>
+       Tells Zebra whether it should use temporary files when indexing. The
+       default is Auto, in which case Zebra uses temporary files only
+       if it would need more than <replaceable>memMax</replaceable>
+       megabytes of memory. This should be good for most uses.
        </para>
       </listitem>
      </varlistentry>
@@ -302,13 +328,69 @@
        <para>
         Specifies a directory base for Zebra. All relative paths
         given (in profilePath, register, shadow) are based on this
-       directory. This setting is useful if if you Zebra server
+       directory. This setting is useful if your Zebra server
         is running in a different directory from where
         <literal>zebra.cfg</literal> is located.
        </para>
       </listitem>
      </varlistentry>
  
+    <varlistentry>
+     <term>passwd: <replaceable>file</replaceable></term>
+     <listitem>
+      <para>
+       Specifies a file with description of user accounts for Zebra.
+       The format is similar to that known to Apache's htpasswd files
+       and UNIX' passwd files. Non-empty lines not beginning with
+       # are considered account lines. There is one account per line.
+       A line consists of fields separated by a single colon character.
+       The first field is the username, the second is the password.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term>passwd.c: <replaceable>file</replaceable></term>
+     <listitem>
+      <para>
+       Specifies a file with description of user accounts for Zebra.
+       File format is similar to that used by the passwd directive except
+       that the passwords are encrypted. Use Apache's htpasswd or similar
+       for maintenance.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term>perm.<replaceable>user</replaceable>:
+     <replaceable>permstring</replaceable></term>
+     <listitem>
+      <para>
+       Specifies permissions (privileges) for a user that is allowed
+       to access Zebra via the passwd system. There are two kinds
+       of permissions currently: read (r) and write (w). By default
+       users not listed in a permission directive are given the read
+       privilege. To specify permissions for a user with no
+       username, or Z39.50 anonymous style, use
+       <literal>anonymous</literal>. The permstring consists of
+       a sequence of characters. Include character <literal>w</literal>
+       for write/update access, <literal>r</literal> for read access.
+      </para>
+     </listitem>
+    </varlistentry>
+
+    <varlistentry>
+      <term>dbaccess <replaceable>accessfile</replaceable></term>
+      <listitem>
+        <para>
+         Names a file which lists database subscriptions for individual users.
+         The access file should consist of lines of the form <literal>username:
+         dbnames</literal>, where dbnames is a list of database names, separated by
+         '+'. No whitespace is allowed in the database list.
+       </para>
+      </listitem>
+    </varlistentry>
+
     </variablelist>
    </para>
    
@@ -318,14 +400,15 @@
    <title>Locating Records</title>
    
    <para>
-   The default behaviour of the Zebra system is to reference the
+   The default behavior of the Zebra system is to reference the
     records from their original location, i.e. where they were found when you
     ran <literal>zebraidx</literal>.
     That is, when a client wishes to retrieve a record
     following a search operation, the files are accessed from the place
     where you originally put them - if you remove the files (without
-   running <literal>zebraidx</literal> again, the client
-   will receive a diagnostic message.
+   running <literal>zebraidx</literal> again), the server will return
+   diagnostic number 14 (``System error in presenting records'') to
+   the client.
    </para>
    
    <para>
@@ -370,7 +453,7 @@
    <para>
     
     <screen>
-    profilePath: /usr/local/yaz
+    profilePath: /usr/local/idzebra/tab
      attset: bib1.att
      simple.recordType: text
      simple.database: textbase
@@ -418,7 +501,7 @@
     disk space than simpler indexing methods, but it makes it easier for
     you to keep the index in sync with a frequently changing set of data.
     If you combine this system with the <emphasis>safe update</emphasis>
-   facility (see below), you never have to take your server offline for
+   facility (see below), you never have to take your server off-line for
     maintenance or register updating purposes.
    </para>
    
@@ -428,9 +511,13 @@
     in the configuration file. In addition, you should set
     <literal>storeKeys</literal> to <literal>1</literal>, since the Zebra
     indexer must save additional information about the contents of each record
-   in order to modify the indices correctly at a later time.
+   in order to modify the indexes correctly at a later time.
    </para>
    
+   <!--
+    FIXME - There must be a simpler way to do this with Adam's string tags -H
+     -->
+
    <para>
     For example, to update records of group <literal>esdd</literal>
     located below
@@ -466,13 +553,14 @@
     and then run <literal>zebraidx</literal> with the
     <literal>update</literal> command.
    </para>
+  <!-- ### what happens if a file contains multiple records? -->
  </sect1>
   
   <sect1 id="generic-ids">
    <title>Indexing with General Record IDs</title>
    
    <para>
-   When using this method you construct an (almost) arbritrary, internal
+   When using this method you construct an (almost) arbitrary, internal
     record key based on the contents of the record itself and other system
     information. If you have a group of records that explicitly associates
     an ID with each record, this method is convenient. For example, the
@@ -581,7 +669,7 @@
    </para>
    
    <para>
-   (see <xref linkend="data-model"/>
+   (see <xref linkend="grs-record-model"/>
      for details of how the mapping between elements of your records and
      searchable attributes is established).
    </para>
@@ -635,19 +723,22 @@
     each directory in the order specified and use the next specified
     directories as needed.
     The <emphasis>size</emphasis> is an integer followed by a qualifier
-   code, <literal>M</literal> for megabytes,
+   code, 
+   <literal>b</literal> for bytes,
     <literal>k</literal> for kilobytes.
+   <literal>M</literal> for megabytes,
+   <literal>G</literal> for gigabytes.
    </para>
    
    <para>
     For instance, if you have allocated two disks for your register, and
     the first disk is mounted
-   on <literal>/d1</literal> and has 200 Mb of free space and the
-   second, mounted on <literal>/d2</literal> has 300 Mb, you could
+   on <literal>/d1</literal> and has 2 GB of free space and the
+   second, mounted on <literal>/d2</literal> has 3.6 GB, you could
     put this entry in your configuration file:
     
     <screen>
-    register: /d1:200M /d2:300M
+    register: /d1:2G /d2:3600M
     </screen>
     
    </para>
@@ -658,7 +749,7 @@
     your responsibility to ensure that enough space is available, and that
     other applications do not attempt to use the free space. In a large
     production system, it is recommended that you allocate one or more
-   filesystem exclusively to the Zebra register files.
+   file systems exclusively to the Zebra register files.
    </para>
    
   </sect1>
@@ -764,14 +855,13 @@
      In order to make changes to the system take effect for the
      users, you'll have to submit a "commit" command after a
      (sequence of) update operation(s).
-    You can ask the indexer to commit the changes immediately
-    after the update operation:
     </para>
     
     <para>
      
      <screen>
-     $ zebraidx update /d1/records update /d2/more-records commit
+     $ zebraidx update /d1/records 
+     $ zebraidx commit
      </screen>
      
     </para>
@@ -783,7 +873,7 @@
     <para>
      
      <screen>
-     $ zebraidx -g books update /d1/records update /d2/more-records
+     $ zebraidx -g books update /d1/records  /d2/more-records
       $ zebraidx -g fun update /d3/fun-records
       $ zebraidx commit
      </screen>