Increase reckeys hash size from 1023 to 32767.

[idzebra-moved-to-github.git] / doc / recordmodel-grs.xml
diff --git a/doc/recordmodel-grs.xml b/doc/recordmodel-grs.xml

index a2c798e..68744b0 100644 (file)
--- a/doc/recordmodel-grs.xml
+++ b/doc/recordmodel-grs.xml
@@ -1,7 +1,6 @@
- <chapter id="record-model-grs">
-  <!-- $Id: recordmodel-grs.xml,v 1.3 2006-04-25 12:26:26 marc Exp $ -->
+ <chapter id="grs">
+  <!-- $Id: recordmodel-grs.xml,v 1.5 2006-10-11 12:37:23 adam Exp $ -->
    <title>GRS Record Model and Filter Modules</title>
-  
  
    <para>
     The record model described in this chapter applies to the fundamental,
@@ -11,7 +10,7 @@
    </para>
  
  
-  <sect1 id="grs-record-filters">
+  <section id="grs-filters">
     <title>GRS Record Filters</title>
     <para>
      Many basic subtypes of the <emphasis>grs</emphasis> type are
@@ -21,120 +20,116 @@
     <para>
      <variablelist>
       <varlistentry>
-      <term>grs.sgml</term>
+      <term><literal>grs.sgml</literal></term>
        <listitem>
         <para>
          This is the canonical input format
          described <xref linkend="grs-canonical-format"/>. It is using
          simple SGML-like syntax. 
         </para>
-       <!--
-       <para>
-         <literal>libidzebra1.4-mod-grs-sgml not packaged yet ??</literal>
-       </para>
-       -->
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.marc<!--.<emphasis>abstract syntax</emphasis>--></term>
+      <term><literal>grs.marc.</literal><replaceable>type</replaceable></term>
        <listitem>
         <para>
          This allows Zebra to read
          records in the ISO2709 (MARC) encoding standard. 
-        <!-- In this case, the
-        last parameter <emphasis>abstract syntax</emphasis> names the
+        The last parameter <replaceable>type</replaceable> names the
          <literal>.abs</literal> file (see below)
          which describes the specific MARC structure of the input record as
-        well as the indexing rules. -->
+        well as the indexing rules.
+       </para>
+       <para>The <literal>grs.marc</literal> uses an internal representation
+       which is not XML conformant. In particular MARC tags are
+       presented as elements with the same name. And XML elements
+       may not start with digits. Therefore this filter is only
+       suitable for systems returning GRS-1 and MARC records. For XML
+       use <literal>grs.marcxml</literal> filter instead (see below).
         </para>
         <para>
           The loadable <literal>grs.marc</literal> filter module
           is packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-marc</literal>
-        </para>
+        <literal>libidzebra2.0-mod-grs-marc</literal>
+       </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.marcxml<!--.<emphasis>abstract syntax</emphasis>--></term>
+      <term><literal>grs.marcxml.</literal><replaceable>type</replaceable></term>
        <listitem>
         <para>
-        This allows Zebra to read
-        records in the ISO2709??? (MARCXML) encoding standard.
+        This allows Zebra to read ISO2709 encoded records.
+        The last parameter <replaceable>type</replaceable> names the
+        <literal>.abs</literal> file (see below)
+        which describes the specific MARC structure of the input record as
+        well as the indexing rules.
         </para>
         <para>
-         The loadable <literal>grs.marcxml</literal> filter module
-         is also contained in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-marc</literal>
-        </para>
-      </listitem>
-     </varlistentry>
-     <varlistentry>
-      <term>grs.danbib</term>
-      <listitem>
-       <para>
-        The <literal>grs.danbib</literal> filter parses DanBib
-        records, a danish MARC record variant called DANMARC.
-        DanBib is the Danish Union Catalogue hosted by the
-        Danish Bibliographic Centre (DBC).
+       The internal representation for <literal>grs.marcxml</literal>
+       is the same as for <ulink url="&url.marcxml;">MARCXML</ulink>.
+       It is slightly more complicated to work with than 
+       <literal>grs.marc</literal> but XML conformant.
         </para>
-       <para>The loadable  <literal>grs.danbib</literal> filter module
-         is packages in the GNU/Debian package 
-         <literal>libidzebra1.4-mod-grs-danbib</literal>.
+       <para>
+       The loadable <literal>grs.marcxml</literal> filter module
+       is also contained in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-marc</literal>
         </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.xml</term>
+      <term><literal>grs.xml</literal></term>
        <listitem>
         <para>
-        This filter reads XML records and uses <ulink url="http://expat.sourceforge.net/">Expat</ulink> to
+        This filter reads XML records and uses
+       <ulink url="http://expat.sourceforge.net/">Expat</ulink> to
          parse them and convert them into IDZebra's internal 
          <literal>grs</literal> record model.
-        Only one record per file
-        is supported. The filter is only available if Zebra/YAZ
-        is compiled with EXPAT support.
+        Only one record per file is supported, due to the fact XML does
+       not allow two documents to "follow" each other (there is no way
+       to know when a document is finished).
+       This filter is only available if Zebra is compiled with EXPAT support.
         </para>
         <para>
-         The loadable <literal>grs.xml</literal> filter module
-         is packagged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-xml</literal>
+       The loadable <literal>grs.xml</literal> filter module
+       is packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-xml</literal>
          </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.regx<!--.<emphasis>filter</emphasis>--></term>
+      <term><literal>grs.regx.</literal><replaceable>filter</replaceable></term>
        <listitem>
         <para>
          This enables a user-supplied Regular Expressions input
-        filter described in
-        <xref linkend="grs-regx-tcl"/>.
+        filter described in <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
-         The loadable  <literal>grs.regx</literal> filter module
-         is packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-regx</literal>
-        </para>
+       The loadable <literal>grs.regx</literal> filter module
+       is packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-regx</literal>
+       </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.tcl<!--.<emphasis>filter</emphasis>--></term>
+      <term><literal>grs.tcl.</literal><replaceable>filter</replaceable></term>
        <listitem>
         <para>
          Similar to grs.regx but using Tcl for rules, described in 
          <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
-         The loadable <literal>grs.tcl</literal> filter module
-         is also packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-regx</literal>
-        </para>
+       The loadable <literal>grs.tcl</literal> filter module
+       is also packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-regx</literal>
+       </para>
        </listitem>
       </varlistentry>
  
      </variablelist>
     </para>
  
-   <sect2 id="grs-canonical-format">
+   <section id="grs-canonical-format">
      <title>GRS Canonical Input Format</title>
  
      <para>
@@ -207,7 +202,7 @@
       structured data element such a <emphasis>Supplier</emphasis> element.
      </para>
  
-    <sect3>
+    <section id="grs-record-root">
       <title>Record Root</title>
  
       <para>
@@ -234,9 +229,9 @@
  
       </para>
  
-    </sect3>
+    </section>
  
-    <sect3><!-- ### we shouldn't make such a big deal about this -->
+    <section id="grs-variants">
       <title>Variants</title>
  
       <para>
@@ -272,7 +267,7 @@
        The available values for the <emphasis>class</emphasis> and
        <emphasis>type</emphasis> fields are given by the variant set
        that is associated with the current schema
-      (see <xref linkend="variant-set"/>).
+      (see <xref linkend="grs-var-files"/>).
       </para>
  
       <para>
@@ -331,11 +326,11 @@
        of the end-user.
       </para>
  
-    </sect3>
+    </section>
  
-   </sect2>
+   </section>
  
-   <sect2 id="grs-regx-tcl">
+   <section id="grs-regx-tcl">
      <title>GRS REGX And TCL Input Filters</title>
  
      <para>
@@ -374,7 +369,7 @@
       <variablelist>
  
        <varlistentry>
-       <term>INIT</term>
+       <term><literal>INIT</literal></term>
         <listitem>
          <para>
           The action associated with this expression is evaluated
@@ -386,7 +381,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>BEGIN</term>
+       <term><literal>BEGIN</literal></term>
         <listitem>
          <para>
           Matches the beginning of the record. It can be used to
@@ -397,7 +392,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>END</term>
+       <term><literal>END</literal></term>
         <listitem>
          <para>
           Matches the end of the record - when all of the contents
@@ -406,15 +401,20 @@
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>/pattern/</term>
+       <term>
+       <literal>/</literal><replaceable>reg</replaceable><literal>/</literal>
+       </term>
         <listitem>
          <para>
-         Matches a string of characters from the input record.
+        Matches regular expression pattern <replaceable>reg</replaceable>
+        from the input record. The operators supported are the same
+        as for regular expression queries. Refer to 
+        <xref linkend="querymodel-regular"/>.
          </para>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>BODY</term>
+       <term><literal>BODY</literal></term>
         <listitem>
          <para>
           This keyword may only be used between two patterns.
@@ -423,7 +423,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>FINISH</term>
+       <term><literal>FINISH</literal></term>
         <listitem>
          <para>
           The expression associated with this pattern is evaluated
@@ -578,11 +578,11 @@
       mechanisms for modifying the elements of a record.
      </para>
  
-   </sect2>
+   </section>
  
-  </sect1>
+  </section>
  
-  <sect1 id="grs-internal-representation">
+  <section id="grs-internal-representation">
     <title>GRS Internal Record Representation</title>
  
     <para>
@@ -633,7 +633,7 @@
      different tag path.
     </para>
  
-   <sect2>
+   <section id="grs-tagged-elements">
      <title>Tagged Elements</title>
  
      <para>
@@ -650,9 +650,9 @@
       reached from the root of the record).
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2>
+   <section id="grs-variant-details">
      <title>Variants</title>
  
      <para>
@@ -686,9 +686,9 @@
       type, value, corresponding to the variant mechanism of Z39.50.
      </para>
      
-   </sect2>
+   </section>
     
-   <sect2>
+   <section id="grs-data-elements">
      <title>Data Elements</title>
      
      <para>
@@ -702,11 +702,11 @@
     </para>
      -->
      
-   </sect2>
+   </section>
     
-  </sect1>
+  </section>
    
-  <sect1 id="record-model-grs-conf">
+  <section id="grs-conf">
     <title>GRS Record Model Configuration</title>
     
     <para>
@@ -717,7 +717,7 @@
      setting in the <literal>zebra.cfg</literal> file.
     </para>
  
-   <sect2>
+   <section id="grs-abstract-syntax">
      <title>The Abstract Syntax</title>
  
      <para>
@@ -810,9 +810,9 @@
       describe the given objects.
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2>
+   <section id="grs-configuration-files">
      <title>The Configuration Files</title>
  
      <para>
@@ -841,9 +841,9 @@
       mandatory (m).
      </para>
      
-   </sect2>
+   </section>
     
-   <sect2 id="abs-file">
+   <section id="abs-file">
      <title>The Abstract Syntax (.abs) Files</title>
      
      <para>
@@ -954,7 +954,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>any <replaceable>tags</replaceable></term>
+       <term>all <replaceable>tags</replaceable></term>
         <listitem>
          <para>
           (o) This directive specifies a list of attributes
@@ -981,16 +981,16 @@
           the <replaceable>attributes</replaceable>
           specifies which attributes to use when indexing the element in a
           comma-separated list.
-         A ! in place of the attribute name is equivalent to
-         specifying an attribute name identical to the element name.
-         A - in place of the attribute name
+         A <literal>!</literal> in place of the attribute name is equivalent
+        to specifying an attribute name identical to the element name.
+         A <literal>-</literal> in place of the attribute name
           specifies that no indexing is to take place for the given element.
           The attributes can be qualified with <replaceable>field
            types</replaceable> to specify which
           character set should govern the indexing procedure for that field.
           The same data element may be indexed into several different
           fields, using different character set definitions.
-         See the <xref linkend="field-structure-and-character-sets"/>.
+         See the <xref linkend="fields-and-charsets"/>.
           The default field type is <literal>w</literal> for
           <emphasis>word</emphasis>.
          </para>
@@ -1208,9 +1208,9 @@
  
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2 id="attset-files">
+   <section id="attset-files">
      <title>The Attribute Set (.att) Files</title>
  
      <para>
@@ -1294,9 +1294,9 @@
  
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2>
+   <section id="grs-tag-files">
      <title>The Tag Set (.tag) Files</title>
  
      <para>
@@ -1452,9 +1452,9 @@
       </screen>
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2 id="variant-set">
+   <section id="grs-var-files">
      <title>The Variant Set (.var) Files</title>
  
      <para>
@@ -1533,9 +1533,9 @@
  
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2>
+   <section id="grs-est-files">
      <title>The Element Set (.est) Files</title>
  
      <para>
@@ -1673,9 +1673,9 @@
  
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2 id="schema-mapping">
+   <section id="schema-mapping">
      <title>The Schema Mapping (.map) Files</title>
  
      <para>
@@ -1737,9 +1737,9 @@
       </variablelist>
      </para>
  
-   </sect2>
+   </section>
  
-   <sect2>
+   <section id="grs-mar-files">
      <title>The MARC (ISO2709) Representation (.mar) Files</title>
  
      <para>
@@ -1754,253 +1754,10 @@
        handled by the system.</emphasis>
      -->
  
-   </sect2>
-
-   <sect2 id="field-structure-and-character-sets">
-    <title>Field Structure and Character Sets
-    </title>
-
-    <para>
-     In order to provide a flexible approach to national character set
-     handling, Zebra allows the administrator to configure the set up the
-     system to handle any 8-bit character set &mdash; including sets that
-     require multi-octet diacritics or other multi-octet characters. The
-     definition of a character set includes a specification of the
-     permissible values, their sort order (this affects the display in the
-     SCAN function), and relationships between upper- and lowercase
-     characters. Finally, the definition includes the specification of
-     space characters for the set.
-    </para>
-
-    <para>
-     The operator can define different character sets for different fields,
-     typical examples being standard text fields, numerical fields, and
-     special-purpose fields such as WWW-style linkages (URx).
-    </para>
-
-    <sect3 id="default-idx-file">
-     <title>The default.idx file</title>
-     <para>
-      The field types, and hence character sets, are associated with data
-      elements by the .abs files (see above).
-      The file <literal>default.idx</literal>
-      provides the association between field type codes (as used in the .abs
-      files) and the character map files (with the .chr suffix). The format
-      of the .idx file is as follows
-     </para>
-
-     <para>
-      <variablelist>
-
-       <varlistentry>
-       <term>index <emphasis>field type code</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a new search index code.
-         The argument is a one-character code to be used in the
-         .abs files to select this particular index type. An index, roughly,
-         corresponds to a particular structure attribute during search. Refer
-         to <xref linkend="search"/>.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>sort <emphasis>field code type</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a 
-         sort index. The argument is a one-character code to be used in the
-         .abs fie to select this particular index type. The corresponding
-         use attribute must be used in the sort request to refer to this
-         particular sort index. The corresponding character map (see below)
-         is used in the sort process.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>completeness <emphasis>boolean</emphasis></term>
-       <listitem>
-        <para>
-         This directive enables or disables complete field indexing.
-         The value of the <emphasis>boolean</emphasis> should be 0
-         (disable) or 1. If completeness is enabled, the index entry will
-         contain the complete contents of the field (up to a limit), with words
-         (non-space characters) separated by single space characters
-         (normalized to " " on display). When completeness is
-         disabled, each word is indexed as a separate entry. Complete subfield
-         indexing is most useful for fields which are typically browsed (eg.
-         titles, authors, or subjects), or instances where a match on a
-         complete subfield is essential (eg. exact title searching). For fields
-         where completeness is disabled, the search engine will interpret a
-         search containing space characters as a word proximity search.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>charmap <emphasis>filename</emphasis></term>
-       <listitem>
-        <para>
-         This is the filename of the character
-         map to be used for this index for field type.
-        </para>
-       </listitem></varlistentry>
-      </variablelist>
-     </para>
-    </sect3>
-
-    <sect3 id="character-map-files">
-     <title>The character map file format</title>
-     <para>
-      The contents of the character map files are structured as follows:
-     </para>
+   </section>
+  </section>
  
-     <para>
-      <variablelist>
-
-       <varlistentry>
-       <term>lowercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the basic value set of the field type.
-         The format is an ordered list (without spaces) of the
-         characters which may occur in "words" of the given type.
-         The order of the entries in the list determines the
-         sort order of the index. In addition to single characters, the
-         following combinations are legal:
-        </para>
-
-        <para>
-
-         <itemizedlist>
-          <listitem>
-           <para>
-            Backslashes may be used to introduce three-digit octal, or
-            two-digit hex representations of single characters
-            (preceded by <literal>x</literal>).
-            In addition, the combinations
-            \\, \\r, \\n, \\t, \\s (space &mdash; remember that real
-            space-characters may not occur in the value definition), and
-            \\ are recognized, with their usual interpretation.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            Curly braces {} may be used to enclose ranges of single
-            characters (possibly using the escape convention described in the
-            preceding point), eg. {a-z} to introduce the
-            standard range of ASCII characters.
-            Note that the interpretation of such a range depends on
-            the concrete representation in your local, physical character set.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            paranthesises () may be used to enclose multi-byte characters -
-            eg. diacritics or special national combinations (eg. Spanish
-            "ll"). When found in the input stream (or a search term),
-            these characters are viewed and sorted as a single character, with a
-            sorting value depending on the position of the group in the value
-            statement.
-           </para>
-          </listitem>
-
-         </itemizedlist>
-
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>uppercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the
-         upper-case equivalencis to the value set (if any). The number and
-         order of the entries in the list should be the same as in the
-         <literal>lowercase</literal> directive.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>space <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the character
-         which separate words in the input stream. Depending on the
-         completeness mode of the field in question, these characters either
-         terminate an index entry, or delimit individual "words" in
-         the input stream. The order of the elements is not significant &mdash;
-         otherwise the representation is the same as for the
-         <literal>uppercase</literal> and <literal>lowercase</literal>
-         directives.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>map <emphasis>value-set</emphasis>
-        <emphasis>target</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a mapping between each of the
-         members of the value-set on the left to the character on the
-         right. The character on the right must occur in the value
-         set (the <literal>lowercase</literal> directive) of the
-         character set, but it may be a paranthesis-enclosed
-         multi-octet character. This directive may be used to map
-         diacritics to their base characters, or to map HTML-style
-         character-representations to their natural form, etc. The
-         map directive can also be used to ignore leading articles in
-         searching and/or sorting, and to perform other special
-         transformations. See section <xref
-         linkend="leading-articles"/>.
-        </para>
-       </listitem></varlistentry>
-      </variablelist>
-     </para>
-    </sect3>
-    <sect3 id="leading-articles">
-     <title>Ignoring leading articles</title>
-     <para>
-      In addition to specifying sort orders, space (blank) handling,
-      and upper/lowercase folding, you can also use the character map
-      files to make Zebra ignore leading articles in sorting records,
-      or when doing complete field searching.
-     </para>
-     <para>
-      This is done using the <literal>map</literal> directive in the
-      character map file. In a nutshell, what you do is map certain
-      sequences of characters, when they occur <emphasis> in the
-      beginning of a field</emphasis>, to a space. Assuming that the
-      character "@" is defined as a space character in your file, you
-      can do:
-      <screen>
-       map (^The\s) @
-       map (^the\s) @
-      </screen>
-      The effect of these directives is to map either 'the' or 'The',
-      followed by a space character, to a space. The hat ^ character
-      denotes beginning-of-field only when complete-subfield indexing
-      or sort indexing is taking place; otherwise, it is treated just
-      as any other character.
-     </para>
-     <para>
-      Because the <literal>default.idx</literal> file can be used to
-      associate different character maps with different indexing types
-      -- and you can create additional indexing types, should the need
-      arise -- it is possible to specify that leading articles should
-      be ignored either in sorting, in complete-field searching, or
-      both.
-     </para>
-     <para>
-      If you ignore certain prefixes in sorting, then these will be
-      eliminated from the index, and sorting will take place as if
-      they weren't there. However, if you set the system up to ignore
-      certain prefixes in <emphasis>searching</emphasis>, then these
-      are deleted both from the indexes and from query terms, when the
-      client specifies complete-field searching. This has the effect
-      that a search for 'the science journal' and 'science journal'
-      would both produce the same results.
-     </para>
-    </sect3>
-   </sect2>
-  </sect1>
-
-  <sect1 id="grs-exchange-formats">
+  <section id="grs-exchange-formats">
     <title>GRS Exchange Formats</title>
  
     <para>
@@ -2085,8 +1842,326 @@
     
      </itemizedlist>
     </para>
-  </sect1>
+  </section>
+  
+  <section id="grs-extended-marc-indexing">
+   <title>Extended indexing of MARC records</title>
+   
+   <para>Extended indexing of MARC records will help you if you need to index a
+    combination of subfields, to index only a part of a whole field,
+    or to use embedded fields of a MARC record during the indexing process.
+   </para>
+   
+   <para>Extended indexing of MARC records additionally allows:
+    <itemizedlist>
+     
+     <listitem>
+      <para>to index data in LEADER of MARC record</para>
+     </listitem>
+     
+     <listitem>
+      <para>to index data in control fields (with fixed length)</para>
+     </listitem>
+     
+     <listitem>
+      <para>to use during indexing the values of indicators</para>
+     </listitem>
+     
+     <listitem>
+      <para>to index linked fields for UNIMARC based formats</para>
+     </listitem>
+     
+    </itemizedlist>
+   </para>
+   
+   <note><para>Compared with the simple indexing process, extended indexing
+     may increase the indexing time for MARC records (by about 2-3
+     times).</para></note>
+   
+   <section id="formula">
+    <title>The index-formula</title>
+    
+    <para>At the beginning, we have to define the term
+     <emphasis>index-formula</emphasis> for MARC records. This term helps
+     to understand the notation of extended indexing of MARC records by Zebra.
+     Our definition is based on the document
+     <ulink url="http://www.rba.ru/rusmarc/soft/Z39-50.htm">"The table
+      of conformity for Z39.50 use attributes and RUSMARC fields"</ulink>.
+     The document is available only in Russian.</para>
+    
+    <para>
+     The <emphasis>index-formula</emphasis> is a combination of
+     subfields presented in the following way:
+    </para>
+    
+    <screen>
+     71-00$a, $g, $h ($c){.$b ($c)} , (1)
+    </screen>
+    
+    <para>
+     We know that Zebra supports the Bib-1 right truncation attribute.
+     In this case, the <emphasis>index-formula</emphasis> (1) consists of
+     forms defined in the same way as (1):</para>
+    
+    <screen>
+     71-00$a, $g, $h
+     71-00$a, $g
+     71-00$a
+    </screen>
+    
+    <note>
+     <para>The original MARC record may lack some of the elements that are included in the <emphasis>index-formula</emphasis>.
+     </para>
+    </note>
+    
+    <para>This notation includes such operands as:
+     <variablelist>
+      
+      <varlistentry>
+       <term>#</term>
+       <listitem><para>It means whitespace character.</para></listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>-</term>
+       <listitem><para>The position may contain any value, defined by
+        MARC format.
+        For example, <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        70-#1$a, $g , (2)
+       </screen>
+       
+       <para>includes</para> 
+       
+       <screen>
+        700#1$a, $g
+        701#1$a, $g
+        702#1$a, $g
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>{...}</term>
+       <listitem>
+       <para>The repeatable elements are defined in curly braces {}.
+        For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        71-00$a, $g, $h ($c){.$b ($c)} , (3)
+       </screen>
+       
+       <para>includes</para>
+       
+       <screen>
+        71-00$a, $g, $h ($c). $b ($c)
+        71-00$a, $g, $h ($c). $b ($c). $b ($c)
+        71-00$a, $g, $h ($c). $b ($c). $b ($c). $b ($c)
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+     </variablelist>
+     
+     <note>
+      <para>
+       All other operands are the same as those accepted in the MARC world.
+      </para>
+     </note>
+    </para>
+   </section>
+   
+   <section id="notation">
+    <title>Notation of <emphasis>index-formula</emphasis> for Zebra</title>
+    
+    
+    <para>Extended indexing overloads <literal>path</literal> of
+     <literal>elm</literal> definition in abstract syntax file of Zebra
+     (<literal>.abs</literal> file). It means that names beginning with
+     <literal>"mc-"</literal> are interpreted by Zebra as
+     <emphasis>index-formula</emphasis>. The database index is created and
+     linked with <emphasis>access point</emphasis> (Bib-1 use attribute)
+     according to this formula.</para>
+    
+    <para>For example, <emphasis>index-formula</emphasis></para>
+    
+    <screen>
+     71-00$a, $g, $h ($c){.$b ($c)} , (4)
+    </screen>
+    
+    <para>in <literal>.abs</literal> file looks like:</para>
+    
+    <screen>
+     mc-71.00_$a,_$g,_$h_(_$c_){.$b_(_$c_)}
+    </screen>
+    
+    
+    <para>The notation of <emphasis>index-formula</emphasis> uses the operands:
+     <variablelist>
+      
+      <varlistentry>
+       <term>_</term>
+       <listitem><para>It means whitespace character.</para></listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>.</term>
+       <listitem><para>The position may contain any value, defined by
+        MARC format. For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        70-#1$a, $g , (5)
+       </screen>
+       
+       <para>matches <literal>mc-70._1_$a,_$g_</literal> and includes</para>
+       
+       <screen>
+        700_1_$a,_$g_
+        701_1_$a,_$g_
+        702_1_$a,_$g_
+       </screen>
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>{...}</term>
+       <listitem><para>The repeatable elements are defined in
+        curly braces {}. For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        71-00$a, $g, $h ($c) {.$b ($c)} , (6)
+       </screen>
+       
+       <para>matches 
+        <literal>mc-71.00_$a,_$g,_$h_(_$c_){.$b_(_$c_)}</literal> and
+        includes</para>
+       
+       <screen>
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_)
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_).$b_(_$c_)
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_).$b_(_$c_).$b_(_$c_)
+       </screen>
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>&#60;...&#62;</term>
+       <listitem><para>Embedded <emphasis>index-formula</emphasis> (for
+        linked fields) is between &#60;&#62;. For example,
+        <emphasis>index-formula</emphasis>
+       </para>
+       
+       <screen>
+        4--#-$170-#1$a, $g ($c) , (7)
+       </screen>
+       
+       <para>matches
+        <literal>mc-4.._._$1&#60;70._1_$a,_$g_(_$c_)&#62;_</literal> and
+        includes</para>
+       
+       <screen>
+        463_._$1&#60;70._1_$a,_$g_(_$c_)&#62;_
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+     </variablelist>
+    </para>
+    
+    <note>
+     <para>All other operands are the same as those accepted in the MARC world.</para>
+    </note>
+    
+    <section id="grs-examples">
+     <title>Examples</title>
+     
+     <para>
+      <orderedlist>
+       
+       <listitem>
+       
+       <para>indexing LEADER</para>
+       
+       <para>You need to use the keyword "ldr" to index the leader. For example,
+        indexing data from the 6th and 7th positions of the LEADER:</para>
+       
+       <screen>
+        elm mc-ldr[6] Record-type !
+        elm mc-ldr[7] Bib-level   !
+       </screen>
+       
+       </listitem>
+       
+       <listitem>
+       
+       <para>indexing data from control fields</para>
+       
+       <para>indexing date (the time added to database)</para>
+       
+       <screen>
+        elm mc-008[0-5] Date/time-added-to-db !        
+       </screen>
+       
+       <para>or for RUSMARC (this data is included in the 100th field)</para>
+       
+       <screen>
+        elm mc-100___$a[0-7]_ Date/time-added-to-db !
+       </screen>
+       
+       </listitem>
+       
+       <listitem>
+       
+       <para>using indicators while indexing</para>
+
+       <para>For RUSMARC <emphasis>index-formula</emphasis>
+        <literal>70-#1$a, $g</literal> matches</para>
+       
+       <screen>
+        elm mc-70._1_$a,_$g_ Author !:w,!:p
+       </screen>
+       
+       <para>When Zebra finds a field matching the
+        <literal>"70."</literal> pattern, it checks the indicators. In this
+        case the value of the first indicator doesn't matter, but the value of
+        the second one must be whitespace; otherwise the field is not
+        indexed.</para>
+       </listitem>
+       
+       <listitem>
+       
+       <para>indexing embedded (linked) fields for UNIMARC based
+        formats</para>
+       
+       <para>For RUSMARC <emphasis>index-formula</emphasis> 
+        <literal>4--#-$170-#1$a, $g ($c)</literal> matches</para>
+       
+       <screen><![CDATA[
+        elm mc-4.._._$1<70._1_$a,_$g_(_$c_)>_ Author !:w,!:p
+        ]]></screen>
+       
+       <para>Data are extracted from the record if the field matches the
+        <literal>"4.._."</literal> pattern and the data in the linked field
+        match the embedded
+        <emphasis>index-formula</emphasis>
+        <literal>70._1_$a,_$g_(_$c_)</literal>.</para>
+       
+       </listitem>
+       
+      </orderedlist>
+     </para>
+     
+     
+    </section>
+   </section>
  
+  </section>
+  
   </chapter>
   <!-- Keep this comment at the end of the file
   Local variables: