From a839681fa3f873ffab21dc15390f1c9fad29c659 Mon Sep 17 00:00:00 2001
From: dennis
Date: Thu, 5 Aug 2010 14:33:42 +0200
Subject: [PATCH] Tools for indexing LOC to SOLR server

post.jar: java client for POSTing
loc.sh: script to convert from gz'ed marc records to turbomarc,
pazpar2 (internal) format to SOLR document format, and then
POSTing it to a SOLR server

Ignore temporary files.
---
 solr/.gitignore |    3 ++
 solr/loc.sh     |   95 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 solr/post.jar   |  Bin 0 -> 6004 bytes
 3 files changed, 98 insertions(+)
 create mode 100644 solr/.gitignore
 create mode 100755 solr/loc.sh
 create mode 100644 solr/post.jar

diff --git a/solr/.gitignore b/solr/.gitignore
new file mode 100644
index 0000000..41134d6
--- /dev/null
+++ b/solr/.gitignore
@@ -0,0 +1,3 @@
+index.log
+part*.dat.gz.*
+data
\ No newline at end of file
diff --git a/solr/loc.sh b/solr/loc.sh
new file mode 100755
index 0000000..e561896
--- /dev/null
+++ b/solr/loc.sh
@@ -0,0 +1,95 @@
+#!/bin/bash
+#
+# Index gzip'ed MARC records (the LOC data) into a SOLR server.
+#
+# For every input file the following files are written to the current
+# directory, BASE being the base name of the input file:
+#   BASE.mrc   uncompressed MARC             (zcat)
+#   BASE.xml   turbomarc                     (yaz-marcdump)
+#   BASE.pz    output of ../test/tmarc.xsl   (xsltproc)
+#   BASE.solr  output of ../etc/pz2-solr.xsl (xsltproc)
+# BASE.solr is then handed to post.jar. Progress is logged to index.log.
+#
+# Usage: loc.sh [file ...]
+#   Without arguments all $LOCDATA/part* files are processed.
+#
+# Environment:
+#   SOLR_URL  update URL, default http://localhost:8983/solr/update
+#   TWO_PASS  "1": convert every file first, index them in a second pass
+LOG=index.log
+MARCDUMP="yaz-marcdump"
+
+# Passed to java as the system property "url".
+DEF_HOST="-Durl=${SOLR_URL:-http://localhost:8983/solr/update}"
+
+# A ./data directory takes precedence over the default data location.
+if [ -d "./data" ] ; then
+    LOCDATA="./data"
+else
+    LOCDATA=/extra/heikki/locdata
+fi
+
+if [ ! -d "$LOCDATA" ] ; then
+    echo "$LOCDATA not a directory"
+    exit 1
+fi
+
+# Keep the file list in an array so every name stays one word.
+if [ "$1" == "" ] ; then
+    FILES=("$LOCDATA"/part*)
+else
+    FILES=("$@")
+fi
+#echo "${FILES[@]}"
+
+rm -f "$LOG"
+
+# convert SRC BASE
+#   Convert the gzip'ed MARC file SRC into BASE.mrc, BASE.xml, BASE.pz and
+#   BASE.solr, and append a listing of these files to $LOG.
+#   Returns 1 as soon as a stage fails, 0 otherwise.
+function convert()
+{
+    # Locals only: the callers' $FILE must not be overwritten by this call.
+    local src=$1
+    local out=$2
+    echo "zcat $src > $out.mrc"
+    zcat "$src" > "$out.mrc" || return 1
+    "$MARCDUMP" -f marc8 -t utf-8 -o turbomarc "$out.mrc" > "$out.xml" \
+        || return 1
+    xsltproc ../test/tmarc.xsl "$out.xml" > "$out.pz" || return 1
+    if ! xsltproc ../etc/pz2-solr.xsl "$out.pz" > "$out.solr" ; then
+        # A partial BASE.solr would later pass for a finished conversion.
+        rm -f "$out.solr"
+        return 1
+    fi
+    ls -l "$out".* >> "$LOG"
+    return 0
+}
+
+if [ "$TWO_PASS" == "1" ] ; then
+    for d in "${FILES[@]}" ; do
+        date "+%c converting $d" >>"$LOG"
+        # Name the output after the file being converted, not after $1.
+        FILE=$(basename "$d")
+        convert "$d" "$FILE" || date "+%c converting $d failed" >>"$LOG"
+    done
+fi
+
+for d in "${FILES[@]}" ; do
+    date "+%c converting $d" >>"$LOG"
+    BASE=$(basename "$d")
+    FILE=$BASE.solr
+    if [ ! -f "$FILE" ] ; then
+        if ! convert "$d" "$BASE" ; then
+            date "+%c converting $d failed, not indexed" >>"$LOG"
+            continue
+        fi
+    fi
+    date "+%c indexing $d" >>"$LOG"
+    java "$DEF_HOST" -jar post.jar "$FILE"
+    date "+%c indexing $d ended" >>"$LOG"
+    #rm tmp.*
+done
+date "+%c All done" >>"$LOG"
+exit 0
diff --git a/solr/post.jar b/solr/post.jar
new file mode 100644
index 0000000000000000000000000000000000000000..c1aff380fb3c56397aeb1fee1e6ac7e13b95583a
GIT binary patch
literal 6004
zcma)=WmuH!x5kI=1_{|nNjP*0NW;)Dv@}DHAVYT}DJ>u&ozgKN-ALyk(#R&H5gb7J z=+1w*&pGS8-sfE(?t88Eyx02l1FN8+;{tv}tIX|_-;zHL8~{2%OkJ&kDRh6c8tfrJeP^2rK@h6!E_cU11I`|BIU9Cu(;$ zsKZ|tY`~8OTxIC2n5X~%%J)Hq|H+~aeeUF74Tib8>A+wP%s&!&FH37DHz>@J%hCbj z>YAc$q(LT4ep~$Rz&fuXh;5~eeA$s*MLwrYA~GROAazpa*L62BXoQ2Ou&?G_YE-Hm zSzwUluvc*cHo2Proa^|TBjXPK`p=)MVUhrW<0lcQhxT3x3aAth>1>YKBHVR8K$u<* zC`%5>M&#uccUuk0F;VNiqZV2)<$*U%93JPKx`GV!29!WlDAWWCE($gSRuh zO(8pw1}yOJk#rhaJDl7F1-nI1innWi^55`RfIl0+e`^Axr;2{Q6YY82#2u2|LpA(- zVr!h2s8BRVOgmbnH)I!nK6EdDS^Twh;9d*koVynlZiwd=gF(BFECDq(UD>lugG+p9 z<^Til(`FCSaKDc)(Tw(0n~mQ7xFQwE_|oRN33P5J2V3mYw%1`JtC=b_c3vu)r1Z?b9K=M|_Dnil@Rb79^?_Aw=y4B~0@lfczcWgs-~x~qQmxWn^y^=hSryYuWzft(v}r%FK}k*1)( zZb5a}_8v0U8;S3)U0^GVyIvRZgdJ>3tXmyBwsJT7Q8G3bz)E7h1(Id6Bn9= z+x;TWZPb~RLMiAY&(b-&ts5IaR(MC&A5cDZBr~H}4Mnzq&h1rmA%rer?^C4D!tBbw z%nb*`qMh7N_E%f?KDi$fY+U60bp3*c8z5_1ddwJ0X-^v**2XO7?Sn$|6@y0KzD;0o M8=Jpo5(rYKEyBYJy2$*7r2IFO0n^QW{;sIm@V{aN;oormg zu&r&vqD*oDxEv8yv89!@q~ zSo}HC4FZ7-90o7TiXX>~=ZTvvXWhu~HPp4jV0ZXQwC|~6DshViwD*ZI>~Z5PF{9GG>LW`=R@aCUc_dZ(T7~#C76$uA(Zn=^ z3>7w{NE20x3(JHfI%Qk&`G|eE&>QnR=ex(G<3=EDWwSf3Ex zkE11m7gxd&m>aWWeMzKAgK=@iYfBH^wYPlpbQb4BUIER0r}j-sb&=eNjF$8c;J~nO zg(6xblv%Ac*)qp)o@7HMbm{Q;Am~srwPjvz^Ko^WkbP&CdtH=+4W>styOtL#FjW;z zxXK~)O4ZUW0x+F{B46(IDgq*{#l8ugOYg&fdqvBN6_Iaa5cw)%*;o|I6&O$MRQD^- z{z=p(Pqkp;F{+jU>u$tlJ%$*hCEJ*fY;24X?Wx$a1@A8(xE)u>izg86k&fEKqZ=Vh zBwOk$WE<^}OVp$FXn!<7Mcf^Mc=gjAQb@1zZo;!&!n9pQXA|5<<^_xId2fcZ9UVT& zNhUo?9$8SYue#T7E)IEmNuo`8a0(FqL3KbzY zhiz{Ry?xoR0;png{zA+?uiQsJj>0{Qx|JD*I?tB;j4FNy6YvyAX!zE8LArq(qjrwG
z{$hpzf3am_L-DKUMnVmZpGq3~jmoq7>5rHZvmBU}K9Am7^=BnR_Y8#FAxWEa^QxGs z52z7W17HBi9l&k7piZ+;Tq#$a@STky9A^gI0Bi3ICEG(Dh@wn$<5ZV#!D}J zyzs=41$WS+&z#U z{OSI*jA-bK*IzaG1xE$kRjb2D8&F+=dtFfqv8tkPL#z&yZ}R=^cwARD_U>qisOuQLJydAI~X%iJYWM3d$~W-Tt~jpz@@8Sj#Y3 zUm#L)S@t~qLYYp^R-1A3tjhddc+?7S`@O2l53dXmvH4a)89;-r1s=07ALXfZs;Hd^ zkMq-ftduWY8SPkKZMYAlZA-!4Y1M=XHKc;-M&jVc7B$P79Ma&@U!z>upXWFLfA?jBnKIxWp zo`&FKPwcdVww-;qsR^hk#rM+wn%xG=p_wHXJkf)%cl;6*Mp_iA?m5TL#L*i;#$D9a zm%HE|Tn$o2CR21Sp$;QgAii;-!qgr(VM!KaL~HXs z3i#%-%12~l5&`jv#)C)<5A^sMchz`ZM%7IWmV8mg`I@HFEsxL@4cHJ>lva{b>4Ddg zTW|ZGTjA-|F}v)&yuzwrf8*g}&c$FPaCJ}W+-4<+<*{V>XZNC`(#~gF)Vc*KnN2t5 ze9L{D?Da8}QtCF;k<~Q3Sr);z3%9ePr!E?Af@E0vYTp_cNZ1aw?s<&nbJJX~ATwTZ zitdKA)Rm3q9540ZR1bC+5V%`sE}v+;q+jTg_|TPaAGR3!MVFiFml&Okc%shkdURNY zGJpi?NcTD-yt+!|8)x>ZRYC&>f2Mzs&MuBz-`>hJ;z(^>Frlt~U}I`8S|+)2no&pF zE$8FsQM)b5JaQyTq|M5X^lEAtWm(NkVyy23li9LvS&IJ4;dC~xG}^(;gZ%m@(!9%A zLss#Su|t{pS^^C>tBfmw%X>YYMGrC4&1Req5k<9C*XkEw^=5B%UtaA`sHI?3d~f-W zo!wvUyuI0?;18{Z%)ds??awyQ@#hGGh_}FeSsG0N3j2G6@9?~?cP}v0AS*?=>kr1s zPuRPL6C3{-^3q{XLs{60sCwb#-9kHi# z4_voD{Q4oSv)alu{^H7ZsvxxQ`b9=yf}hKkC2g@casbJ&@0V6u7k!;Zw)$YrRY!CF z%XSu=Lu($~3=Q$~1b@T7U<$^;&E7gxn#N@M%9!V>dVpooTmaTR;+j(3xaqH+LSRwt_=T&%vSeO|I^u-#XICs==47h|s^$OEJf{lws@TCEfB?_0qjkLY=FlLN!e@ zcXMry$4d52HqTWTA1m!)7IfFcCCbz$F)!LUwS41t)zQ7KPfOmv71KIWw&xy(rU6R^ z$FV$OLv8IW3wJmK75saq^f#!WQO4xZU^DVo!NixgwB#*nh`x0=;}%eP301mo^Ws9F zrzM;#e@r09abxjU7A6C`9^Fi>&d+;cbKARUK&ZV@!G-JXnlb=3GJW(Z;9ArKG9)OtB(lZrqGoU zaij#?^+=)h1Z8Q1^&9AtepOERnA1)SR$`R97IDG6JZihGH%Nbp%^5_e!h3}+aQ8${ zLirs^X*Gj^1n2=P!Sw8Oj+8}0hstwg2@+>NAf794QTlTUCG`FcH3&t~G*A5-JuFdJ zRB2>;Vu$bzpe?vvBj}Fi_HmXR4bSP<1|<6>-k|uX{&WdL0qK<&6$B#+<;pKX8F@I| zNU`Ymp#x;jA*^JCb-&y{b9}lnt&<8aH6tkv>kMtYw+=V?^+kh9F$&eGLL#V^EQO=9 ze{PmsWB-eKQ{r>+HMDJsmJ_;^X3|NuLBkLO3LXyp2)VWB1}7P`(W9WmWgdP)vg&Xp z@5z{MV&^XsX~ae4+x$T<9L_ z1=A2|qGMExzViX_$;i&k1gXwlu$OVt%wUExOEb1EaTw5gD@;*WmXwSd=K2LtK8-_w zcqmd{x8Uy_5tC>YOqA6X548@HwGw@n8_VE_x3V2;X)szTwr3=fXk8x$-#^s7?mbkx 
zeDiDdkXikf%PD1ZV<-E^tFBWwgQ{~i=j9`36Wx6K0sN{C*BOyTR-&M${nOugOzuC3yXk{lp&5FOs%+95c=Bdh7f4<$%kvHQf)Kaq2i6TZmk#_)zY z&afn4Psf?#UO#3jP=*QVz7)3#IB?DN!PHqrUw$Wj&hfkm#*oud5HpW@+L#YtiC)5u z%+R)RpOzhD)nnqVL46{O=w*B5n2la@f_*)9LP;tNrqBqz?~?Y;1=foO_Y66c1nDD% z>+b143`k>qPRiqZact(< zK{g{4JP%~`2y##TXz7u%*?4-Aj!lm+1^|yhBwfCF1C-kjoPHIuyOF;n-8R*n2eM}O zFI29WG&f+o#bl#X>M>DnQ?#YuCf?#m5e+?{X;bYW`CK?!VX$AIa4nwloIr7Ps=M)( zK&P|i!%HUn>(kd@(KL~n*JsKoXcg;`-+)lvP*C5(kUW4kI!t{zp z<6-zVjrqN+N}>z#paqQVyRG1|TwMBs_A*c!Fn|cGf`Uqp_Rr|_J6!o5#DD|oAK$;| z=JP(@C=3REKmW4+3t0Wv=0BsGzuA9lzVJQ$opt`%^S^oM58C;A{22H8 zd-}ui7yJC%_%r(XxA7s$zZw5vq5tguK|}v`YyZUk2OIsf#LwyIpG69T!~dzo|IJGO p{-HnTWPdkVL;LSr{x;{oH~F2KfmJXte^{}