'전체'에 해당되는 글 22

  1. 2009/03/03 Searcher Sample
  2. 2009/03/03 Indexer Sample (1)
  3. 2009/02/26 lucene scoring(ver 2.4)
 

Searcher Sample

Lucene In Action | 2009/03/03 15:29 | Posted by gruter


package com.gruter.lia.meetlucene;

import java.io.File;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;


/**
* Lucene version 2.4
* Lucene In Action 1.4.2 색인 내의 검색 코드에 대한 버전 수정본
**/
public class Searcher {
      public static void main(String[] args) throws Exception {
            if (args.length != 2) {
              throw new Exception("Usage: java " + Searcher.class.getName()
                + " <index dir> <query>");
            }

            File indexDir = new File(args[0]);
            String q = args[1];

            if (!indexDir.exists() || !indexDir.isDirectory()) {
              throw new Exception(indexDir +
                " does not exist or is not a directory.");
            }

            search(indexDir, q);
          }

          public static void search(File indexDir, String q)
            throws Exception {
//            Directory fsDir = FSDirectory.getDirectory(indexDir, false);
            Directory fsDir = FSDirectory.getDirectory(indexDir);
            IndexSearcher is = new IndexSearcher(fsDir);

//            Query query = QueryParser.parse(q, "contents",
//              new StandardAnalyzer());
            QueryParser parser = new QueryParser("contents", new StandardAnalyzer());
            Query query = parser.parse(q);
            long start = new Date().getTime();
//            Hits hits = is.search(query);
            TopDocs docs = is.search(query, 100);
            long end = new Date().getTime();

//            System.err.println("Found " + hits.length() +
//              " document(s) (in " + (end - start) +
//              " milliseconds) that matched query '" +
//                q + "':");
//
//            for (int i = 0; i < hits.length(); i++) {
//              Document doc = hits.doc(i);
//              System.out.println(doc.get("filename"));
//            }

            System.err.println("Found " + docs.totalHits +
              " document(s) (in " + (end - start) +
              " milliseconds) that matched query '" +
                q + "':");
            ScoreDoc[] scores = docs.scoreDocs;
            float maxScore = docs.getMaxScore();
            for (int i=0 ;i<docs.scoreDocs.length;i++){           
                Document doc = is.doc(docs.scoreDocs[i].doc);
                  System.out.println(doc.get("filename"));
            }
          }
}

Indexer Sample

Lucene In Action | 2009/03/03 15:08 | Posted by gruter




package com.gruter.lia.meetlucene;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;

/**
* Lucene version 2.4
* Lucene In Action 1.4.1 색인만들기 코드에 대한 버전 수정본
**/

public class Indexer {

    public static void main(String[] args) throws Exception {
        if (args.length != 2) {
          throw new Exception("Usage: java " + Indexer.class.getName()
            + " <index dir> <data dir>");
        }
        File indexDir = new File(args[0]);
        File dataDir = new File(args[1]);

        long start = new Date().getTime();
        int numIndexed = index(indexDir, dataDir);
        long end = new Date().getTime();

        System.out.println("Indexing " + numIndexed + " files took "
          + (end - start) + " milliseconds");
      }

      public static int index(File indexDir, File dataDir)
        throws IOException {

        if (!dataDir.exists() || !dataDir.isDirectory()) {
          throw new IOException(dataDir
            + " does not exist or is not a directory");
        }

//        IndexWriter writer = new IndexWriter(indexDir,
//          new StandardAnalyzer(), true);

        IndexWriter writer = new IndexWriter(indexDir, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
        
        writer.setUseCompoundFile(false);

        indexDirectory(writer, dataDir);

//        int numIndexed = writer.docCount();
        int numIndexed = writer.numDocs();
        writer.optimize();
        writer.close();
        return numIndexed;
      }

      private static void indexDirectory(IndexWriter writer, File dir)
        throws IOException {

        File[] files = dir.listFiles();

        for (int i = 0; i < files.length; i++) {
          File f = files[i];
          if (f.isDirectory()) {
            indexDirectory(writer, f);  // recurse
          } else if (f.getName().endsWith(".txt")) {
            indexFile(writer, f);
          }
        }
      }

      private static void indexFile(IndexWriter writer, File f)
        throws IOException {

        if (f.isHidden() || !f.exists() || !f.canRead()) {
          return;
        }

        System.out.println("Indexing " + f.getCanonicalPath());

        Document doc = new Document();

//        doc.add(Field.Text("contents", new FileReader(f)));
        doc.add(new Field("contents", new FileReader(f)));
       doc.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO));
        writer.addDocument(doc);
      }

}

lucene scoring(ver 2.4)

루씬Tip | 2009/02/26 00:28 | Posted by gruter



How to get a Lucene document score, expressed relative to the top-scoring hit:

// Fragment: `searcher`, `query` and `defaultSort` are defined elsewhere.
// Fetch up to 10 hits; the second argument (presumably the filter slot) is null.
TopDocs docs = searcher.search(query, null, 10,  defaultSort);
ScoreDoc[] scores = docs.scoreDocs;
float maxScore = docs.getMaxScore();
for (int i=0 ;i<docs.scoreDocs.length;i++){   
    // The document is loaded here but not used further in this fragment.
    Document doc = searcher.doc(docs.scoreDocs[i].doc);
    // Score of this hit divided by the maximum score of the result set.
    // NOTE(review): yields NaN/Infinity if maxScore is 0 or NaN — confirm for sorted searches.
    float docscore = scores[i].score / maxScore;
}


Another sample: http://devyongsik.tistory.com/203