Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
wsxiaoys committed Aug 26, 2024
1 parent 08660a9 commit f124b2e
Show file tree
Hide file tree
Showing 4 changed files with 67 additions and 5 deletions.
7 changes: 4 additions & 3 deletions crates/tabby-common/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,11 @@ impl IndexSchema {
fn new() -> Self {
let mut builder = Schema::builder();

let field_corpus = builder.add_text_field("corpus", STRING | FAST);
let field_source_id = builder.add_text_field(FIELD_SOURCE_ID, STRING | FAST);
let field_corpus = builder.add_text_field("corpus", STRING | FAST | STORED);
let field_source_id = builder.add_text_field(FIELD_SOURCE_ID, STRING | FAST | STORED);
let field_id = builder.add_text_field("id", STRING | STORED);

let field_updated_at = builder.add_date_field(FIELD_UPDATED_AT, INDEXED);
let field_updated_at = builder.add_date_field(FIELD_UPDATED_AT, INDEXED | STORED);
let field_attributes = builder.add_text_field("attributes", STORED);

let field_chunk_id = builder.add_text_field(FIELD_CHUNK_ID, STRING | FAST | STORED);
Expand All @@ -107,6 +107,7 @@ impl IndexSchema {
),
);

// Chunks are only indexed for search; their size is usually large, so we don't store them.
let field_chunk_tokens = builder.add_text_field("chunk_tokens", STRING);
let schema = builder.build();

Expand Down
55 changes: 55 additions & 0 deletions crates/tabby-index-cli/src/commands/head.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
use std::path::Path;

use clap::Args;
use tabby_common::index::IndexSchema;
use tantivy::{DocAddress, DocSet, Document, Index, TantivyDocument, Term, TERMINATED};

/// Print the first documents of a corpus stored in the index.
#[derive(Args, Debug)]
pub struct HeadArgs {
    /// Number of documents to display
    #[clap(short, long, default_value = "1")]
    num_docs: usize,

    /// Corpus whose documents are displayed
    #[clap(short, long, default_value = "code")]
    corpus: String,
}

/// Prints up to `args.num_docs` non-deleted documents of the `args.corpus` corpus as JSON,
/// one document per line on stdout.
///
/// Segments are visited in the order the searcher reports them; within a segment, documents
/// are visited in the order of the corpus term's posting list.
///
/// # Errors
///
/// Returns an error if the index at `index_path` cannot be opened, if a reader cannot be
/// created, or if a matching document cannot be loaded. A segment whose inverted index or
/// posting list cannot be read is reported on stderr and skipped.
pub fn run_head_cli(index_path: &Path, args: &HeadArgs) -> anyhow::Result<()> {
    // The limit is only checked after a document has been printed, so a request for zero
    // documents has to be handled before the scan starts.
    if args.num_docs == 0 {
        return Ok(());
    }

    let index = Index::open_in_dir(index_path)?;
    let searcher = index.reader()?.searcher();
    let schema = IndexSchema::instance();

    // The term does not depend on the segment, so build it once.
    let term_corpus = Term::from_field_text(schema.field_corpus, &args.corpus);

    let mut remaining = args.num_docs;
    for (segment_ordinal, segment_reader) in searcher.segment_readers().iter().enumerate() {
        let inverted_index = match segment_reader.inverted_index(schema.field_corpus) {
            Ok(inverted_index) => inverted_index,
            Err(err) => {
                eprintln!("Skipping segment {segment_ordinal}: cannot open inverted index: {err}");
                continue;
            }
        };

        let mut postings = match inverted_index
            .read_postings(&term_corpus, tantivy::schema::IndexRecordOption::Basic)
        {
            Ok(Some(postings)) => postings,
            // The corpus term does not occur in this segment.
            Ok(None) => continue,
            Err(err) => {
                eprintln!("Skipping segment {segment_ordinal}: cannot read postings: {err}");
                continue;
            }
        };

        // `DocAddress` takes a `u32` ordinal; convert with a check instead of truncating.
        let segment_ord = u32::try_from(segment_ordinal)?;

        let mut doc_id = postings.doc();
        while doc_id != TERMINATED {
            if !segment_reader.is_deleted(doc_id) {
                let doc: TantivyDocument = searcher.doc(DocAddress::new(segment_ord, doc_id))?;
                println!("{}", doc.to_json(&schema.schema));

                remaining -= 1;
                if remaining == 0 {
                    return Ok(());
                }
            }
            doc_id = postings.advance();
        }
    }

    Ok(())
}
4 changes: 3 additions & 1 deletion crates/tabby-index-cli/src/commands/mod.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
mod bench;
mod inspect;
mod head;

pub use self::inspect::run_inspect_cli;
pub use self::bench::{run_bench_cli, BenchArgs};
pub use self::bench::{run_bench_cli, BenchArgs};
pub use self::head::{run_head_cli, HeadArgs};
6 changes: 5 additions & 1 deletion crates/tabby-index-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ mod timer;
use std::path::{Path, PathBuf};

use clap::{Parser, Subcommand};
use commands::BenchArgs;
use commands::{BenchArgs, HeadArgs};

#[derive(Parser)]
#[command(author, version, about, long_about = None)]
Expand All @@ -22,6 +22,7 @@ struct Cli {
pub enum Commands {
Inspect,
Bench(BenchArgs),
Head(HeadArgs)
}

fn main() -> anyhow::Result<()> {
Expand All @@ -34,6 +35,9 @@ fn main() -> anyhow::Result<()> {
}
Commands::Bench(args) => {
commands::run_bench_cli(&index_dir, &args).map_err(|e| anyhow::anyhow!("{}", e))?;
},
Commands::Head(args) => {
commands::run_head_cli(&index_dir, &args)?;
}
};

Expand Down

0 comments on commit f124b2e

Please sign in to comment.