fix: The link of upload files is broken in the chat view (pingcap#466)
MingWei Liu committed Dec 19, 2024
2 parents 00870b0 + 2f6eb12 commit ee32c05
Showing 397 changed files with 19,067 additions and 13,184 deletions.
21 changes: 9 additions & 12 deletions .env.example
@@ -17,15 +17,12 @@ TIDB_PASSWORD=
TIDB_DATABASE=
TIDB_SSL=true

# CAUTION: Do not change EMBEDDING_DIMS after initializing the database.
# Changing the embedding dimensions requires recreating the database and tables.
# The default EMBEDDING_DIMS and EMBEDDING_MAX_TOKENS are set for the OpenAI text-embedding-3-small model.
# If using a different embedding model, adjust these values according to the model's specifications.
# For example:
# openai/text-embedding-3-small: EMBEDDING_DIMS=1536, EMBEDDING_MAX_TOKENS=8191
# maidalun1020/bce-embedding-base_v1: EMBEDDING_DIMS=768, EMBEDDING_MAX_TOKENS=512
# BAAI/bge-m3: EMBEDDING_DIMS=1024, EMBEDDING_MAX_TOKENS=8192
EMBEDDING_DIMS=1536
# EMBEDDING_MAX_TOKENS should be equal or smaller than the embedding model's max tokens,
# it indicates the max size of document chunks.
EMBEDDING_MAX_TOKENS=8191
# EMBEDDING_MAX_TOKENS indicates the max size of document chunks.
#
# EMBEDDING_MAX_TOKENS should be smaller than the embedding model's max tokens due
# to the tokenizer difference. (see: https://github.com/pingcap/autoflow/issues/397)
#
# Go to https://tidb.ai/docs/embedding-model to check the max tokens of the embedding model.
#
# Notice: this variable will be deprecated in the future.
EMBEDDING_MAX_TOKENS=2048
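
As an aside, here is a minimal sketch (not part of this diff) of how the two variables above might be read and sanity-checked at startup. The per-model limits are copied from the comments in .env.example; the model selection and validation logic are assumptions for illustration only.

```python
# Illustrative only: not part of this commit.
import os

# Known per-model limits, taken from the comments in .env.example above.
KNOWN_MODELS = {
    "openai/text-embedding-3-small": {"dims": 1536, "max_tokens": 8191},
    "maidalun1020/bce-embedding-base_v1": {"dims": 768, "max_tokens": 512},
    "BAAI/bge-m3": {"dims": 1024, "max_tokens": 8192},
}

embedding_dims = int(os.environ.get("EMBEDDING_DIMS", "1536"))
# EMBEDDING_MAX_TOKENS bounds document chunk size; it should stay below the
# model's own limit because tokenizers differ (see pingcap/autoflow#397).
embedding_max_tokens = int(os.environ.get("EMBEDDING_MAX_TOKENS", "2048"))

model = "openai/text-embedding-3-small"  # hypothetical selection
limits = KNOWN_MODELS[model]
if embedding_dims != limits["dims"]:
    raise ValueError(
        f"EMBEDDING_DIMS={embedding_dims} does not match {model} "
        f"({limits['dims']}); changing it after the database is initialized "
        "requires recreating the database and tables."
    )
if embedding_max_tokens > limits["max_tokens"]:
    raise ValueError(
        f"EMBEDDING_MAX_TOKENS={embedding_max_tokens} exceeds the model "
        f"limit of {limits['max_tokens']}."
    )
```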
7 changes: 5 additions & 2 deletions .github/workflows/release.yml
@@ -174,7 +174,7 @@ jobs:
- name: Install Playwright Browsers
run: npx playwright install --with-deps chromium

- name: Fetch Images
- name: Prepare Tests
run: ./prepare-test.sh
env:
E2E_DOCKER_TAG_FRONTEND: ${{needs.docker-metadata.outputs.e2e-frontend}}
@@ -206,7 +206,10 @@ jobs:
name: "Deploy E2E Test Results"
runs-on: ubuntu-latest
needs: e2e-test
if: "!(contains(needs.*.result, 'skipped')) && !cancelled()"
if: |
always()
&& !contains(needs.e2e-test.result, 'skipped')
&& !contains(needs.e2e-test.result, 'cancelled')
defaults:
run:
working-directory: e2e
10 changes: 8 additions & 2 deletions CONTRIBUTING.md
@@ -13,12 +13,18 @@ In this section, you should have some prerequisites software installed on your l
* [Docker Compose](https://docs.docker.com/compose/install/)
* [Python](https://www.python.org/downloads/)
* [Node.js](https://nodejs.org/en/download/)
* [TiDB Serverless](https://pingcap.com/ai)
* [TiDB Cloud Serverless](https://pingcap.com/ai/?utm_source=tidb.ai&utm_medium=community) or [TiDB Self-Managed](https://www.pingcap.com/tidb-self-managed/?utm_source=tidb.ai&utm_medium=community)

#### Setting up your development environment

Setting up the project on your local machine is the first step to contributing to the project. You can clone the project from the GitHub repository and then start the project on your local machine. You can follow the instructions in the [Deployment Guide](https://tidb.ai/docs/deploy-with-docker) file to set up the project on your local machine.

To test your local changes, you can build and run the project using:

```bash
docker compose -f docker-compose.build.yml up
```

### Your First Contribution

All set to participate in the project? You can start by looking at the [open issues](https://github.com/pingcap/tidb.ai/issues) in this repo.
@@ -48,4 +54,4 @@ Please feel free to reach out to the maintainers if you have any questions or ne

## Discussion

If you have any questions or suggestions, please feel free to open a discussion in the [Discussions](https://github.com/pingcap/tidb.ai/discussions)
46 changes: 26 additions & 20 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,36 +1,34 @@
<!-- markdownlint-disable MD033 MD041 -->

<div align="center">
<h1>autoflow</h1>
<h1>AutoFlow</h1>
<a href='https://www.pingcap.com/tidb-cloud-serverless/?utm_source=tidb.ai&utm_medium=community'>
<img src="https://raw.githubusercontent.com/pingcap/tidb.ai/main/frontend/app/public/nextra/icon-dark.svg" alt="TiDB.AI" width =100 height=100></img>
<img src="https://raw.githubusercontent.com/pingcap/tidb.ai/main/frontend/app/public/nextra/icon-dark.svg" alt="AutoFlow" width =100 height=100></img>
</a>
</div>

[![Backend Docker Image Version](https://img.shields.io/docker/v/tidbai/backend?sort=semver&arch=amd64&label=tidbai%2Fbackend&color=blue&logo=fastapi)](https://hub.docker.com/r/tidbai/backend)
[![Frontend Docker Image Version](https://img.shields.io/docker/v/tidbai/frontend?sort=semver&arch=amd64&label=tidbai%2Ffrontend&&color=blue&logo=next.js)](https://hub.docker.com/r/tidbai/frontend)
[![E2E Status](https://img.shields.io/github/check-runs/pingcap/tidb.ai/main?nameFilter=E2E%20Test&label=e2e)](https://tidb-ai-playwright.vercel.app/)
<a href="https://trendshift.io/repositories/12294" target="_blank"><img src="https://trendshift.io/api/badge/repositories/12294" alt="pingcap%2Fautoflow | Trendshift" style="width: 250px; height: 55px;" width="250" height="55"/></a>

[![Backend Docker Image Version](https://img.shields.io/docker/v/tidbai/backend?sort=semver&arch=amd64&label=tidbai%2Fbackend&color=blue&logo=fastapi)](https://hub.docker.com/r/tidbai/backend)
[![Frontend Docker Image Version](https://img.shields.io/docker/v/tidbai/frontend?sort=semver&arch=amd64&label=tidbai%2Ffrontend&&color=blue&logo=next.js)](https://hub.docker.com/r/tidbai/frontend)
[![E2E Status](https://img.shields.io/github/check-runs/pingcap/tidb.ai/main?nameFilter=E2E%20Test&label=e2e)](https://tidb-ai-playwright.vercel.app/)
</div>

## Introduction

An open source GraphRAG (Knowledge Graph) built on top of [TiDB Vector](https://www.pingcap.com/ai?utm_source=tidb.ai&utm_medium=community) and [LlamaIndex](https://github.com/run-llama/llama_index) and [DSPy](https://github.com/stanfordnlp/dspy).
AutoFlow is an open source graph rag (graphrag: knowledge graph rag) based knowledge base tool built on top of [TiDB Vector](https://www.pingcap.com/ai?utm_source=tidb.ai&utm_medium=community) and [LlamaIndex](https://github.com/run-llama/llama_index) and [DSPy](https://github.com/stanfordnlp/dspy).

- **Live Demo**: [TiDB.AI](https://tidb.ai)
- **Documentation**: [Docs](https://tidb.ai/docs/?utm_source=github&utm_medium=tidb.ai)
- **Live Demo**: [https://tidb.ai](https://tidb.ai?utm_source=tidb.ai&utm_medium=community)
- **Deployment Docs**: [Deployment Docs](https://tidb.ai/docs/?utm_source=github&utm_medium=tidb.ai)

## Features

1. **Perplexity-style Conversational Search page**: Our platform features an advanced built-in website crawler, designed to elevate your browsing experience. This crawler effortlessly navigates official and documentation sites, ensuring comprehensive coverage and streamlined search processes through sitemap URL scraping.

![out-of-box-conversational-search](https://github.com/pingcap/tidb.ai/assets/1237528/9cc87d32-14ac-47c6-b664-efa7ec53e751 "Image Title")

You can even edit the Knowledge Graph to add more information or correct any inaccuracies. This feature is particularly useful for enhancing the search experience and ensuring that the information provided is accurate and up-to-date.

![out-of-box-conversational-search](https://github.com/pingcap/tidb.ai/assets/1237528/7bc57b34-99b7-4c4b-a098-9ad33dd0dfdc "Image Title")
![Image](https://github.com/user-attachments/assets/50a4e5ce-8b93-446a-8ce7-11ed7844bd1e)

2. **Embeddable JavaScript Snippet**: Integrate our conversational search window effortlessly into your website by copying and embedding a simple JavaScript code snippet. This widget, typically placed at the bottom right corner of your site, facilitates instant responses to product-related queries.

![embeddable-javascript-snippet](https://github.com/pingcap/tidb.ai/assets/1237528/5a445231-a27a-4ae6-8287-a4f8cf7b64d0 "Image Title")
![Image](https://github.com/user-attachments/assets/f0dc82db-c14d-4863-a242-c7da3a719568)

## Deploy

@@ -44,14 +42,22 @@ An open source GraphRAG (Knowledge Graph) built on top of [TiDB Vector](https://
- [Next.js](https://nextjs.org/) – Framework
- [shadcn/ui](https://ui.shadcn.com/) - Design

## Contact Us

You can reach out to us on [@TiDB_Developer](https://twitter.com/TiDB_Developer) on Twitter.

## Contributing

We welcome contributions from the community. If you are interested in contributing to the project, please read the [Contributing Guidelines](/CONTRIBUTING.md).

<a href="https://next.ossinsight.io/widgets/official/compose-last-28-days-stats?repo_id=752946440" target="_blank" style="display: block" align="center">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="https://next.ossinsight.io/widgets/official/compose-last-28-days-stats/thumbnail.png?repo_id=752946440&image_size=auto&color_scheme=dark" width="655" height="auto">
<img alt="Performance Stats of pingcap/autoflow - Last 28 days" src="https://next.ossinsight.io/widgets/official/compose-last-28-days-stats/thumbnail.png?repo_id=752946440&image_size=auto&color_scheme=light" width="655" height="auto">
</picture>
</a>
<!-- Made with [OSS Insight](https://ossinsight.io/) -->

## License

TiDB.AI is open-source under the Apache License, Version 2.0. You can [find it here](/LICENSE.txt).
AutoFlow is open-source under the Apache License, Version 2.0. You can [find it here](/LICENSE.txt).

## Contact

You can reach out to us on [@TiDB_Developer](https://twitter.com/TiDB_Developer) on Twitter.
4 changes: 3 additions & 1 deletion backend/.gitignore
@@ -166,4 +166,6 @@ cython_debug/
.DS_Store

# VSCode
.vscode/

checkpoint.json
6 changes: 5 additions & 1 deletion backend/Makefile
@@ -18,4 +18,8 @@ run_dev_server:

run_dev_celery_worker:
@echo "Running celery..."
@rye run celery -A app.celery worker --pool solo --concurrency=1 --loglevel=DEBUG
@rye run celery -A app.celery worker -Q default

run_eval_dev_celery_worker:
@echo "Running evaluation celery..."
@rye run celery -A app.celery worker -Q evaluation --loglevel=debug --pool=solo
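
The new target splits work across two Celery queues (default and evaluation), matching the -Q flags above. A minimal routing sketch follows; the broker URL, module layout, and task names are assumptions, not taken from this repository.

```python
# Illustrative routing sketch for the two worker queues started by the
# Makefile targets above (-Q default and -Q evaluation).
from celery import Celery

app = Celery("app", broker="redis://localhost:6379/0")  # broker URL is an assumption

# Tasks land on the "default" queue unless a route says otherwise.
app.conf.task_default_queue = "default"
app.conf.task_routes = {
    # Send evaluation workloads to the worker started by run_eval_dev_celery_worker.
    "tasks.run_evaluation": {"queue": "evaluation"},
}


@app.task(name="tasks.index_document")
def index_document(doc_id: int) -> None:
    # Handled by the default-queue worker.
    print(f"indexing document {doc_id}")


@app.task(name="tasks.run_evaluation")
def run_evaluation(dataset_id: int) -> None:
    # Handled by the evaluation-queue worker.
    print(f"evaluating dataset {dataset_id}")
```

With routes like these, a worker started with `-Q default` only consumes indexing-style tasks, while the `-Q evaluation` worker handles evaluation jobs in isolation.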
11 changes: 7 additions & 4 deletions backend/app/alembic/env.py
@@ -7,8 +7,11 @@

from app.core.config import settings
from app.models import * # noqa
from app.models.knowledge_base_scoped.table_naming import KB_CHUNKS_TABLE_PATTERN, KB_ENTITIES_TABLE_PATTERN, \
KB_RELATIONSHIPS_TABLE_PATTERN
from app.models.knowledge_base_scoped.table_naming import (
KB_CHUNKS_TABLE_PATTERN,
KB_ENTITIES_TABLE_PATTERN,
KB_RELATIONSHIPS_TABLE_PATTERN,
)

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
@@ -60,7 +63,7 @@ def run_migrations_offline():
target_metadata=target_metadata,
include_name=include_name,
literal_binds=True,
compare_type=True
compare_type=True,
)

with context.begin_transaction():
@@ -88,7 +91,7 @@ def run_migrations_online():
connection=connection,
target_metadata=target_metadata,
include_name=include_name,
compare_type=True
compare_type=True,
)

with context.begin_transaction():
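
For context, here is a self-contained sketch of what an include_name hook of this kind might look like, so autogenerate skips the dynamically named per-knowledge-base tables. The stand-in regex patterns and the hook body are assumptions; the real patterns are the ones imported above from app.models.knowledge_base_scoped.table_naming.

```python
# Illustrative sketch only; not the repository's actual implementation.
import re

# Stand-in patterns so the example runs on its own (assumptions).
KB_CHUNKS_TABLE_PATTERN = re.compile(r"^chunks_\d+$")
KB_ENTITIES_TABLE_PATTERN = re.compile(r"^entities_\d+$")
KB_RELATIONSHIPS_TABLE_PATTERN = re.compile(r"^relationships_\d+$")


def include_name(name, type_, parent_names):
    """Alembic hook: exclude dynamically named per-knowledge-base tables from
    autogenerate so migrations only manage the shared schema."""
    if type_ == "table" and name is not None:
        return not any(
            pattern.match(name)
            for pattern in (
                KB_CHUNKS_TABLE_PATTERN,
                KB_ENTITIES_TABLE_PATTERN,
                KB_RELATIONSHIPS_TABLE_PATTERN,
            )
        )
    return True
```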
41 changes: 41 additions & 0 deletions backend/app/alembic/versions/27a6723b767a_.py
@@ -0,0 +1,41 @@
"""empty message
Revision ID: 27a6723b767a
Revises: d2ad44deab20
Create Date: 2024-11-29 20:38:05.773083
"""

from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes
from tidb_vector.sqlalchemy import VectorType
from sqlalchemy.dialects import mysql

# revision identifiers, used by Alembic.
revision = "27a6723b767a"
down_revision = "d2ad44deab20"
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.add_column(
"chat_messages",
sa.Column("is_best_answer", sa.Boolean(), server_default="0", nullable=False),
)
op.create_index(
"ix_chat_message_is_best_answer",
"chat_messages",
["is_best_answer"],
unique=False,
)
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index("ix_chat_message_is_best_answer", table_name="chat_messages")
op.drop_column("chat_messages", "is_best_answer")
# ### end Alembic commands ###
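
As a usage illustration (not part of this commit), the column and index added by the migration above might be queried as below. The ChatMessage model here is a hypothetical stand-in, not the repository's actual model.

```python
# Illustrative only: querying the new is_best_answer column with SQLModel.
from typing import Optional

from sqlmodel import Field, Session, SQLModel, create_engine, select


class ChatMessage(SQLModel, table=True):
    __tablename__ = "chat_messages"

    id: Optional[int] = Field(default=None, primary_key=True)
    content: str = ""
    # Mirrors the migration above: boolean, NOT NULL, defaults to false, indexed.
    is_best_answer: bool = Field(default=False, index=True)


engine = create_engine("sqlite://")  # placeholder URL for the sketch
SQLModel.metadata.create_all(engine)

with Session(engine) as session:
    # The ix_chat_message_is_best_answer index keeps this filter cheap.
    best_answers = session.exec(
        select(ChatMessage).where(ChatMessage.is_best_answer == True)  # noqa: E712
    ).all()
```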
@@ -5,36 +5,58 @@
Create Date: 2024-10-15 16:02:14.203584
"""

from alembic import op
import sqlalchemy as sa
import sqlmodel.sql.sqltypes
from tidb_vector.sqlalchemy import VectorType


# revision identifiers, used by Alembic.
revision = '749767db5505'
down_revision = '8093333c0d87'
revision = "749767db5505"
down_revision = "8093333c0d87"
branch_labels = None
depends_on = None


def upgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.create_table('recommend_questions',
sa.Column('created_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
sa.Column('updated_at', sa.DateTime(timezone=True), server_default=sa.text('now()'), nullable=True),
sa.Column('id', sa.Integer(), nullable=False),
sa.Column('questions', sa.JSON(), nullable=True),
sa.Column('chat_message_id', sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(['chat_message_id'], ['chat_messages.id'], ),
sa.PrimaryKeyConstraint('id')
op.create_table(
"recommend_questions",
sa.Column(
"created_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=True,
),
sa.Column(
"updated_at",
sa.DateTime(timezone=True),
server_default=sa.text("now()"),
nullable=True,
),
sa.Column("id", sa.Integer(), nullable=False),
sa.Column("questions", sa.JSON(), nullable=True),
sa.Column("chat_message_id", sa.Integer(), nullable=False),
sa.ForeignKeyConstraint(
["chat_message_id"],
["chat_messages.id"],
),
sa.PrimaryKeyConstraint("id"),
)
op.create_index(
op.f("ix_recommend_questions_chat_message_id"),
"recommend_questions",
["chat_message_id"],
unique=False,
)
op.create_index(op.f('ix_recommend_questions_chat_message_id'), 'recommend_questions', ['chat_message_id'], unique=False)
# ### end Alembic commands ###


def downgrade():
# ### commands auto generated by Alembic - please adjust! ###
op.drop_index(op.f('ix_recommend_questions_chat_message_id'), table_name='recommend_questions')
op.drop_table('recommend_questions')
op.drop_index(
op.f("ix_recommend_questions_chat_message_id"), table_name="recommend_questions"
)
op.drop_table("recommend_questions")
# ### end Alembic commands ###