Python Client Failing after 2 or more consecutive runs #66
-
Hi, I'm building an application that enables users to chat with their data. The application uses Nillion to store documents as secrets, and an Euclidean distance program calculates the distance between each document and a given query. It features an API endpoint that retrieves the most similar document for a given query and extends the prompts with the content of this document, allowing the LLM to provide more context-aware answers. The api endpoint is described as follows: @app.post("/chat-with-data/")
async def chat_with_data(chat_with_data_request: ChatWithDataRequest):
    """
    Process a chat request with contextual data pulled from specified documents.

    Args:
        chat_with_data_request (ChatWithDataRequest): A request object containing:
            - wallet_id (str): User's wallet identifier.
            - doc_list (List[str]): A list of document identifiers in S3.
            - prompt (str): Chat prompt provided by the user.

    Returns:
        Dict: A dictionary containing the chat response.

    The function performs the following steps:
        1. Retrieve and process documents from S3 to generate embeddings and text extracts.
        2. Generate secrets for each document based on embeddings.
        3. Compute an embedding for the query (prompt).
        4. Generate a secret for the query.
        5. Use a Euclidean distance program to find the document most relevant to the query.
        6. Construct a full prompt using the most relevant document context.
        7. Retrieve a completion from a chat model based on the full prompt.
        8. Return the chat model's response.
    """
    texts = []
    nillion_secrets = []
    # 1-2. Embed each document and wrap its embedding in a Nillion secret dict.
    # The raw embedding is only needed to build the secret, so it is not kept.
    for i, doc_s3 in enumerate(chat_with_data_request.doc_list):
        doc_emb, doc_text = document_helper.s3_doc_to_embeddings(
            chat_with_data_request.wallet_id, doc_s3
        )
        texts.append(doc_text)
        nillion_secrets.append(nillion_secrets_helper.generate_doc_secret_dict(i, doc_emb))
    # 3-4. Embed the query and append its secret last.
    query_embedding = document_helper.get_embeddings(chat_with_data_request.prompt)
    nillion_secrets.append(nillion_secrets_helper.generate_query_secret_dict(query_embedding))
    logger.info(f"nil secrets: {nillion_secrets}")
    logger.info("running nillion program")
    # 5. Blind-compute the index of the closest document on the Nillion network.
    program_result = await run_euclidean_distance(*nillion_secrets)
    closest_distance_index = program_result["closest_document_index"]
    logger.info(f"program_result: {program_result}")
    # 6. Extend the user prompt with the most relevant document's text.
    # Fixed: the separator was the literal "/n"; a real newline was intended.
    prompt = chat_with_data_request.prompt
    full_prompt = f"{prompt} \n context is {texts[closest_distance_index]}"
    # 7. Single completion call with the context-augmented prompt. The previous
    # code also called get_completion(prompt) first and discarded the result,
    # wasting a full LLM round-trip per request.
    response = chat_helper.get_completion(full_prompt)
    logger.info(f"response: {response}")
    # 8. Return the chat model's response.
    return {"response": response}
return {"response": response} This endpoint uses a nillion Python client with the following content. #!/usr/bin/env python
import asyncio
import os
import sys
import tempfile
import shutil
import uuid
import logging
import psutil
import py_nillion_client as nillion
import pytest
from dotenv import load_dotenv
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), "..")))
from helpers.nillion_client_helper import create_nillion_client
from helpers.nillion_keypath_helper import getNodeKeyFromFile, getUserKeyFromFile
from helpers.nillion_secrets_inputs import merge_dictionaries
load_dotenv()
project_directory = os.path.dirname(os.path.abspath(__file__))
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def log_open_files_and_connections(when):
    """Log this process's open file handles and network connections.

    Used to debug suspected descriptor/connection leaks around Nillion
    client operations.

    Args:
        when: Free-text label for the point in the flow being inspected
            (e.g. "before storing program").
    """
    logger.info(f"Logging open files and connections {when}...")
    current = psutil.Process(os.getpid())

    # Open file handles held by this process.
    files = current.open_files()
    if not files:
        logger.info(f"No open files detected {when}.")
    else:
        logger.info(f"Open files {when}:")
        for entry in files:
            logger.info(f" {entry.path}")

    # Network connections held by this process.
    sockets = current.connections()
    if not sockets:
        logger.info(f"No open connections detected {when}.")
    else:
        logger.info(f"Open connections {when}:")
        for sock in sockets:
            logger.info(f" {sock}")
async def store_program_with_retries(client, cluster_id, program_name, tmp_program_path, retries=3):
    """Store a compiled Nada program, retrying timeouts with exponential backoff.

    Args:
        client: Nillion client exposing an async ``store_program`` method.
        cluster_id: Target cluster identifier.
        program_name: Name under which the program is stored.
        tmp_program_path: Path to the compiled program binary.
        retries: Maximum number of attempts (default 3).

    Returns:
        The action id returned by the network, or None when ``retries`` is 0.

    Raises:
        nillion.TimeoutError: When every attempt times out.
        Exception: Any non-timeout error is re-raised immediately.
    """
    for attempt in range(retries):
        is_last_attempt = attempt + 1 == retries
        try:
            return await client.store_program(cluster_id, program_name, tmp_program_path)
        except nillion.TimeoutError as e:
            logger.error(f"Attempt {attempt+1}/{retries} - Timeout error storing program: {e}")
            if is_last_attempt:
                raise
        except Exception as e:
            logger.error(f"Unexpected error storing program: {e}")
            raise
        # Only timeouts on a non-final attempt reach here: back off, then retry.
        await asyncio.sleep(2 ** attempt)  # Exponential backoff
    return None
async def run_euclidean_distance(*secret_inputs):
    """Run the euclidean_distance Nada program on the Nillion network.

    Args:
        *secret_inputs: Secret dictionaries (one per document plus one for
            the query) that are merged and stored before computation.

    Returns:
        The computation result value on success, or None when any step fails
        (errors are logged rather than raised).

    NOTE(review): a fresh client is created on every call but never explicitly
    torn down; recreating a client with the same node key inside one process
    is the suspected cause of failures on consecutive runs — confirm whether
    the client should be created once and reused.
    """
    logger.info("Running euclidean_distance Client...")
    try:
        userkey_path = os.getenv("NILLION_USERKEY_PATH_PARTY_1")
        nodekey_path = os.getenv("NILLION_NODEKEY_PATH_PARTY_1")
        logger.info(f"Userkey path: {userkey_path}")
        logger.info(f"Nodekey path: {nodekey_path}")
        cluster_id = os.getenv("NILLION_CLUSTER_ID")
        userkey = getUserKeyFromFile(userkey_path)
        nodekey = getNodeKeyFromFile(nodekey_path)
    except Exception as e:
        logger.error("Error getting environment variables from .env file")
        logger.error(e)
        return
    try:
        # 1. Initialize NillionClient against nillion-devnet
        client = create_nillion_client(userkey, nodekey)
        logger.info("Created Nillion Client")
        # 2. Get the user id and party id from NillionClient
        party_id = client.party_id
        user_id = client.user_id
        logger.info(f"Got user_id: {user_id}")
        logger.info(f"Got party_id: {party_id}")
        # 3. Store a compiled Nada program in the network. A UUID suffix keeps
        # the program name unique across invocations.
        program_name = "euclidean_distance" + str(uuid.uuid4())
        script_dir = os.path.dirname(os.path.abspath(__file__))
        program_mir_rel_path = "../programs-compiled/euclidean_distance.nada.bin"
        program_mir_abs_path = os.path.abspath(os.path.join(script_dir, program_mir_rel_path))
        logger.info(f"Program MIR Absolute Path: {program_mir_abs_path}")
        try:
            # Create the temp file and close its handle *before* copying into
            # it: copying into a still-open NamedTemporaryFile fails on
            # Windows, where the same file cannot be opened twice.
            with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
                tmp_program_path = tmp_file.name
            shutil.copy(program_mir_abs_path, tmp_program_path)
            logger.info(f"Temporary program path: {tmp_program_path}")
        except Exception as e:
            logger.error("Error copying the program to a temporary file")
            logger.error(e)
            return
        try:
            # Typo fix in the log message: "pogram" -> "program".
            logger.info(f"Storing program {program_name}- cluster: {cluster_id} from program path {tmp_program_path} in the network")
            log_open_files_and_connections("before storing program")
            action_id = await store_program_with_retries(client, cluster_id, program_name, tmp_program_path, retries=2)
            log_open_files_and_connections("after storing program")
        except Exception as e:
            logger.error(f"Error storing program: {e}")
            return
        finally:
            # Always remove the temp copy, whether storing succeeded or not.
            os.remove(tmp_program_path)
        logger.info(f"Stored program. action_id: {action_id}")
        program_id = f"{user_id}/{program_name}"
        logger.info(f"Stored program_id: {program_id}")
        # 4. Merge the per-document and query secrets and bind them to the program
        nillion_compute_secrets, nillion_compute_secrets_raw = merge_dictionaries(*secret_inputs)
        secret_bindings = nillion.ProgramBindings(program_id)
        party_name = "Party1"
        secret_bindings.add_input_party(party_name, party_id)
        # 5. Store the secret in the network and keep the returned store_id
        store_id = await client.store_secrets(cluster_id, secret_bindings, nillion_compute_secrets, None)
        logger.info(f"Computing using program {program_id}")
        logger.info(f"Use secret store_id: {store_id}")
        # 6. Create compute bindings to set input and output parties
        compute_bindings = nillion.ProgramBindings(program_id)
        compute_bindings.add_input_party(party_name, party_id)
        compute_bindings.add_output_party(party_name, party_id)
        # 7. Compute on the stored secrets; no extra compute-time secrets are supplied
        computation_time_secrets = nillion.Secrets({})
        compute_id = await client.compute(
            cluster_id,
            compute_bindings,
            [store_id],
            computation_time_secrets,
            nillion.PublicVariables({}),
        )
        # 8. Wait for the finished event and return the computation result
        logger.info(f"The computation was sent to the network. compute_id: {compute_id}")
        while True:
            compute_event = await client.next_compute_event()
            if isinstance(compute_event, nillion.ComputeFinishedEvent):
                logger.info(f"✅ Compute complete for compute_id {compute_event.uuid}")
                logger.info(f"🖥️ The result is {compute_event.result.value}")
                return compute_event.result.value
    except Exception as e:
        logger.error("Error running euclidean_distance client")
        logger.error(e)
        return
    finally:
        log_open_files_and_connections("final")
log_open_files_and_connections("final") The application works perfectly on the first request to the given endpoint. However, making the same request again with the same payload produces the following error:
From the logs above, we can see that the client fails to store the program. To debug this, I added a UUID to generate a distinct program name for each execution and used a temporary file to hold the program intended for storage in the network. However, this approach has not been successful. It is important to note that if the application is reloaded, the flow works smoothly again, which leads me to believe that the issue may be due to some open connections or files on the client side. Any support on this issue would be greatly appreciated. |
Beta Was this translation helpful? Give feedback.
Replies: 6 comments 6 replies
-
@Davetbutler - I'll add this to our list for triage |
Beta Was this translation helpful? Give feedback.
-
@emanuel-skai thanks for this. Can you please validate the versions of your toolchain? for each cli used and the pip version. thanks! |
Beta Was this translation helpful? Give feedback.
-
Sure thing. Here's a list with all my dependencies.
|
Beta Was this translation helpful? Give feedback.
-
I hope this helps
|
Beta Was this translation helpful? Give feedback.
-
I followed the steps described in this Python quickstart guide. However, I noticed that the instructions seem slightly different from what I remember, as I don't recall seeing these lines before.
This is the result of pip freeze in my local environment
This is the Dockerfile I'm using to build my application image. I am noticing the same behavior in both my local environment and the development server. ###########
# BUILDER #
###########
# Use a slim Python image from the official Docker Hub as a base
FROM python:3.11-slim as builder
# Set the arguments for the build
ARG NIL_SDK_VERSION
#ARG DEBIAN_FRONTEND="noninteractive"
# Set the shell to bash with error handling and verbose output
SHELL ["/bin/bash", "-o", "pipefail", "-xe", "-c"]
# Update system packages and install gcc for compiling Python packages
RUN apt-get update && apt-get -y upgrade && apt-get install -y gcc curl git bash jq \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Test curl installation
RUN curl --version
# Set the working directory in the builder stage
WORKDIR /usr/src/app
# Set environment variables to not generate pyc files and not buffer outputs
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
PUID=1000 \
PGID=1000
# Copy the requirements files into the image
COPY ./requirements-nillion.txt .
COPY ./requirements.txt .
# Copy the sample .env file provided by Nillion
COPY .env.sample .
# Copy the bootstrap script into the image
COPY ./bootstrap-local-environment.sh .
# Upgrade pip and prepare installation wheels for Python dependencies
# NOTE(review): wheels are built only from requirements.txt;
# requirements-nillion.txt is copied but never wheel-built here — confirm intended.
RUN pip install --upgrade pip && \
pip wheel --no-cache-dir --no-deps --wheel-dir /usr/src/app/wheels -r requirements.txt
# Install Foundry (provides the anvil/forge tooling used by the local devnet)
RUN curl -L https://foundry.paradigm.xyz | bash
RUN echo "source $HOME/.bashrc" >> ~/.bash_profile && \
bash -c "source $HOME/.bash_profile && foundryup"
# Find the installation path of foundryup
RUN echo "Foundry tools are installed in: $(dirname $(which foundryup))"
#########
# FINAL #
#########
# Use the same slim Python image for the final stage
FROM python:3.11-slim
# Install runtime tools (curl, procps for process inspection, git/bash/jq for scripts)
RUN apt-get update && \
apt-get -y upgrade && \
apt-get install -y \
curl \
procps \
git \
bash \
jq \
&& apt-get clean \
&& rm -rf /var/lib/apt/lists/*
# Set the shell to bash with error handling and verbose output
SHELL ["/bin/bash", "-o", "pipefail", "-xe", "-c"]
RUN mkdir -p /home/app
# Create a system group and user to run the app
RUN addgroup --system app && adduser --system --group app
# Define environment variables for the application home and user's home
ENV HOME=/home/app \
APP_HOME=/home/app/web \
PATH="/home/app/.nilup/bin:$PATH"
# Create the application working directory
RUN mkdir $APP_HOME
WORKDIR $APP_HOME
# Set additional environment variables
ENV PYTHONDONTWRITEBYTECODE=1 \
PYTHONUNBUFFERED=1 \
ENVIRONMENT=prod \
TESTING=0 \
PUID=1000 \
PGID=1000
# Install the pre-built Python dependencies from wheels produced in the builder stage
COPY --from=builder /usr/src/app/wheels /wheels
COPY --from=builder /usr/src/app/requirements.txt .
RUN pip install --upgrade pip && \
pip install --no-cache /wheels/* && \
pip install "gunicorn==21.0.1" && \
pip install "uvicorn[standard]==0.23.1"
# Install nilup and add it to the PATH via bashrc, with debugging logs
RUN echo "Installing nilup in the final stage"
# Build-arg with a default; overridable at build time (docker build --build-arg)
ARG NIL_SDK_VERSION=0.2.1
RUN curl -o install.sh https://nilup.nilogy.xyz/install.sh && \
bash install.sh && \
/home/app/.nilup/bin/nilup init && \
/home/app/.nilup/bin/nilup install ${NIL_SDK_VERSION} && \
/home/app/.nilup/bin/nilup use ${NIL_SDK_VERSION} && \
echo "Nilup installed in $(which nilup)" && \
echo "nillion installed in $(which nillion)" && \
echo $PATH
# Set PATH to include the directory where nilup and nillion binaries are installed
# NOTE(review): PATH is assigned several times in this stage (here, below, and in
# the ENV HOME block above); the later assignments supersede — consider consolidating.
ENV PATH="/home/app/.nilup/bin:$PATH"
ENV PATH=/home/app/.local/bin:/home/app/.foundry/bin:/home/app/.nilup/bin:/home/app/.nillion/bin:${PATH}
# Test accessibility of a specific application
RUN echo "Checking if nilup"
RUN which nilup
# Copy Foundry binaries from the builder
COPY --from=builder /root/.foundry/bin /usr/local/bin
# Set PATH to include the directory where foundry binaries are installed
ENV PATH="/usr/local/bin:$PATH"
# Check if foundryup is accessible
RUN which foundryup || echo "foundryup is not accessible"
# Remove the installation script
RUN rm install.sh
# Copy the application code to the container
COPY . $APP_HOME
# Change ownership of all files to the app user
RUN chown -R app:app $HOME
# Copy and configure the entrypoint script
COPY entrypoint.sh /usr/local/bin/entrypoint.sh
RUN chmod +x /usr/local/bin/entrypoint.sh
ENTRYPOINT ["/usr/local/bin/entrypoint.sh"]
# Switch to the app user
USER app
# Define the command to run the application
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--log-level", "debug"] And the respective entrypoint.sh #!/bin/bash
# Entrypoint: verify the Nillion toolchain is reachable, bootstrap the local
# devnet environment, then exec the command passed to the container.
# Ensure the PATH includes the nilup and nillion directory
export PATH="/home/app/.nilup/bin:$PATH"
# Output the current PATH for debugging purposes
echo "Current PATH: $PATH"
# Check the locations of nilup and nillion to ensure they are in the PATH
echo "Checking the location of nilup:"
which nilup
echo "Checking the location of nillion:"
which nillion
# Display the versions of nilup and nillion to confirm correct installation
echo "Checking the version of nilup:"
nilup --version
echo "Checking the version of nillion:"
nillion --version
# Create a virtual environment and activate it
#echo "Creating and activating the virtual environment..."
#bash ./create_venv.sh
#source .venv/bin/activate
#echo "Copying the .env.sample file to .env"
# NOTE(review): this overwrites any existing/mounted .env on every start,
# and the `cat` below prints its contents to the container logs — confirm
# nothing secret can end up in .env before shipping.
cp .env.sample .env
echo "Checking .env file"
cat .env
# List the files in the current directory
echo "Listing the files in the current directory (entrypoint)..."
ls -all
# make sure pkill is installed
# Check the installation of nillion dev-net
echo "Checking the location of nillion-devnet:"
which nillion-devnet
# Display the permissions of nillion dev-net
echo "Displaying the permissions of nillion-devnet:"
ls -l $(which nillion-devnet)
# Check if Foundry tools are available
echo "Checking for Foundry tools..."
which foundry
which anvil
# Run local environment setup script
echo "Running local environment bootstrap script..."
./bootstrap-local-environment.sh
#echo "No bootstrap-local-environment.sh script to run."
#echo "bootstrap-local-environment.sh script completed."
#echo "Deactivating the virtual environment..."
#deactivate
echo "Memory after bootstrap:"
free -h
#echo "make sure the app is present"
ls -all
# List the files in the /tmp directory after bootstrap
echo "Listing the files in the /tmp directory after bootstrap..."
ls -all /tmp
# Execute the command passed to the Docker container
exec "$@" |
Beta Was this translation helpful? Give feedback.
-
The root problem was that the client was getting destroyed and a new client object established with the same node key. I suspect that the destructor of a python client instance does not have enough logic to dismantle all the network setup in memory/object/process space - thus - when a new client with the same nodekey is created, a conflict occurs and routing of messaging in the network is then broken. Suggested actions:
Workaround:
cc: @Davetbutler |
Beta Was this translation helpful? Give feedback.
The root problem was that the client was getting destroyed and a new client object established with the same node key. I suspect that the destructor of a python client instance does not have enough logic to dismantle all the network setup in memory/object/process space - thus - when a new client with the same nodekey is created, a conflict occurs and routing of messaging in the network is then broken.
Suggested actions:
Workaround:
client instance creation
cc: @Davetbutler