Skip to content

Commit

Permalink
Add subnet tag to node config hash generation (#250)
Browse files Browse the repository at this point in the history
* Add subnet tag to node config hash generation; fix GPU dockerfile for volumes fix

* Adjust Dockerfile so it installs ray-on-golem globally
  • Loading branch information
lucekdudek authored Jun 19, 2024
1 parent c70fe17 commit b17eb11
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 5 deletions.
4 changes: 3 additions & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ COPY ray_on_golem/__init__.py /app/ray_on_golem/__init__.py

RUN pip install poetry && \
poetry config virtualenvs.create false
RUN poetry install --no-interaction --no-ansi --only ray

RUN pip config set global.index-url https://pypi.dev.golem.network/simple
RUN pip install pillow

RUN python -m venv --system-site-packages /root/venv
RUN /bin/bash -c "source /root/venv/bin/activate && poetry install --no-interaction --no-ansi --only ray"
RUN bash -c "echo source /root/venv/bin/activate >> /root/.bashrc"

COPY ray_on_golem /app/ray_on_golem/

Expand Down
3 changes: 3 additions & 0 deletions examples/GPU.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ RUN pip install pillow
RUN pip install numpy numba
RUN pip config set global.index-url https://pypi.dev.golem.network/simple

RUN python -m venv --system-site-packages /root/venv
RUN bash -c "echo source /root/venv/bin/activate >> /root/.bashrc"

COPY ray_on_golem /app/ray_on_golem/

RUN rm -r /root/.cache
Expand Down
2 changes: 1 addition & 1 deletion ray_on_golem/server/cluster/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ async def _get_or_create_manager_stack(
node_config: NodeConfigData,
subnet_tag: str,
) -> ManagerStack:
stack_hash = node_config.get_hash()
stack_hash = node_config.get_hash(subnet_tag)

async with self._manager_stacks_locks[stack_hash]:
stack = self._manager_stacks.get(stack_hash)
Expand Down
1 change: 0 additions & 1 deletion ray_on_golem/server/cluster/nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -333,7 +333,6 @@ async def _upload_node_configuration(
await self._run_command(
context, f"echo 'export PATH=$PATH:/root/.local/bin' >> /root/.bashrc"
)
await self._run_command(context, f"echo 'source /root/venv/bin/activate' >> /root/.bashrc")

await self._run_command(context, "mkdir -p /root/.ssh")
await self._run_command(
Expand Down
4 changes: 2 additions & 2 deletions ray_on_golem/server/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ class NodeConfigData(BaseModel):
class Config:
extra = "forbid"

def get_hash(self) -> str:
return hashlib.md5(self.json().encode()).hexdigest()
def get_hash(self, subnet_tag: str) -> str:
return hashlib.md5(self.json().encode()).hexdigest() + "-" + subnet_tag


class ProviderParametersData(BaseModel):
Expand Down

0 comments on commit b17eb11

Please sign in to comment.