Skip to content

Commit

Permalink
Add serve command (#2040)
Browse files Browse the repository at this point in the history
* Add serve command

to run the HTTP server command very similar to what is used in Replicate
production. By making this command more within reach, my hope is that
more Cog users will build familiarity with and opinions about the HTTP
server interface.

Connected to PLAT-259

* We must serve the Great Alphabet 🙇

Co-authored-by: F <[email protected]>
Signed-off-by: Dan Buch <[email protected]>

* Add a note about the `serve` command to README

* Write the forwarded localhost server address in serve output

---------

Signed-off-by: Dan Buch <[email protected]>
Co-authored-by: F <[email protected]>
  • Loading branch information
meatballhat and erbridge authored Nov 8, 2024
1 parent 966af51 commit e46c4f3
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 9 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,16 @@ $ curl http://localhost:5000/predictions -X POST \
-d '{"input": {"image": "https://.../input.jpg"}}'
```

Or, combine build and run via the `serve` command:

```console
$ cog serve -p 8080
$ curl http://localhost:8080/predictions -X POST \
-H 'Content-Type: application/json' \
-d '{"input": {"image": "https://.../input.jpg"}}'
```

<!-- NOTE (bfirsh): Development environment instructions intentionally left out of readme for now, so as not to confuse the "ship a model to production" message.

In development, you can also run arbitrary commands inside the Docker environment:
Expand Down
1 change: 1 addition & 0 deletions pkg/cli/root.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ https://github.com/replicate/cog`,
newPredictCommand(),
newPushCommand(),
newRunCommand(),
newServeCommand(),
newTrainCommand(),
)

Expand Down
98 changes: 98 additions & 0 deletions pkg/cli/serve.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
package cli

import (
	"errors"
	"runtime"
	"strings"

	"github.com/replicate/cog/pkg/config"
	"github.com/replicate/cog/pkg/docker"
	"github.com/replicate/cog/pkg/image"
	"github.com/replicate/cog/pkg/util"
	"github.com/replicate/cog/pkg/util/console"
	"github.com/spf13/cobra"
)

// port is the host port on which the serve command publishes the HTTP
// server. It defaults to 8393 and is overridden by the --port/-p flag.
var port = 8393

// newServeCommand constructs the `serve` subcommand, which accepts no
// positional arguments and delegates its work to cmdServe.
//
// Besides the flag helpers shared with other commands in this package
// (build progress output, CUDA/Cog base image selection, GPUs), it registers
// --port/-p, which is bound to the package-level port variable.
func newServeCommand() *cobra.Command {
	serveCmd := &cobra.Command{
		Use:        "serve",
		SuggestFor: []string{"http"},
		Short:      "Run a prediction HTTP server",
		Long: `Run a prediction HTTP server.
Generate and run an HTTP server based on the declared model inputs and outputs.`,
		Args: cobra.MaximumNArgs(0),
		RunE: cmdServe,
	}

	// Flags shared with the other build/run style commands.
	addBuildProgressOutputFlag(serveCmd)
	addUseCudaBaseImageFlag(serveCmd)
	addUseCogBaseImageFlag(serveCmd)
	addGpusFlag(serveCmd)

	// The current value of port doubles as the flag's default.
	flags := serveCmd.Flags()
	flags.IntVarP(&port, "port", "p", port, "Port on which to listen")

	return serveCmd
}

// cmdServe implements the `serve` command: it loads the project config,
// builds the base image, and runs `python -m cog.server.http` in a Docker
// container with the project directory mounted at /src and container port
// 5000 published on the host port chosen with --port.
//
// GPU selection: an explicit --gpus value wins; otherwise "all" is used when
// the config has build.gpu enabled. If the run fails because the device
// driver is missing while GPUs is "all", the container is re-run once without
// GPUs.
//
// It returns the first error from loading the config, building the image, or
// running the container.
func cmdServe(cmd *cobra.Command, arg []string) error {
	cfg, projectDir, err := config.GetConfig(projectDirFlag)
	if err != nil {
		return err
	}

	imageName, err := image.BuildBase(cfg, projectDir, buildUseCudaBaseImage, DetermineUseCogBaseImage(cmd), buildProgressOutput)
	if err != nil {
		return err
	}

	gpus := ""
	if gpusFlag != "" {
		gpus = gpusFlag
	} else if cfg.Build.GPU {
		gpus = "all"
	}

	args := []string{
		"python",
		"--check-hash-based-pycs", "never",
		"-m", "cog.server.http",
		"--await-explicit-shutdown", "true",
	}

	runOptions := docker.RunOptions{
		Args:    args,
		Env:     envFlags,
		GPUs:    gpus,
		Image:   imageName,
		Volumes: []docker.Volume{{Source: projectDir, Destination: "/src"}},
		Workdir: "/src",
	}

	if util.IsAppleSiliconMac(runtime.GOOS, runtime.GOARCH) {
		runOptions.Platform = "linux/amd64"
	}

	// Publish the container's port 5000 on the user-selected host port.
	runOptions.Ports = append(runOptions.Ports, docker.Port{HostPort: port, ContainerPort: 5000})

	console.Info("")
	console.Infof("Running '%[1]s' in Docker with the current directory mounted as a volume...", strings.Join(args, " "))
	console.Info("")
	console.Infof("Serving at http://127.0.0.1:%[1]v", port)
	console.Info("")

	err = docker.Run(runOptions)
	// Only retry if we're using a GPU but the user didn't explicitly select a GPU with --gpus.
	// If the user specified the wrong GPU, they are explicitly selecting a GPU and they'll want to hear about it.
	// NOTE(review): an explicit `--gpus all` is indistinguishable from the
	// config-derived default here, so it is retried as well.
	// errors.Is (rather than ==) keeps the check working if the sentinel is wrapped.
	if runOptions.GPUs == "all" && errors.Is(err, docker.ErrMissingDeviceDriver) {
		console.Info("Missing device driver, re-trying without GPU")

		runOptions.GPUs = ""
		err = docker.Run(runOptions)
	}

	return err
}
10 changes: 1 addition & 9 deletions test-integration/test_integration/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,17 +137,9 @@ def cog_server_http_run(project_dir: str):
server = subprocess.Popen(
[
"cog",
"run",
"-e",
f"PORT={port}",
"serve",
"-p",
str(port),
"python",
"--check-hash-based-pycs",
"never",
"-m",
"cog.server.http",
"--await-explicit-shutdown=true",
],
cwd=project_dir,
# NOTE: inheriting stdout and stderr from the parent process when running
Expand Down

0 comments on commit e46c4f3

Please sign in to comment.