From ec635800fe596ac920e6c78195d49272e608d6c5 Mon Sep 17 00:00:00 2001
From: efugier
Date: Thu, 9 Nov 2023 15:02:19 +0100
Subject: [PATCH] feat(everything): usable config, readme, working with files
 and more

---
 Cargo.toml              |   1 +
 README.md               | 134 +++++++++++++++++++++++++++++++++++++++-
 src/config.rs           |  16 ++---
 src/cutsom_prompt.rs    |  39 ++++++++++++
 src/input_processing.rs |  17 ++---
 src/main.rs             |  88 +++++++++++++++++++++-----
 src/request.rs          |  35 +++--
 7 files changed, 273 insertions(+), 57 deletions(-)
 create mode 100644 src/cutsom_prompt.rs

diff --git a/Cargo.toml b/Cargo.toml
index 343ec8a..2e2449f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -7,6 +7,7 @@ edition = "2021"
 
 [dependencies]
 toml = "*"
+log = "*"
 clap = { version = "*", features = ["derive"] }
 ureq = { version="*", features = ["json"] }
 serde = { version = "*", features = ["derive"] }
diff --git a/README.md b/README.md
index fed9a60..69540cc 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,134 @@
 # pipelm
-chat gpt interface form cli
+WIP cli interface to language models to bring them into the Unix ecosystem
+
+```
+Usage: pipelm [OPTIONS] [PROMPT]
+
+Arguments:
+  [PROMPT]  prompt in the config to fetch
+
+Options:
+  -c, --command <COMMAND>                custom prompt, incompatible with [PROMPT]
+  -b, --before <BEFORE>                  prefix to add before the custom prompt
+  -a, --after <AFTER>                    suffix to add after the input and the custom prompt
+  -s, --system-message <SYSTEM_MESSAGE>  a system "config" message to send before the prompt
+      --api <API>                        which api to hit [default: openai]
+  -m, --model <MODEL>                    which model (of the api) to use [default: gpt-3.5-turbo]
+  -f, --file <FILE>                      file to read input from
+  -h, --help                             Print help
+  -V, --version                          Print version
+```
+
+## A few examples to get started
+
+```
+cat Cargo.toml | pipelm -c "write a short poem about the content of the file"
+
+A file named package,
+Holds the keys of a software's age.
+With a name, version, and edition too,
+The content speaks of something new.
+
+Dependencies lie within,
+With toml, clap, ureq, and serde in,
+The stars denote any version will do,
+As long as the features are included, too.
+
+A short poem of the file's content,
+A glimpse into the software's intent.
+With these keys and dependencies,
+A program is born, fulfilling needs.
+```
+
+```
+cat my_file.json | pipelm -c "translate to yaml" > my_file.yaml
+```
+
+```
+cat my_stuff.py | pipelm \
+    -c "write a parametrized test suite for the following code using pytest" \
+    -s "output only the code, as a standalone file" \
+    -b "```" -a "```" > test.py
+```
+
+If you find yourself reusing prompts often, you can create dedicated config entries, and it becomes the following:
+
+```
+cat my_stuff.py | pipelm write_test > test.py
+```
+
+see the example in the next section.
+
+## Vim
+
+You can also integrate this with your editor. For instance, in Vim
+
+```
+:'<,'>!tee >(pipelm write_test)
+```
+
+will append tests written by the language model for the selected code at the end of the current selection.
+
+With some remapping you can have the whole thing attached to a few keystrokes, e.g. `wt`.
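+
+For instance, a visual-mode mapping along those lines (the `wt` binding is only an illustration, and the process substitution assumes Vim's `shell` is bash or zsh):
+
+```
+vnoremap wt :!tee >(pipelm write_test)<CR>
+```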
+
+These are only some ideas to get you started.
+
+## Configuration
+
+- by default, the config lives at `$HOME/.config/pipelm`
+- the directory can be set using the `PIPELM_CONFIG_PATH` environment variable
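+
+This makes it easy to try an alternative configuration without touching the main one (`/tmp/pipelm_test` is just a placeholder path):
+
+```
+PIPELM_CONFIG_PATH=/tmp/pipelm_test/ pipelm write_test < my_stuff.py
+```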
+
+Two files are used:
+
+`.api_configs.toml`
+
+```toml
+[openai] # each api has its own config section with an api key and url
+url = "https://api.openai.com/v1/chat/completions"
+api_key = "your api key"
+```
+
+`prompts.toml`
+
+```toml
+[default] # a prompt is a section
+api = "openai"
+model = "gpt-4-1106-preview"
+
+[[default.messages]] # then you can list its messages
+role = "system"
+content = """\
+You are a poetic assistant, skilled in explaining complex programming \
+concepts with creative flair.\
+"""
+
+[[default.messages]]
+role = "user"
+# the following placeholder string #[] will be replaced by the input
+# each message seeks it and replaces it
+content = "#[]"
+
+[write_test] # a prompt is a section
+api = "openai"
+model = "gpt-4-1106-preview"
+
+[[write_test.messages]] # then you can list its messages
+role = "system"
+content = """\
+You are a very skilled programmer with a keen eye for detail. You always make sure to write clean \
+code and you value clarity particularly highly. \
+When asked for code, output only the code to write directly. Don't provide explanations.\
+"""
+
+[[write_test.messages]]
+role = "user"
+# the following placeholder string #[] will be replaced by the input
+# each message seeks it and replaces it
+content = '''Write tests using pytest for the following code. Parametrize it if appropriate.
+
+#[]
+'''
+```
+
+see [the config setup file](./src/config.rs) for more details.
diff --git a/src/config.rs b/src/config.rs
index 5475cf2..e25ba0a 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -4,7 +4,7 @@ use std::fs;
 use std::path::PathBuf;
 
 #[derive(Debug, Deserialize)]
-pub struct ServiceConfig {
+pub struct ApiConfig {
     #[serde(skip_serializing)] // internal use only
     pub api_key: String,
     pub url: String,
@@ -13,7 +13,7 @@ pub struct ServiceConfig {
 #[derive(Debug, Deserialize, Serialize)]
 pub struct Prompt {
     #[serde(skip_serializing)] // internal use only
-    pub service: String,
+    pub api: String,
     pub model: String,
     pub messages: Vec<Message>,
 }
@@ -28,7 +28,7 @@ pub const PLACEHOLDER_TOKEN: &str = "#[]";
 const DEFAULT_CONFIG_PATH: &str = ".config/pipelm/";
 const CUSTOM_CONFIG_ENV_VAR: &str = "PIPELM_CONFIG_PATH";
 
-const API_KEYS_FILE: &str = ".api_keys.toml";
+const API_KEYS_FILE: &str = ".api_configs.toml";
 const PROMPT_FILE: &str = "prompts.toml";
 
 fn resolve_config_path() -> PathBuf {
@@ -38,18 +38,18 @@ fn resolve_config_path() -> PathBuf {
     }
 }
 
-pub fn get_service_config(service: &str) -> ServiceConfig {
+pub fn get_api_config(api: &str) -> ApiConfig {
     let api_keys_path = resolve_config_path().join(API_KEYS_FILE);
     let content = fs::read_to_string(&api_keys_path)
         .unwrap_or_else(|error| panic!("Could not read file {:?}, {:?}", api_keys_path, error));
 
-    let mut service_configs: HashMap<String, ServiceConfig> = toml::from_str(&content).unwrap();
+    let mut api_configs: HashMap<String, ApiConfig> = toml::from_str(&content).unwrap();
 
-    service_configs.remove(service).unwrap_or_else(|| {
+    api_configs.remove(api).unwrap_or_else(|| {
         panic!(
             "Prompt {} not found, availables ones are: {:?}",
-            service,
-            service_configs.keys().collect::<Vec<_>>()
+            api,
+            api_configs.keys().collect::<Vec<_>>()
         )
     })
 }
diff --git a/src/cutsom_prompt.rs b/src/cutsom_prompt.rs
new file mode 100644
index 0000000..fa49910
--- /dev/null
+++ b/src/cutsom_prompt.rs
@@ -0,0 +1,39 @@
+use log::debug;
+
+use crate::config::{Message, Prompt, PLACEHOLDER_TOKEN};
+
+pub fn customize_prompt(
+    mut prompt: Prompt,
+    command: &Option<String>,
+    before: &Option<String>,
+    after: &Option<String>,
+    system_message: &Option<String>,
+) -> Prompt {
+    debug!("customizing prompt");
+    let empty_prompt = prompt.messages.is_empty();
+
+    if let Some(message_content) = system_message {
+        prompt.messages.push(Message {
+            role: "system".to_string(),
+            content: message_content.to_owned(),
+        });
+    }
+    if command.is_some() {
+        let mut prompt_message: String = [before, command, after]
+            .into_iter()
+            .filter_map(|x| x.to_owned())
+            .collect();
+        prompt_message.push_str(PLACEHOLDER_TOKEN);
+        prompt.messages.push(Message {
+            role: "user".to_string(),
+            content: prompt_message,
+        });
+    } else if empty_prompt {
+        // no command and an empty prompt -> use the input as the whole prompt
+        prompt.messages.push(Message {
+            role: "user".to_string(),
+            content: PLACEHOLDER_TOKEN.to_string(),
+        });
+    }
+    prompt
+}
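+
+// A minimal sanity-check sketch for `customize_prompt`; it assumes only the
+// `Prompt` and `Message` shapes defined in src/config.rs above.
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn custom_command_becomes_user_message_with_placeholder() {
+        let prompt = Prompt {
+            api: "openai".to_string(),
+            model: "gpt-3.5-turbo".to_string(),
+            messages: Vec::new(),
+        };
+        let prompt = customize_prompt(
+            prompt,
+            &Some("translate to yaml".to_string()),
+            &None,
+            &None,
+            &None,
+        );
+        // the command is appended as a user message ending with the input placeholder
+        assert_eq!(prompt.messages.len(), 1);
+        assert_eq!(prompt.messages[0].role, "user");
+        assert!(prompt.messages[0].content.ends_with(PLACEHOLDER_TOKEN));
+    }
+}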
diff --git a/src/input_processing.rs b/src/input_processing.rs
index 78add66..865c8a7 100644
--- a/src/input_processing.rs
+++ b/src/input_processing.rs
@@ -1,7 +1,9 @@
-use crate::config::{get_service_config, Prompt, PLACEHOLDER_TOKEN};
-use crate::request::{make_authenticated_request, OpenAiResponse};
+use log::debug;
 use std::io::{Read, Result, Write};
 
+use crate::config::{get_api_config, Prompt, PLACEHOLDER_TOKEN};
+use crate::request::{make_authenticated_request, OpenAiResponse};
+
 // [tmp] mostly template to write tests
 pub fn chunk_process_input<R: Read, W: Write>(
     input: &mut R,
@@ -34,7 +36,7 @@ pub fn chunk_process_input<R: Read, W: Write>(
 }
 
 pub fn process_input_with_request<R: Read, W: Write>(
-    prompt: &mut Prompt,
+    mut prompt: Prompt,
     input: &mut R,
     output: &mut W,
 ) -> Result<()> {
@@ -51,14 +53,15 @@ pub fn process_input_with_request<R: Read, W: Write>(
     for message in prompt.messages.iter_mut() {
         message.content = message.content.replace(PLACEHOLDER_TOKEN, &input)
     }
-    let service_config = get_service_config(&prompt.service);
-    let response: OpenAiResponse = make_authenticated_request(service_config, prompt)
+    let api_config = get_api_config(&prompt.api);
+    let response: OpenAiResponse = make_authenticated_request(api_config, prompt)
         .unwrap()
         .into_json()?;
 
-    println!("{}", response.choices.first().unwrap().message.content);
+    let response_text = response.choices.first().unwrap().message.content.as_str();
+    debug!("{}", &response_text);
 
-    output.write_all(input.as_bytes())?;
+    output.write_all(response_text.as_bytes())?;
 
     Ok(())
 }
diff --git a/src/main.rs b/src/main.rs
index 7895ddc..83d610c 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,10 @@
-use clap::Parser;
+use clap::{Args, Parser};
+use log::debug;
+use std::fs;
 use std::io;
+use std::io::Read;
+
+mod cutsom_prompt;
 
 mod input_processing;
 mod request;
@@ -7,19 +12,57 @@
 mod config;
 
 #[derive(Debug, Parser)]
-#[command(author, version, about, long_about = None)]
+#[command(
+    author = "Emilien Fugier",
+    version = "0.1",
+    about = "WIP cli interface to language models to bring them into the Unix ecosystem",
+    long_about = None
+)]
 struct Cli {
-    #[arg(default_value_t = String::from("default"))]
-    prompt: String,
-    #[arg(short, long, default_value_t = String::from("openai"))]
-    command: String,
+    /// prompt in the config to fetch
+    #[arg(group = "prompt_from_config")]
+    prompt: Option<String>,
+    #[command(flatten)]
+    custom_prompt_args: CustomPrompt,
+    /// a system "config" message to send before the prompt
+    #[arg(short, long)]
+    system_message: Option<String>,
+    /// which api to hit
+    #[arg(long, default_value_t = String::from("openai"))]
+    api: String,
+    #[arg(short, long, default_value_t = String::from("gpt-3.5-turbo"))]
+    /// which model (of the api) to use
+    model: String,
+    /// file to read input from
+    #[arg(short, long)]
+    file: Option<String>,
+}
+
+#[derive(Debug, Args)]
+#[group(id = "custom_prompt", conflicts_with = "prompt_from_config")]
+struct CustomPrompt {
+    /// custom prompt, incompatible with [PROMPT]
+    #[arg(short, long, group = "custom_prompt")]
+    command: Option<String>,
+    /// prefix to add before the custom prompt
+    #[arg(short, long, group = "custom_prompt")]
+    before: Option<String>,
+    /// suffix to add after the input and the custom prompt
+    #[arg(short, long, group = "custom_prompt")]
+    after: Option<String>,
 }
 
 fn main() {
     let args = Cli::parse();
 
     let mut output = io::stdout();
-    let mut input = io::stdin();
+    let mut input: Box<dyn Read> = match args.file {
+        Some(file) => Box::new(
+            fs::File::open(&file)
+                .unwrap_or_else(|error| panic!("File {} not found. {:?}", file, error)),
+        ),
+        _ => Box::new(io::stdin()),
+    };
 
     // case for testing
     // TODO: mock API
@@ -39,17 +82,30 @@ fn main() {
 
     let mut prompts = config::get_prompts();
 
-    let available_prompts: Vec<&String> = prompts.keys().collect();
-    let prompt_not_found_error = format!(
-        "Prompt {} not found, availables ones are: {:?}",
-        &args.prompt, &available_prompts
+    let prompt = match args.prompt {
+        Some(prompt) => {
+            let available_prompts: Vec<&String> = prompts.keys().collect();
+            let prompt_not_found_error = format!(
+                "Prompt {} not found, available ones are: {:?}",
+                &prompt, &available_prompts
+            );
+            prompts.remove(&prompt).expect(&prompt_not_found_error)
+        }
+        None => config::Prompt {
+            api: args.api,
+            model: args.model,
+            messages: Vec::new(),
+        },
+    };
+    let prompt = cutsom_prompt::customize_prompt(
+        prompt,
+        &args.custom_prompt_args.command,
+        &args.custom_prompt_args.before,
+        &args.custom_prompt_args.after,
+        &args.system_message,
     );
-    let prompt = prompts
-        .get_mut(&args.prompt)
-        .expect(&prompt_not_found_error);
-
-    println!("{:?}", prompt);
+    debug!("{:?}", prompt);
 
     if let Err(e) = input_processing::process_input_with_request(prompt, &mut input, &mut output) {
         eprintln!("Error: {}", e);
diff --git a/src/request.rs b/src/request.rs
index 0e50d13..b1d2459 100644
--- a/src/request.rs
+++ b/src/request.rs
@@ -1,6 +1,8 @@
+use log::debug;
 use serde::{Deserialize, Serialize};
+use std::fmt::Debug;
 
-use crate::config::ServiceConfig;
+use crate::config::ApiConfig;
 
 #[derive(Debug, Deserialize)]
 pub struct Message {
@@ -30,34 +32,17 @@ pub struct OpenAiResponse {
     pub model: String,
     pub choices: Vec<Choice>,
     pub usage: Usage,
-    pub system_fingerprint: String,
+    pub system_fingerprint: Option<String>,
 }
 
 pub fn make_authenticated_request(
-    service_config: ServiceConfig,
-    data: impl Serialize,
+    api_config: ApiConfig,
+    data: impl Serialize + Debug,
 ) -> Result<ureq::Response, ureq::Error> {
-    println!("Trying to reach openai with {}", service_config.api_key);
-    ureq::post(&service_config.url)
+    debug!("Trying to reach openai with {}", api_config.api_key);
+    debug!("request content: {:?}", data);
+    ureq::post(&api_config.url)
         .set("Content-Type", "application/json")
-        .set(
-            "Authorization",
-            &format!("Bearer {}", service_config.api_key),
-        )
+        .set("Authorization", &format!("Bearer {}", api_config.api_key))
         .send_json(data)
-        // .send_json(ureq::json!(
-        //     {
-        //         "model": "gpt-4-1106-preview",
-        //         "messages": [
-        //             {
-        //                 "role": "system",
-        //                 "content": "You are a poetic assistant, skilled in explaining complex programming concepts with creative flair."
-        //             },
-        //             {
-        //                 "role": "user",
-        //                 "content": data.messages.last().unwrap().content
-        //             }
-        //         ]
-        //     })
-        // )
 }
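
Note: the patch routes diagnostics through the `log` facade, but no logger implementation is installed, so the `debug!` calls above stay silent. A minimal sketch of wiring one up, assuming `env_logger` were added to Cargo.toml (it is not part of this patch):

```rust
fn main() {
    // hypothetical: requires the env_logger crate; run with RUST_LOG=debug
    // to surface the debug! output on stderr
    env_logger::init();
    // ...
}
```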