diff --git a/README.md b/README.md
index 7bb263821..fb66a89a5 100644
--- a/README.md
+++ b/README.md
@@ -83,34 +83,21 @@ is to ensure that there is a well-defined interface between their output and
 user-defined code. **Outlines** provides ways to control the generation of
 language models to make their output more predictable.

-### Early stopping
-
-You can stop the generation after a given sequence has been found:
-
-``` python
-import outlines.text.generate as generate
-import outlines.models as models
-
-model = models.transformers("gpt2")
-answer = generate.continuation(model, stop=["."])("Tell me a one-sentence joke.")
-```
-
 ### Multiple choices

 You can reduce the completion to a choice between multiple possibilities:

 ``` python
-import outlines.text.generate as generate
-import outlines.models as models
+import outlines

-model = models.transformers("gpt2")
+model = outlines.models.transformers("gpt2")

 prompt = """You are a sentiment-labelling assistant.
 Is the following review positive or negative?

 Review: This restaurant is just awesome!
 """

-answer = generate.choice(model, ["Positive", "Negative"])(prompt)
+answer = outlines.generate.choice(model, ["Positive", "Negative"])(prompt)
 ```

 ### Type constraint
@@ -119,16 +106,15 @@ You can instruct the model to only return integers or floats:

 ``` python
-import outlines.text.generate as generate
-import outlines.models as models
+import outlines

-model = models.transformers("gpt2")
+model = outlines.models.transformers("gpt2")

 prompt = "1+1="
-answer = generate.integer(model)(prompt)
+answer = outlines.generate.format(model, int)(prompt)

 prompt = "sqrt(2)="
-answer = generate.float(model)(prompt)
+answer = outlines.generate.format(model, float)(prompt)
 ```

 ### Efficient regex-guided generation
@@ -138,15 +124,13 @@ Outlines also comes with fast regex-guided generation. In fact, the `choice`,
 hood:

 ``` python
-import outlines.models as models
-import outlines.text.generate as generate
+import outlines

-
-model = models.transformers("gpt2-medium")
+model = outlines.models.transformers("gpt2-medium")

 prompt = "Is 1+1=2? "
-unguided = generate.continuation(model, max_tokens=30)(prompt)
-guided = generate.regex(model, r"\s*([Yy]es|[Nn]o|[Nn]ever|[Aa]lways)", max_tokens=30)(
+unguided = outlines.generate.continuation(model, max_tokens=30)(prompt)
+guided = outlines.generate.regex(model, r"\s*([Yy]es|[Nn]o|[Nn]ever|[Aa]lways)", max_tokens=30)(
     prompt
 )

@@ -162,15 +146,13 @@ print(guided)
 ```

 ``` python
-import outlines.models as models
-import outlines.text.generate as generate
-
+import outlines

-model = models.transformers("gpt2-medium")
+model = outlines.models.transformers("gpt2-medium")

 prompt = "What is the IP address of the Google DNS servers? "
" unguided = generate.continuation(model, max_tokens=30)(prompt) -guided = generate.regex( +guided = outlines.generate.regex( model, r"((25[0-5]|2[0-4]\d|[01]?\d\d?)\.){3}(25[0-5]|2[0-4]\d|[01]?\d\d?)", max_tokens=30, @@ -199,9 +181,7 @@ Outlines 〰 allows to guide the generation process so the output is *guaranteed from enum import Enum from pydantic import BaseModel, constr -import outlines.models as models -import outlines.text.generate as generate - +import outlines import torch @@ -228,10 +208,10 @@ class Character(BaseModel): strength: int -model = models.transformers("gpt2", device="cuda") +model = outlines.models.transformers("gpt2", device="cuda") # Construct guided sequence generator -generator = generate.json(model, Character, max_tokens=100) +generator = outlines.generate.json(model, Character, max_tokens=100) # Draw a sample rng = torch.Generator(device="cuda") @@ -269,14 +249,14 @@ The method works with union types, optional types, arrays, nested schemas, etc. Outlines can infer the structure of the output from the signature of a function. The result is a dictionary, and can be passed directly to the function using the usual dictionary expansion syntax `**`: ```python -from outlines import models -from outlines import text +import outlines + def add(a: int, b: int): return a + b -model = models.transformers("mistralai/Mistral-7B") -generator = text.generate.json(model, add) +model = outlines.models.transformers("mistralai/Mistral-7B") +generator = outlines.generate.json(model, add) result = generator("Return two integers named a and b respectively. a is odd and b even.") print(add(**result)) @@ -300,9 +280,7 @@ Template functions require no superfluous abstraction, they use the Jinja2 templating engine to help build complex prompts in a concise manner: ``` python -import outlines.text as text -import outlines.models as models - +import outlines examples = [ ("The food was digusting", "Negative"), @@ -311,7 +289,7 @@ examples = [ ("The waiter was rude", "Negative") ] -@text.prompt +@outlines.prompt def labelling(to_label, examples): """You are a sentiment-labelling assistant. @@ -321,9 +299,9 @@ def labelling(to_label, examples): {{ to_label }} // """ -model = models.transformers("gpt2") +model = outlines.models.transformers("gpt2") prompt = labelling("Just awesome", examples) -answer = text.generate.continuation(model, max_tokens=100)(prompt) +answer = outlines.generate.continuation(model, max_tokens=100)(prompt) ``` ### Tools @@ -337,7 +315,7 @@ extract the function's name, description, signature and source: ``` python from typing import Callable, List -import outlines.text as text +import outlines def google_search(query: str): @@ -350,7 +328,7 @@ def wikipedia_search(query: str): pass -@text.prompt +@outlines.prompt def my_commands(tools: List[Callable]): """AVAILABLE COMMANDS: @@ -374,7 +352,7 @@ extract the expected response's schema: ``` python from pydantic import BaseModel, Field -import outlines.text as text +import outlines class Joke(BaseModel): @@ -384,7 +362,7 @@ class Joke(BaseModel): ) -@text.prompt +@outlines.prompt def joke_ppt(response_model): """Tell a joke and explain why the joke is funny. 
diff --git a/docs/examples/chain_of_density.md b/docs/examples/chain_of_density.md
index 3eb5f4a12..0ee36ad45 100644
--- a/docs/examples/chain_of_density.md
+++ b/docs/examples/chain_of_density.md
@@ -29,9 +29,9 @@ The prompt also asks the model to return a list of JSON objects that contain the
 We can now implement the prompt provided in the paper:

 ```python
-from outlines import text
+import outlines

-@text.prompt
+@outlines.prompt
 def chain_of_density(article):
     """Article: {{ article }}

@@ -86,12 +86,10 @@ class Summaries(BaseModel):
 We now generate the prompt by passing the article we want to summarize to the template. We load a quantized version of Mistral-7B using the AutoAWQ library, and then use JSON-guided generation to generate the summaries:

 ```python
-from outlines import models
-
-model = models.awq("TheBloke/Mistral-7B-OpenOrca-AWQ")
+model = outlines.models.awq("TheBloke/Mistral-7B-OpenOrca-AWQ")

 prompt = chain_of_density(article)
-result = text.generate.json(model, Summaries)(prompt)
+result = outlines.generate.json(model, Summaries)(prompt)
 ```

 We can now check the results:
diff --git a/docs/examples/dating_profiles.md b/docs/examples/dating_profiles.md
index 3b365431f..894a0a1fa 100644
--- a/docs/examples/dating_profiles.md
+++ b/docs/examples/dating_profiles.md
@@ -10,8 +10,7 @@ import torch
 import transformers
 from pydantic import BaseModel, conlist, constr

-import outlines.models as models
-import outlines.text as text
+import outlines
 ```

 ## Defining the profile with Pydantic
@@ -59,7 +58,7 @@ We will use Outlines' prompt templating abilities to generate the prompt for us.

 ```python
-@text.prompt
+@outlines.prompt
 def dating_profile_prompt(description: str, examples: list[Example]):
     """
     You are a world-renowned matchmaker who understands the modern dating
@@ -136,7 +135,7 @@ config = transformers.AutoConfig.from_pretrained(
     "mosaicml/mpt-7b-8k-instruct", trust_remote_code=True
 )
 config.init_device = "meta"
-model = models.transformers(
+model = outlines.models.transformers(
     model_name="mosaicml/mpt-7b-8k-instruct",
     device="cuda",
     model_kwargs={
@@ -163,7 +162,7 @@ it's a good excuse for a date. I watch the latest series because I'm paying,
 with my hard-earned money, for every streaming service."""

 prompt = dating_profile_prompt(new_description, samples)
-profile = text.generate.json(model, DatingProfile)(prompt)
+profile = outlines.generate.json(model, DatingProfile)(prompt)

 parsed_profile = DatingProfile.model_validate_json(profile)
 ```
diff --git a/outlines/__init__.py b/outlines/__init__.py
index d211706d5..7c8414af0 100644
--- a/outlines/__init__.py
+++ b/outlines/__init__.py
@@ -1,5 +1,6 @@
 """Outlines is a Generative Model Programming Framework."""
 import outlines.generate
+import outlines.models
 import outlines.text.generate
 from outlines.base import vectorize
 from outlines.caching import clear_cache, disable_cache, get_cache
diff --git a/outlines/models/openai.py b/outlines/models/openai.py
index 45509a9f4..47e83345b 100644
--- a/outlines/models/openai.py
+++ b/outlines/models/openai.py
@@ -8,7 +8,7 @@

 import numpy as np

-import outlines
+from outlines.base import vectorize
 from outlines.caching import cache

 __all__ = ["OpenAI", "openai"]
@@ -282,7 +282,7 @@ def __repr__(self):

 @cache(ignore="client")
-@functools.partial(outlines.vectorize, signature="(),(),(),()->(s),()")
+@functools.partial(vectorize, signature="(),(),()->(s)")
 async def generate_chat(
     prompt: str,
     system_prompt: Union[str, None],
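
A note on the final hunk: the string passed to `vectorize` is a NumPy-style generalized-ufunc signature. The old `"(),(),(),()->(s),()"` declared four scalar inputs and two outputs; the new `"(),(),()->(s)"` declares three scalar inputs that broadcast together and a single one-dimensional output of core dimension `s`. The sketch below illustrates the convention with `numpy.vectorize`, which accepts the same signature syntax; `fake_chat` and its return values are illustrative placeholders rather than outlines code.

```python
import numpy as np


def fake_chat(prompt: str, system_prompt: str, temperature: float) -> np.ndarray:
    """Stand-in for one chat-completion call; returns a 1-D array of token ids."""
    return np.array([len(prompt), len(system_prompt), int(temperature * 10)])


# "(),(),()->(s)": three scalar (zero-dimensional) inputs are broadcast
# elementwise; each call contributes a 1-D core block of length s.
vectorized_chat = np.vectorize(fake_chat, signature="(),(),()->(s)")

# Two prompts broadcast against a single system prompt and temperature,
# so the result has shape (2, 3): batch dimension x core dimension s.
out = vectorized_chat(["Hi there", "Bye now"], "be terse", 0.7)
print(out.shape)
```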
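
The `@text.prompt` to `@outlines.prompt` hunks above all build on the behaviour the README describes: the decorated function's docstring is treated as a Jinja2 template and rendered with the arguments of the call. The following is a rough sketch of that idea; `prompt_template` is a hypothetical stand-in, not the actual `outlines.prompt` implementation.

```python
import inspect

from jinja2 import Environment


def prompt_template(fn):
    """Hypothetical decorator: render fn's docstring as a Jinja2 template."""
    env = Environment(trim_blocks=True, lstrip_blocks=True)
    template = env.from_string(inspect.cleandoc(fn.__doc__))
    signature = inspect.signature(fn)

    def render(*args, **kwargs):
        # Bind the call's arguments to the function's parameter names so the
        # template can refer to them by name.
        bound = signature.bind(*args, **kwargs)
        return template.render(**bound.arguments)

    return render


@prompt_template
def labelling(to_label, examples):
    """You are a sentiment-labelling assistant.

    {% for example in examples %}
    {{ example[0] }} // {{ example[1] }}
    {% endfor %}
    {{ to_label }} //
    """


print(labelling("Just awesome", [("The food was disgusting", "Negative")]))
```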