Commit

Merge pull request #145 from stanford-crfm/jonathan/1124-weekly-assets
weekly update
rishibommasani authored Jan 9, 2024
2 parents e1fbc20 + 58c0385 commit 3a2c2ba
Showing 8 changed files with 243 additions and 11 deletions.
14 changes: 7 additions & 7 deletions assets/01ai.yaml
@@ -1,22 +1,22 @@
 ---
 - type: model
   name: Yi
-  organization: 01.AI
-  description: Yi is a LLM that can accept input/outputs in both English and Chinese.
+  organization: 01 AI
+  description: The Yi series models are large language models trained from scratch by developers at 01 AI.
   created_date: 2023-11-02
   url: https://github.com/01-ai/Yi
   model_card: https://huggingface.co/01-ai/Yi-34B
   modality: text; text
-  analysis: Evaluated on common sense reasoning and reading comprehension, analogous to LLaMA 2's analysis.
+  analysis: Evaluated on standard language benchmarks, common sense reasoning, and reading comprehension in comparison to SoTA LLMs.
   size: 34B parameters (dense)
   dependencies: []
   training_emissions: unknown
   training_time: unknown
   training_hardware: unknown
-  quality_control: ''
+  quality_control: Model underwent supervised fine-tuning, leading to a greater diversity of responses.
   access: open
   license: Apache 2.0
-  intended_uses: Academic research and free commercial usage
-  prohibited_uses: ''
-  monitoring: none
+  intended_uses: ''
+  prohibited_uses: none
+  monitoring: unknown
   feedback: https://huggingface.co/01-ai/Yi-34B/discussions
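
Each asset file in this change is a YAML list of records with a shared schema (type, name, organization, access, license, and so on). As a minimal sketch, not part of this commit, such a file could be loaded and inspected with PyYAML:

```python
import yaml  # assumes PyYAML is installed (pip install pyyaml)

with open("assets/01ai.yaml") as f:
    assets = yaml.safe_load(f)  # the top-level document is a list of records

for asset in assets:
    # Every record carries a type (model, dataset, or application), a name,
    # and provenance fields such as access and license.
    print(asset["type"], asset["name"], asset.get("access"), asset.get("license"))
```
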
94 changes: 94 additions & 0 deletions assets/ai2.yaml
@@ -115,3 +115,97 @@
  monitoring: Quality filtration, deduplication, and risk mitigation via logistic
    qualifiers and regular expressions used.
  feedback: ''

- type: dataset
  name: Tulu-V2-mix
  organization: AI2
  description: Tulu-V2-mix is a mixture of many high-quality instruction datasets that yields stronger performance across a variety of reasoning and knowledge-probing tasks.
  created_date: 2023-11-20
  url: https://arxiv.org/pdf/2311.10702.pdf
  datasheet: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture
  modality: text
  size:
    value: unknown
    explanation: The magnitude of the size is around 100M tokens, given the length distribution of the dataset provided in the model card.
  sample: []
  analysis: Models trained on the dataset are evaluated on downstream performance.
  dependencies: [FLAN Collection, Open Assistant 1, ShareGPT, Alpaca dataset, Code Alpaca, LIMA, WizardLM, OpenOrca]
  included: ''
  excluded: ''
  quality_control: ''
  access: open
  license: ODC-BY
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: https://huggingface.co/datasets/allenai/tulu-v2-sft-mixture/discussions

- type: model
  name: Tulu 2
  organization: AI2
  description: Tulu 2 is a fine-tuned version of LLaMA 2 trained on the new Tulu-V2-mix dataset.
  created_date: 2023-11-20
  url: https://arxiv.org/pdf/2311.10702.pdf
  model_card: https://huggingface.co/allenai/tulu-2-70b
  modality: text; text
  analysis: Evaluated on MT-Bench and AlpacaEval in comparison to other chatbots.
  size: 70B parameters (dense)
  dependencies: [LLaMA 2, Tulu-V2-mix]
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: ''
  access: open
  license: AI2 ImpACT
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: https://huggingface.co/allenai/tulu-2-70b/discussions

- type: model
  name: Tulu 2 DPO
  organization: AI2
  description: Tulu 2 DPO is trained in the same manner as Tulu 2, but further optimized with Direct Preference Optimization (DPO).
  created_date: 2023-11-20
  url: https://arxiv.org/pdf/2311.10702.pdf
  model_card: https://huggingface.co/allenai/tulu-2-dpo-70b
  modality: text; text
  analysis: Evaluated on MT-Bench and AlpacaEval in comparison to other chatbots.
  size: 70B parameters (dense)
  dependencies: [LLaMA 2, Tulu-V2-mix]
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: ''
  access: open
  license: AI2 ImpACT
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: https://huggingface.co/allenai/tulu-2-dpo-70b/discussions
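
For context on the technique named in the description: DPO fine-tunes the policy directly on preference pairs against a frozen reference model, with no separately learned reward model. A minimal PyTorch sketch of the DPO objective (Rafailov et al., 2023), with an illustrative beta; this is not the authors' training code:

```python
import torch
import torch.nn.functional as F

def dpo_loss(policy_chosen_logps: torch.Tensor,
             policy_rejected_logps: torch.Tensor,
             ref_chosen_logps: torch.Tensor,
             ref_rejected_logps: torch.Tensor,
             beta: float = 0.1) -> torch.Tensor:
    """DPO loss over a batch of preference pairs. Each tensor holds the summed
    log-probability of a response under the trainable policy or the frozen
    reference model; beta (illustrative) scales the implicit reward."""
    # Implicit rewards: log-ratios of policy to reference probabilities
    chosen_rewards = beta * (policy_chosen_logps - ref_chosen_logps)
    rejected_rewards = beta * (policy_rejected_logps - ref_rejected_logps)
    # Push the preferred response's implicit reward above the rejected one's
    return -F.logsigmoid(chosen_rewards - rejected_rewards).mean()
```
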

- type: model
  name: Code Tulu 2
  organization: AI2
  description: Code Tulu 2 is a fine-tuned version of Code LLaMA trained on a mix of publicly available, synthetic, and human-created datasets.
  created_date: 2023-11-20
  url: https://arxiv.org/pdf/2311.10702.pdf
  model_card: https://huggingface.co/allenai/codetulu-2-13b
  modality: text; code, text
  analysis: Evaluated on MT-Bench and AlpacaEval in comparison to other chatbots.
  size: 13B parameters (dense)
  dependencies: [Code LLaMA, Tulu-V2-mix]
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: ''
  access: open
  license: AI2 ImpACT
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: https://huggingface.co/allenai/codetulu-2-13b/discussions

22 changes: 22 additions & 0 deletions assets/deepseek.yaml
@@ -0,0 +1,22 @@
---
- type: model
  name: Deepseek
  organization: Deepseek AI
  description: DeepSeek is a 67B-parameter model with Grouped-Query Attention, trained from scratch on 2 trillion tokens.
  created_date: 2023-11-29
  url: https://github.com/deepseek-ai/DeepSeek-LLM
  model_card: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base
  modality: text; text
  analysis: DeepSeek and baseline models (for comparison) were evaluated on a series of representative benchmarks, in both English and Chinese.
  size: 67B parameters (dense)
  dependencies: []
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: The training dataset has a diverse data composition and was pruned and deduplicated.
  access: open
  license: MIT
  intended_uses: ''
  prohibited_uses: none
  monitoring: unknown
  feedback: https://huggingface.co/deepseek-ai/deepseek-llm-67b-base/discussions
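
For context on the attention variant named in the description: Grouped-Query Attention lets each key/value head serve a whole group of query heads, shrinking the KV cache relative to standard multi-head attention. A minimal PyTorch sketch with illustrative shapes; causal masking is omitted for brevity:

```python
import torch

def grouped_query_attention(q: torch.Tensor, k: torch.Tensor,
                            v: torch.Tensor) -> torch.Tensor:
    """q: (batch, n_q_heads, seq, dim); k, v: (batch, n_kv_heads, seq, dim),
    where n_q_heads is an integer multiple of n_kv_heads."""
    n_q_heads, n_kv_heads = q.shape[1], k.shape[1]
    group_size = n_q_heads // n_kv_heads
    # Each KV head is shared by a contiguous group of query heads
    k = k.repeat_interleave(group_size, dim=1)
    v = v.repeat_interleave(group_size, dim=1)
    scores = q @ k.transpose(-2, -1) / q.shape[-1] ** 0.5
    return torch.softmax(scores, dim=-1) @ v
```
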
23 changes: 23 additions & 0 deletions assets/inflection.yaml
@@ -45,3 +45,26 @@
  monthly_active_users: ''
  user_distribution: ''
  failures: ''

- type: model
  name: Inflection-2
  organization: Inflection AI
  description: According to Inflection AI's benchmark evaluation at release, Inflection-2 is the best model in its compute class and the second most capable LLM in the world.
  created_date: 2023-11-22
  url: https://inflection.ai/inflection-2
  model_card: none
  modality: text; text
  analysis: Evaluated against state-of-the-art models on benchmarks, and found to be the most performant model outside of GPT-4.
  size: unknown
  dependencies: []
  training_emissions: unknown
  training_time: unknown
  training_hardware: 5000 NVIDIA H100 GPUs
  quality_control: ''
  access: closed
  license: unknown
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: none

23 changes: 23 additions & 0 deletions assets/meta.yaml
@@ -695,6 +695,28 @@
  monitoring: ''
  feedback: ''

- type: model
  name: Code LLaMA
  organization: Meta
  description: Code Llama is a collection of pretrained and fine-tuned generative text models ranging in scale from 7 billion to 34 billion parameters.
  created_date: 2023-08-24
  url: https://ai.meta.com/research/publications/code-llama-open-foundation-models-for-code/
  model_card: https://huggingface.co/codellama/CodeLlama-34b-hf
  modality: text; code, text
  analysis: Evaluated on several code benchmarks like HumanEval and MBPP.
  size: 34B parameters (dense)
  dependencies: [LLaMA 2]
  training_emissions: 65.3 tCO2eq
  training_time: 400K GPU hours
  training_hardware: A100-80GB GPUs
  quality_control: ''
  access: open
  license: LLaMA 2
  intended_uses: Code Llama and its variants are intended for commercial and research use in English and relevant programming languages.
  prohibited_uses: Use in any manner that violates applicable laws or regulations (including trade compliance laws). Use in languages other than English. Use in any other way that is prohibited by the Acceptable Use Policy and Licensing Agreement for Code Llama and its variants.
  monitoring: ''
  feedback: https://huggingface.co/codellama/CodeLlama-34b-hf/discussions
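
Since the model card above points to a Hugging Face checkpoint, one plausible way to try the 34B base model is via the transformers library (a sketch assuming transformers and accelerate are installed and GPU memory suffices; the prompt is illustrative):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

checkpoint = "codellama/CodeLlama-34b-hf"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint, torch_dtype=torch.float16, device_map="auto"
)

# Illustrative prompt: the base model performs plain code completion
inputs = tokenizer("def fibonacci(n):", return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
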

- type: model
  name: Emu Video
  organization: Meta
@@ -765,3 +787,4 @@
  prohibited_uses: ''
  monitoring: none
  feedback: none

22 changes: 22 additions & 0 deletions assets/qwen.yaml
@@ -0,0 +1,22 @@
---
- type: model
  name: Qwen
  organization: Qwen AI
  description: Qwen is a Transformer-based large language model pretrained on a large volume of data, including web text, books, code, and more.
  created_date: 2023-11-26
  url: https://arxiv.org/pdf/2309.16609.pdf
  model_card: https://huggingface.co/Qwen/Qwen-72B
  modality: text; text
  analysis: Evaluated on popular benchmarks (MMLU, C-Eval, GSM8K, MATH, HumanEval, MBPP, BBH, CMMLU) to test the model’s Chinese and English knowledge, translation, mathematical reasoning, coding, and other capabilities.
  size: 72B parameters (dense)
  dependencies: []
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: none
  access: open
  license: Apache 2.0
  intended_uses: ''
  prohibited_uses: none
  monitoring: unknown
  feedback: https://huggingface.co/Qwen/Qwen-72B/discussions
46 changes: 46 additions & 0 deletions assets/stability.yaml
@@ -94,6 +94,51 @@
  monthly_active_users: ''
  user_distribution: ''
  failures: ''

- type: model
  name: Stable Video Diffusion
  organization: Stability AI
  description: Stable Video Diffusion is a latent diffusion model trained to generate short video clips conditioned on an input image.
  created_date: 2023-11-21
  url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf
  model_card: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt
  modality: image; video
  analysis: Evaluated via a user study comparing preferences between Stable Video Diffusion and competing text-to-video models.
  size: unknown
  dependencies: [Large Video Dataset]
  training_emissions: unknown
  training_time: unknown
  training_hardware: unknown
  quality_control: ''
  access: limited
  license: unknown
  intended_uses: Intended for research purposes only.
  prohibited_uses: Using the model to generate representations of real-world people or events.
  monitoring: ''
  feedback: https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt/discussions
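
Given the model card above, image-to-video sampling can plausibly be run through the Hugging Face diffusers pipeline (a sketch assuming a diffusers release with video support, a CUDA GPU, and granted access to the weights; file names are illustrative):

```python
import torch
from diffusers import StableVideoDiffusionPipeline
from diffusers.utils import export_to_video, load_image

pipe = StableVideoDiffusionPipeline.from_pretrained(
    "stabilityai/stable-video-diffusion-img2vid-xt", torch_dtype=torch.float16
)
pipe.to("cuda")

# Condition generation on a single input image (illustrative file name)
image = load_image("conditioning_frame.png").resize((1024, 576))
frames = pipe(image, num_frames=25).frames[0]
export_to_video(frames, "generated_clip.mp4", fps=7)
```
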

- type: dataset
  name: Large Video Dataset
  organization: Stability AI
  description: Large Video Dataset is the dataset that Stable Video Diffusion was trained on, consisting of over 212 years of video content.
  created_date: 2023-11-21
  url: https://static1.squarespace.com/static/6213c340453c3f502425776e/t/655ce779b9d47d342a93c890/1700587395994/stable_video_diffusion.pdf
  datasheet: ''
  modality: video with caption
  size: 580M annotated video clip pairs
  sample: []
  analysis: Large Video Dataset was compared to publicly available research datasets on general statistics before and after filtering.
  dependencies: [WebVid-10M, CoCa, V-BLIP]
  included: ''
  excluded: ''
  quality_control: Dataset annotated with dense optical flow; low optical flow videos are removed.
  access: closed
  license: unknown
  intended_uses: ''
  prohibited_uses: ''
  monitoring: ''
  feedback: none

- type: application
  name: Sky Replacer
  organization: Stability AI
@@ -114,3 +159,4 @@
  monthly_active_users: ''
  user_distribution: ''
  failures: ''

10 changes: 6 additions & 4 deletions assets/xai.yaml
@@ -2,19 +2,21 @@
 - type: model
   name: Grok-1
   organization: xAI
-  description: Grok is an AI modeled after the Hitchhiker’s Guide to the Galaxy, intended to answer almost anything and even suggest what questions to ask.
+  description: Grok is a text chatbot modeled after the Hitchhiker’s Guide to the Galaxy, and is thus intended to answer almost anything and even suggest what questions to ask.
   created_date: 2023-11-04
-  url: https://x.ai/
+  url: https://grok.x.ai/
   model_card: https://x.ai/model-card/
   modality: text; text
   analysis: Grok-1 was evaluated on a range of reasoning benchmark tasks and on curated foreign mathematics examination questions.
-  size: unknown
+  size:
+    explanation: No model size specified, but Grok-1 is larger than predecessor Grok-0 (33B parameters), as stated in the Grok announcement at https://x.ai/.
+    value: unknown
   dependencies: []
   training_emissions: unknown
   training_time: unknown
   training_hardware: unknown
   quality_control: none
-  access: closed
+  access: limited
   license: unknown
   intended_uses: Grok-1 is intended to be used as the engine behind Grok for natural language processing tasks including question answering, information retrieval, creative writing and coding assistance.
   prohibited_uses: none
