diff --git a/README.md b/README.md index 8fefd8b..4cf41e4 100755 --- a/README.md +++ b/README.md @@ -10,7 +10,7 @@ tmi-web is a social science research tool for managing, analyzing, coding, and v bundle exec sidekiq -### Clear sidekiq queue +### Clear sidekiq (background job) queue Sidekiq.redis(&:flushdb) @@ -31,6 +31,12 @@ tmi-web is a social science research tool for managing, analyzing, coding, and v rake neo4j:migrate +### Generate rdocs + + rdoc -op doc + +## Dashboards + ### neo4j hosting https://console.neo4j.io diff --git a/app/models/category.rb b/app/models/category.rb index b522fc6..4ea6bb7 100644 --- a/app/models/category.rb +++ b/app/models/category.rb @@ -1,40 +1,47 @@ +# A Category is a label applied to a group of related Codes within a provided context. +# For example, a category may refer to a subset of the codes related to "age". +# Categories are machine-derived. As such, they are influenced by biases in external training data. +# Careful human discernment of categories is required to identify and address these biases. + class Category - # A Category is a label applied to a group of related Codes within a provided context. - # For example, a category may refer to a subset of the codes related to "age". - # Categories are machine-derived. As such, they are influenced by biases in external training data. - # Careful human discernment of categories is required to identify and address these biases. - include ActiveGraph::Node - + property :name property :context - + validates :name, presence: true validates :context, presence: true has_many :out, :codes, rel_class: :CategorizedAs, dependent: :delete_orphans - PROMPT = %{ + PROMPT = %{ You are a social researcher doing data analysis. Please generate a list of the 20 most relevant themes from the following list of codes. The themes should be all lowercase and contain no punctuation. Codes should be stripped of quotation marks. Return each code with an array of its categories in JSON format. 
Use this JSON as the format: - - { + + { "themes" : [ - { + { "theme": "foo", "codes": [ "bar", "bat", "baz"] } ] } - - The codes are as follows: + + The codes are as follows: } + # Regenerates Category objects based on codes within a given context. + # This method uses the Clients::OpenAi client passing the codes as an argument to the prompt. + # The agent returns an array of themes, which are then captured as Category objects. + # + # @param context [String] the context value to use to filter codes for categorization. + # @return [void] exits early, leaving existing categories untouched, when the response contains no themes. + def self.from(context) codes = Code.where(context: context) response = Clients::OpenAi.request("#{PROMPT} #{codes.map(&:name).join(',')}") return unless response['themes'] - + Category.where(context: context).destroy_all response['themes'].each do |record| @@ -50,5 +57,5 @@ def self.from(context) def self.histogram(context) where(context: context).inject({}) { |acc, category| acc[category.name] = category.codes.count; acc } end - -end + +end diff --git a/lib/clients/open_ai.rb b/lib/clients/open_ai.rb index e2b4a79..e45995d 100644 --- a/lib/clients/open_ai.rb +++ b/lib/clients/open_ai.rb @@ -5,7 +5,7 @@ class Clients::OpenAi # # @param prompt [String] the text of the prompt to send to the model. # @return [Hash] the first choice's message content, parsed from JSON. - + def self.request(prompt) client = OpenAI::Client.new response = client.chat( @@ -15,7 +15,7 @@ def self.request(prompt) messages: [{ role: "user", content: prompt }], temperature: 0.7, } - ) + ) return JSON.parse(response.dig("choices", 0, "message", "content")) end