diff --git a/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitecturePrompt.java b/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitecturePrompt.java index 16b26d5..1e85cbc 100644 --- a/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitecturePrompt.java +++ b/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitecturePrompt.java @@ -10,7 +10,12 @@ public enum LLMArchitecturePrompt { %s """, - "Now provide a list that only covers the component names. Omit common prefixes and suffixes in the names."), // + """ + Now provide a list that only covers the component names in camel case. Omit common prefixes and suffixes. + Output format: + - Name1 + - Name2 + """), // CODE_ONLY_V1( """ You get the Packages of a software project. Your task is to summarize the Packages w.r.t. the high-level architecture of the system. Try to identify possible components. @@ -19,10 +24,18 @@ public enum LLMArchitecturePrompt { %s """, - "Now provide a list that only covers the component names. Omit common prefixes and suffixes in the names."), // + """ + Now provide a list that only covers the component names. Omit common prefixes and suffixes in the names in camel case. + Output format: + - Name1 + - Name2 + """), // AGGREGATION_V1(""" You get a list of possible component names. Your task is to aggregate the list and remove duplicates. - Also filter out component names that are very generic. Do not repeat what you filtered out. Only provide the final enumeration. + Also filter out component names that are very generic. Provide only the final component names in camel case. + Output format: + - Name1 + - Name2 Possible component names: diff --git a/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitectureProviderInformant.java b/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitectureProviderInformant.java index 5b9f3b9..a7b2085 100644 --- a/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitectureProviderInformant.java +++ b/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LLMArchitectureProviderInformant.java @@ -81,9 +81,8 @@ protected void process() { // Remove any not letter characters componentNames = componentNames.stream() - .map(it -> it.replaceAll("[^a-zA-Z0-9 \\-_]", "").replaceAll("\\s+", " ").trim()) + // .map(it -> it.replaceAll("[^a-zA-Z0-9 \\-_]", "").replaceAll("\\s+", " ").trim()) .map(it -> it.replace("Components", "").replace("Component", "").trim()) - .map(it -> it.replace(" ", "")) .filter(it -> !it.isBlank()) .distinct() .sorted() @@ -162,31 +161,19 @@ private void parseComponentNames(String response, List componentNames) { } line = line.trim(); - // Version 5: 1. Name (NotImportant) or 2. Name (SomeString) - if (line.matches("^\\d+\\.\\s+.+\\s*\\(.*\\)$")) { - componentNames.add(line.split("\\.\\s+")[1].split("\\s*\\(.*\\)")[0]); - } - // Version 1: 1. **Name** or 2. **Name** - else if (line.matches("^\\d+\\.\\s*\\*\\*.*\\*\\*$")) { - componentNames.add(line.split("\\*\\*")[1]); - } - // Version 2: 1. Name or 2. Name - else if (line.matches("^\\d+\\.\\s+.*$")) { - componentNames.add(line.split("\\d+\\.\\s+")[1]); - } - // Version 3: - **Name** - else if (line.matches("^([-*])\\s+\\*\\*.*\\*\\*$")) { - componentNames.add(line.split("\\*\\*")[1]); - } - // Version 4: - Name - else if (line.matches("^([-*])\\s+.*$")) { - componentNames.add(line.split("([-*])\\s+")[1]); - } - // Version 5: Name, Name2, Name3, (at least 4 names .. otherwise we match to many things) - else if (line.matches("(.*,\\s+){3,}.*")) { - componentNames.addAll(List.of(line.split(",\\s+"))); - } else { - logger.warn("Could not parse line: {}", line); + if (line.startsWith("-")) { + // Defined Format "- Name1" + var name = line.substring(1).trim(); + componentNames.add(name); + } /* else if (Character.isDigit(line.charAt(0)) && line.contains(".")) { + // Fallback Format: 1. Name + var name = line.split("\\.", 2)[1].trim(); + // We defined camel case ... so all after the space might be additional information + if (name.contains(" ")) + name = name.split(" ", 2)[0].trim(); + componentNames.add(name); + }*/ else { + logger.warn("Could not parse component name: {}", line); } } } diff --git a/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LargeLanguageModel.java b/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LargeLanguageModel.java index af6aae7..6c6d594 100644 --- a/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LargeLanguageModel.java +++ b/stages-tlr/model-provider/src/main/java/edu/kit/kastel/mcse/ardoco/tlr/models/informants/LargeLanguageModel.java @@ -69,7 +69,13 @@ private static ChatLanguageModel createOpenAiModel(String model) { if (apiKey == null || orgId == null) { throw new IllegalArgumentException("OPENAI_API_KEY and OPENAI_ORG_ID must be set as environment variables"); } - return new OpenAiChatModel.OpenAiChatModelBuilder().modelName(model).apiKey(apiKey).organizationId(orgId).seed(SEED).temperature(0.0).build(); + return new OpenAiChatModel.OpenAiChatModelBuilder().modelName(model) + .apiKey(apiKey) + .organizationId(orgId) + .seed(SEED) + .temperature(0.0) + .timeout(Duration.ofMinutes(10)) + .build(); } private static ChatLanguageModel createOllamaModel(String model) {