From 5c4a45bf5398651a92f288eefe3605a963a9d580 Mon Sep 17 00:00:00 2001 From: Jerome Lelong Date: Wed, 25 Dec 2024 18:06:58 +0100 Subject: [PATCH] Parse glossary bib files to populate intellisense --- src/completion/completer/citation.ts | 5 +- src/completion/completer/glossary.ts | 147 +++++++++++++++++++++++---- src/core/cache.ts | 37 +++++++ src/outline/structure/bibtex.ts | 2 +- src/types.ts | 2 + 5 files changed, 169 insertions(+), 24 deletions(-) diff --git a/src/completion/completer/citation.ts b/src/completion/completer/citation.ts index b54d1307a..59818cd84 100644 --- a/src/completion/completer/citation.ts +++ b/src/completion/completer/citation.ts @@ -44,7 +44,10 @@ export const bibTools = { parseAbbrevations } -function expandField(abbreviations: {[key: string]: string}, value: bibtexParser.FieldValue): string { +function expandField(abbreviations: {[key: string]: string}, value: bibtexParser.FieldValue | undefined): string { + if (value === undefined) { + return '' + } if (value.kind === 'concat') { const args = value.content as bibtexParser.FieldValue[] return args.map(arg => expandField(abbreviations, arg)).join(' ') diff --git a/src/completion/completer/glossary.ts b/src/completion/completer/glossary.ts index fda300e98..09b1807ae 100644 --- a/src/completion/completer/glossary.ts +++ b/src/completion/completer/glossary.ts @@ -1,11 +1,14 @@ import * as vscode from 'vscode' import type * as Ast from '@unified-latex/unified-latex-types' +import { bibtexParser } from 'latex-utensils' import { lw } from '../../lw' import { GlossaryType } from '../../types' import type { CompletionProvider, FileCache, GlossaryItem } from '../../types' import { argContentToStr } from '../../utils/parser' import { getLongestBalancedString } from '../../utils/utils' +import { bibTools } from './citation' +const logger = lw.log('Intelli', 'Glossary') export const provider: CompletionProvider = { from } export const glossary = { parse, @@ -13,17 +16,19 @@ export const glossary = 
{ } const data = { + // The keys are the labels of the glossary items. glossaries: new Map(), - acronyms: new Map() + acronyms: new Map(), + // The keys are the paths of the `.bib` files. + bibEntries: new Map(), } -interface GlossaryEntry { - label: string | undefined, - description: string | undefined -} +lw.watcher.bib.onCreate(uri => parseBibFile(uri.fsPath)) +lw.watcher.bib.onChange(uri => parseBibFile(uri.fsPath)) +lw.watcher.bib.onDelete(uri => removeEntriesInFile(uri.fsPath)) function from(result: RegExpMatchArray): vscode.CompletionItem[] { - updateAll() + updateAll(getIncludedBibs(lw.root.file.path)) let suggestions: Map if (result[1] && result[1].match(/^ac/i)) { @@ -38,14 +43,58 @@ function from(result: RegExpMatchArray): vscode.CompletionItem[] { } function getItem(token: string): GlossaryItem | undefined { - updateAll() + updateAll(getIncludedBibs(lw.root.file.path)) return data.glossaries.get(token) || data.acronyms.get(token) } -function updateAll() { +/** + * Returns the array of the paths of glossary `.bib` files referenced from `file` and, recursively, from its cached children. + * + * @param file The path of a LaTeX file. + * @param visitedTeX Internal use only. + */ +function getIncludedBibs(file?: string, visitedTeX: string[] = []): string[] { + if (file === undefined) { + return [] + } + const cache = lw.cache.get(file) + if (cache === undefined) { + return [] + } + let bibs = Array.from(cache.glossarybibfiles) + visitedTeX.push(file) + for (const child of cache.children) { + if (visitedTeX.includes(child.filePath)) { + // Already included + continue + } + bibs = Array.from(new Set(bibs.concat(getIncludedBibs(child.filePath, visitedTeX)))) + } + return bibs +} + +/** + * Aggregates into `data.glossaries` and `data.acronyms` the glossary entries parsed from `.bib` files and the glossary items defined on LaTeX files included in the root file. + * + * @param bibFiles The array of the paths of the glossary `.bib` files whose parsed entries are looked up in `data.bibEntries`. 
+ */ +function updateAll(bibFiles: string[]) { // Extract cached references const glossaryList: string[] = [] + // From bib files + bibFiles.forEach(file => { + const entries = data.bibEntries.get(file) + entries?.forEach(entry => { + if (entry.type === GlossaryType.glossary) { + data.glossaries.set(entry.label, entry) + } else { + data.acronyms.set(entry.label, entry) + } + glossaryList.push(entry.label) + }) + }) + lw.cache.getIncludedTeX().forEach(cachedFile => { const cachedGlossaries = lw.cache.get(cachedFile)?.elements.glossary if (cachedGlossaries === undefined) { @@ -61,7 +110,7 @@ function updateAll() { }) }) - // Remove references that has been deleted + // Remove references that have been deleted data.glossaries.forEach((_, key) => { if (!glossaryList.includes(key)) { data.glossaries.delete(key) @@ -74,6 +123,64 @@ function updateAll() { }) } +/** + * Parses a glossary `.bib` file and stores the resulting entries in `data.bibEntries` under the file path. + * + * @param fileName The path of the `.bib` file. + */ +async function parseBibFile(fileName: string) { + logger.log(`Parsing glossary .bib entries from ${fileName}`) + const configuration = vscode.workspace.getConfiguration('latex-workshop', vscode.Uri.file(fileName)) + if ((await lw.external.stat(vscode.Uri.file(fileName))).size >= (configuration.get('bibtex.maxFileSize') as number) * 1024 * 1024) { + logger.log(`Bib file is too large, ignoring it: ${fileName}`) + data.bibEntries.delete(fileName) + return + } + const newEntry: GlossaryItem[] = [] + const bibtex = await lw.file.read(fileName) + logger.log(`Parse BibTeX AST from ${fileName} .`) + const ast = await lw.parser.parse.bib(vscode.Uri.file(fileName), bibtex ?? 
'') + if (ast === undefined) { + logger.log(`Parsed 0 bib entries from ${fileName}.`) + lw.event.fire(lw.event.FileParsed, fileName) + return + } + const abbreviations = bibTools.parseAbbrevations(ast) + ast.content + .filter(bibtexParser.isEntry) + .forEach((entry: bibtexParser.Entry) => { + if (entry.internalKey === undefined) { + return + } + let type: GlossaryType + if ( ['entry'].includes(entry.entryType) ) { + type = GlossaryType.glossary + } else { + type = GlossaryType.acronym + } + const name = bibTools.expandField(abbreviations, entry.content.find(field => field.name === 'name')?.value) + const description = bibTools.expandField(abbreviations, entry.content.find(field => field.name === 'description')?.value) + const item: GlossaryItem = { + type, + label: entry.internalKey, + filePath: fileName, + position: new vscode.Position(entry.location.start.line - 1, entry.location.start.column - 1), + kind: vscode.CompletionItemKind.Reference, + detail: name + ': ' + description + } + newEntry.push(item) + }) + data.bibEntries.set(fileName, newEntry) + logger.log(`Parsed ${newEntry.length} glossary bib entries from ${fileName} .`) + void lw.outline.reconstruct() + lw.event.fire(lw.event.FileParsed, fileName) +} + +function removeEntriesInFile(file: string) { + logger.log(`Remove parsed bib entries for ${file}`) + data.bibEntries.delete(file) +} + function parse(cache: FileCache) { if (cache.ast !== undefined) { cache.elements.glossary = parseAst(cache.ast, cache.filePath) @@ -84,12 +191,13 @@ function parse(cache: FileCache) { function parseAst(node: Ast.Node, filePath: string): GlossaryItem[] { let glos: GlossaryItem[] = [] - let entry: GlossaryEntry = { label: '', description: '' } + let label: string = '' + let description: string = '' let type: GlossaryType | undefined if (node.type === 'macro' && ['newglossaryentry', 'provideglossaryentry'].includes(node.content)) { type = GlossaryType.glossary - let description = argContentToStr(node.args?.[1]?.content || 
[], true) + description = argContentToStr(node.args?.[1]?.content || [], true) const index = description.indexOf('description=') if (index >= 0) { description = description.slice(index + 12) @@ -101,28 +209,23 @@ function parseAst(node: Ast.Node, filePath: string): GlossaryItem[] { } else { description = '' } - entry = { - label: argContentToStr(node.args?.[0]?.content || []), - description - } + label = argContentToStr(node.args?.[0]?.content || []) } else if (node.type === 'macro' && ['longnewglossaryentry', 'longprovideglossaryentry', 'newacronym', 'newabbreviation', 'newabbr'].includes(node.content)) { if (['longnewglossaryentry', 'longprovideglossaryentry'].includes(node.content)) { type = GlossaryType.glossary } else { type = GlossaryType.acronym } - entry = { - label: argContentToStr(node.args?.[1]?.content || []), - description: argContentToStr(node.args?.[3]?.content || []), - } + label = argContentToStr(node.args?.[1]?.content || []) + description = argContentToStr(node.args?.[3]?.content || []) } - if (type !== undefined && entry.label && entry.description && node.position !== undefined) { + if (type !== undefined && label && description && node.position !== undefined) { glos.push({ type, filePath, position: new vscode.Position(node.position.start.line - 1, node.position.start.column - 1), - label: entry.label, - detail: entry.description, + label, + detail: description, kind: vscode.CompletionItemKind.Reference }) } diff --git a/src/core/cache.ts b/src/core/cache.ts index 4088f92a5..5a1493dad 100644 --- a/src/core/cache.ts +++ b/src/core/cache.ts @@ -250,6 +250,7 @@ async function refreshCache(filePath: string, rootPath?: string): Promise { lw.completion.subsuperscript.parse(fileCache) lw.completion.input.parseGraphicsPath(fileCache) await updateBibfiles(fileCache) + await updateGlossaryBibFiles(fileCache) const elapsed = performance.now() - start logger.log(`Updated elements in ${elapsed.toFixed(2)} ms: ${fileCache.filePath} .`) } @@ -516,6 +518,41 @@ 
async function updateBibfiles(fileCache: FileCache) { } } +/** + * Updates the glossary `.bib` files associated with a given file cache. + * + * This function parses the content of a file cache to find `\GlsXtrLoadResources` + * macros using a regular expression. It extracts the file paths specified in their + * `src={...}` option, resolves their full paths, and adds them to the set of glossary + * `.bib` files in the file cache. If a glossary file is not excluded, it logs the + * action, adds the file to the cache, and ensures that it is being watched for + * changes. + * + * @param {FileCache} fileCache - The file cache object to update with + * glossary `.bib` files. + */ +async function updateGlossaryBibFiles(fileCache: FileCache) { + const glossaryReg = /\\GlsXtrLoadResources\s*\[.*?src=\{([^}]+)\}.*?\]/gs + + let result: RegExpExecArray | null + while ((result = glossaryReg.exec(fileCache.contentTrimmed)) !== null) { + const bibs = (result[1] ? result[1] : result[2]).split(',').map(bib => bib.trim()) + + for (const bib of bibs) { + const bibPath = await utils.resolveFile([path.dirname(fileCache.filePath)], bib, '.bib') + if (!bibPath || isExcluded(bibPath)) { + continue + } + fileCache.glossarybibfiles.add(bibPath) + logger.log(`Glossary bib ${bibPath} from ${fileCache.filePath} .`) + const bibUri = vscode.Uri.file(bibPath) + if (!lw.watcher.bib.has(bibUri)) { + lw.watcher.bib.add(bibUri) + } + } + } +} + /** * Loads and processes a .fls file related to a specified file path. 
* diff --git a/src/outline/structure/bibtex.ts b/src/outline/structure/bibtex.ts index 294bf177d..dc6b52990 100644 --- a/src/outline/structure/bibtex.ts +++ b/src/outline/structure/bibtex.ts @@ -11,7 +11,7 @@ const logger = lw.log('Structure', 'BibTeX') * Convert a bibtexParser.FieldValue to a string * @param field the bibtexParser.FieldValue to parse */ -function fieldValueToString(field: bibtexParser.FieldValue, abbreviations: {[abbr: string]: string}): string { +export function fieldValueToString(field: bibtexParser.FieldValue, abbreviations: {[abbr: string]: string}): string { if (field.kind === 'concat') { return field.content.map(value => fieldValueToString(value, abbreviations)).reduce((acc, cur) => {return acc + ' # ' + cur}) } else if (field.kind === 'abbreviation') { diff --git a/src/types.ts b/src/types.ts index d26d33f1b..c2ea14273 100644 --- a/src/types.ts +++ b/src/types.ts @@ -37,6 +37,8 @@ export type FileCache = { }[], /** The array of the paths of `.bib` files referenced from the LaTeX file */ bibfiles: Set, + /** The array of the paths of `.bib` files listed by `\GlsXtrLoadResources` to provide glossary entries */ + glossarybibfiles: Set, /** A dictionary of external documents provided by `\externaldocument` of * `xr` package. The value is its prefix `\externaldocument[prefix]{*}` */ external: {[filePath: string]: string},