Skip to content

Commit

Permalink
Parse glossary bib files to populate intellisense
Browse files Browse the repository at this point in the history
  • Loading branch information
jlelong committed Dec 25, 2024
1 parent 31614f3 commit 5c4a45b
Show file tree
Hide file tree
Showing 5 changed files with 169 additions and 24 deletions.
5 changes: 4 additions & 1 deletion src/completion/completer/citation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,10 @@ export const bibTools = {
parseAbbrevations
}

function expandField(abbreviations: {[key: string]: string}, value: bibtexParser.FieldValue): string {
function expandField(abbreviations: {[key: string]: string}, value: bibtexParser.FieldValue | undefined): string {
if (value === undefined) {
return ''
}
if (value.kind === 'concat') {
const args = value.content as bibtexParser.FieldValue[]
return args.map(arg => expandField(abbreviations, arg)).join(' ')
Expand Down
147 changes: 125 additions & 22 deletions src/completion/completer/glossary.ts
Original file line number Diff line number Diff line change
@@ -1,29 +1,34 @@
import * as vscode from 'vscode'
import type * as Ast from '@unified-latex/unified-latex-types'
import { bibtexParser } from 'latex-utensils'
import { lw } from '../../lw'
import { GlossaryType } from '../../types'
import type { CompletionProvider, FileCache, GlossaryItem } from '../../types'
import { argContentToStr } from '../../utils/parser'
import { getLongestBalancedString } from '../../utils/utils'
import { bibTools } from './citation'

const logger = lw.log('Intelli', 'Glossary')
export const provider: CompletionProvider = { from }
export const glossary = {
parse,
getItem
}

const data = {
// The keys are the labels of the glossary items.
glossaries: new Map<string, GlossaryItem>(),
acronyms: new Map<string, GlossaryItem>()
acronyms: new Map<string, GlossaryItem>(),
// The keys are the paths of the `.bib` files.
bibEntries: new Map<string, GlossaryItem[]>(),
}

interface GlossaryEntry {
label: string | undefined,
description: string | undefined
}
lw.watcher.bib.onCreate(uri => parseBibFile(uri.fsPath))
lw.watcher.bib.onChange(uri => parseBibFile(uri.fsPath))
lw.watcher.bib.onDelete(uri => removeEntriesInFile(uri.fsPath))

function from(result: RegExpMatchArray): vscode.CompletionItem[] {
updateAll()
updateAll(getIncludedBibs(lw.root.file.path))
let suggestions: Map<string, GlossaryItem>

if (result[1] && result[1].match(/^ac/i)) {
Expand All @@ -38,14 +43,58 @@ function from(result: RegExpMatchArray): vscode.CompletionItem[] {
}

function getItem(token: string): GlossaryItem | undefined {
updateAll()
updateAll(getIncludedBibs(lw.root.file.path))
return data.glossaries.get(token) || data.acronyms.get(token)
}

function updateAll() {
/**
 * Collects the glossary `.bib` files reachable from a LaTeX file.
 *
 * The `glossarybibfiles` cached for `file` come first, followed by those of
 * every cached child that has not been traversed yet. Each path appears once,
 * in the order it was first encountered.
 *
 * @param file The path of a LaTeX file. `undefined`, or a file that has no
 * cache entry, yields an empty array.
 * @param visitedTeX Paths of the LaTeX files traversed so far. The array is
 * appended to during the walk so that a file is never descended into twice.
 * Internal use only.
 * @returns The de-duplicated paths of the glossary `.bib` files.
 */
function getIncludedBibs(file?: string, visitedTeX: string[] = []): string[] {
    const fileCache = file === undefined ? undefined : lw.cache.get(file)
    if (file === undefined || fileCache === undefined) {
        return []
    }

    // A Set keeps insertion order, so it de-duplicates without reordering.
    const collected = new Set<string>(fileCache.glossarybibfiles)
    visitedTeX.push(file)
    for (const { filePath: childPath } of fileCache.children) {
        if (!visitedTeX.includes(childPath)) {
            for (const bib of getIncludedBibs(childPath, visitedTeX)) {
                collected.add(bib)
            }
        }
    }
    return Array.from(collected)
}

/**
* Returns aggregated glossary entries from `.bib` files and glossary items defined on LaTeX files included in the root file.
*
* @param bibFiles The array of the paths of glossary `.bib` files whose cached entries are merged in.
*/
function updateAll(bibFiles: string[]) {
// Extract cached references
const glossaryList: string[] = []

// From bib files
bibFiles.forEach(file => {
const entries = data.bibEntries.get(file)
entries?.forEach(entry => {
if (entry.type === GlossaryType.glossary) {
data.glossaries.set(entry.label, entry)
} else {
data.acronyms.set(entry.label, entry)
}
glossaryList.push(entry.label)
})
})

lw.cache.getIncludedTeX().forEach(cachedFile => {
const cachedGlossaries = lw.cache.get(cachedFile)?.elements.glossary
if (cachedGlossaries === undefined) {
Expand All @@ -61,7 +110,7 @@ function updateAll() {
})
})

// Remove references that has been deleted
// Remove references that have been deleted
data.glossaries.forEach((_, key) => {
if (!glossaryList.includes(key)) {
data.glossaries.delete(key)
Expand All @@ -74,6 +123,64 @@ function updateAll() {
})
}

/**
 * Parses a glossary `.bib` file and stores its entries in `data.bibEntries`
 * under the path of the file.
 *
 * A file whose size reaches the `bibtex.maxFileSize` setting (in MB) is not
 * parsed and any entries stored for it are removed. When the parser yields no
 * AST, the stored entries are left untouched and `FileParsed` is fired.
 * Otherwise every entry that has a key becomes a `GlossaryItem`: entries of
 * type `entry` are glossary items, all other entry types are acronyms.
 *
 * @param fileName The path of `.bib` file.
 */
async function parseBibFile(fileName: string) {
    logger.log(`Parsing glossary .bib entries from ${fileName}`)
    const fileUri = vscode.Uri.file(fileName)
    const configuration = vscode.workspace.getConfiguration('latex-workshop', fileUri)
    const { size } = await lw.external.stat(fileUri)
    const maxBytes = (configuration.get('bibtex.maxFileSize') as number) * 1024 * 1024
    if (size >= maxBytes) {
        logger.log(`Bib file is too large, ignoring it: ${fileName}`)
        data.bibEntries.delete(fileName)
        return
    }

    const bibtex = await lw.file.read(fileName)
    logger.log(`Parse BibTeX AST from ${fileName} .`)
    const ast = await lw.parser.parse.bib(fileUri, bibtex ?? '')
    if (ast === undefined) {
        logger.log(`Parsed 0 bib entries from ${fileName}.`)
        lw.event.fire(lw.event.FileParsed, fileName)
        return
    }

    const abbreviations = bibTools.parseAbbrevations(ast)
    // Expands the value of the named field; a missing field gives ''.
    const fieldText = (entry: bibtexParser.Entry, fieldName: string): string => {
        const field = entry.content.find(candidate => candidate.name === fieldName)
        return bibTools.expandField(abbreviations, field?.value)
    }

    const items: GlossaryItem[] = []
    for (const entry of ast.content.filter(bibtexParser.isEntry)) {
        if (entry.internalKey === undefined) {
            continue
        }
        const { line, column } = entry.location.start
        const name = fieldText(entry, 'name')
        const description = fieldText(entry, 'description')
        items.push({
            type: entry.entryType === 'entry' ? GlossaryType.glossary : GlossaryType.acronym,
            label: entry.internalKey,
            filePath: fileName,
            // Parser locations are 1-based, `vscode.Position` is 0-based.
            position: new vscode.Position(line - 1, column - 1),
            kind: vscode.CompletionItemKind.Reference,
            detail: `${name}: ${description}`
        })
    }

    data.bibEntries.set(fileName, items)
    logger.log(`Parsed ${items.length} glossary bib entries from ${fileName} .`)
    void lw.outline.reconstruct()
    lw.event.fire(lw.event.FileParsed, fileName)
}

/**
 * Forgets the glossary entries stored for a `.bib` file.
 *
 * @param file The path of the `.bib` file whose entries are dropped from
 * `data.bibEntries`.
 */
function removeEntriesInFile(file: string): void {
    logger.log(`Remove parsed bib entries for ${file}`)
    data.bibEntries.delete(file)
}

function parse(cache: FileCache) {
if (cache.ast !== undefined) {
cache.elements.glossary = parseAst(cache.ast, cache.filePath)
Expand All @@ -84,12 +191,13 @@ function parse(cache: FileCache) {

function parseAst(node: Ast.Node, filePath: string): GlossaryItem[] {
let glos: GlossaryItem[] = []
let entry: GlossaryEntry = { label: '', description: '' }
let label: string = ''
let description: string = ''
let type: GlossaryType | undefined

if (node.type === 'macro' && ['newglossaryentry', 'provideglossaryentry'].includes(node.content)) {
type = GlossaryType.glossary
let description = argContentToStr(node.args?.[1]?.content || [], true)
description = argContentToStr(node.args?.[1]?.content || [], true)
const index = description.indexOf('description=')
if (index >= 0) {
description = description.slice(index + 12)
Expand All @@ -101,28 +209,23 @@ function parseAst(node: Ast.Node, filePath: string): GlossaryItem[] {
} else {
description = ''
}
entry = {
label: argContentToStr(node.args?.[0]?.content || []),
description
}
label = argContentToStr(node.args?.[0]?.content || [])
} else if (node.type === 'macro' && ['longnewglossaryentry', 'longprovideglossaryentry', 'newacronym', 'newabbreviation', 'newabbr'].includes(node.content)) {
if (['longnewglossaryentry', 'longprovideglossaryentry'].includes(node.content)) {
type = GlossaryType.glossary
} else {
type = GlossaryType.acronym
}
entry = {
label: argContentToStr(node.args?.[1]?.content || []),
description: argContentToStr(node.args?.[3]?.content || []),
}
label = argContentToStr(node.args?.[1]?.content || [])
description = argContentToStr(node.args?.[3]?.content || [])
}
if (type !== undefined && entry.label && entry.description && node.position !== undefined) {
if (type !== undefined && label && description && node.position !== undefined) {
glos.push({
type,
filePath,
position: new vscode.Position(node.position.start.line - 1, node.position.start.column - 1),
label: entry.label,
detail: entry.description,
label,
detail: description,
kind: vscode.CompletionItemKind.Reference
})
}
Expand Down
37 changes: 37 additions & 0 deletions src/core/cache.ts
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,7 @@ async function refreshCache(filePath: string, rootPath?: string): Promise<Promis
elements: {},
children: [],
bibfiles: new Set(),
glossarybibfiles: new Set(),
external: {}}
caches.set(filePath, fileCache)
rootPath = rootPath || lw.root.file.path
Expand Down Expand Up @@ -474,6 +475,7 @@ async function updateElements(fileCache: FileCache): Promise<void> {
lw.completion.subsuperscript.parse(fileCache)
lw.completion.input.parseGraphicsPath(fileCache)
await updateBibfiles(fileCache)
await updateGlossaryBibFiles(fileCache)
const elapsed = performance.now() - start
logger.log(`Updated elements in ${elapsed.toFixed(2)} ms: ${fileCache.filePath} .`)
}
Expand Down Expand Up @@ -516,6 +518,41 @@ async function updateBibfiles(fileCache: FileCache) {
}
}

/**
 * Updates the glossary `.bib` files associated with a given file cache.
 *
 * Scans `fileCache.contentTrimmed` for `\GlsXtrLoadResources[...]` macros
 * whose options contain `src={...}`. The comma-separated names found in `src`
 * are passed to `utils.resolveFile` together with the directory of the cached
 * file and the `.bib` suffix. Every resolved path that is not excluded is
 * added to `fileCache.glossarybibfiles`, logged, and added to the bib watcher
 * unless it is already watched.
 *
 * @param fileCache The file cache object to update with glossary `.bib`
 * files.
 */
async function updateGlossaryBibFiles(fileCache: FileCache) {
    const glossaryReg = /\\GlsXtrLoadResources\s*\[.*?src=\{([^}]+)\}.*?\]/gs
    const baseDir = path.dirname(fileCache.filePath)

    let result: RegExpExecArray | null
    while ((result = glossaryReg.exec(fileCache.contentTrimmed)) !== null) {
        // The pattern has a single capture group, the content of `src={...}`.
        // Blank names (trailing or doubled commas, whitespace-only `src`) are
        // dropped so that no empty name reaches the resolver.
        const bibs = (result[1] ?? '')
            .split(',')
            .map(bib => bib.trim())
            .filter(bib => bib !== '')

        for (const bib of bibs) {
            const bibPath = await utils.resolveFile([baseDir], bib, '.bib')
            if (!bibPath || isExcluded(bibPath)) {
                continue
            }
            fileCache.glossarybibfiles.add(bibPath)
            logger.log(`Glossary bib ${bibPath} from ${fileCache.filePath} .`)
            const bibUri = vscode.Uri.file(bibPath)
            if (!lw.watcher.bib.has(bibUri)) {
                lw.watcher.bib.add(bibUri)
            }
        }
    }
}

/**
* Loads and processes a .fls file related to a specified file path.
*
Expand Down
2 changes: 1 addition & 1 deletion src/outline/structure/bibtex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ const logger = lw.log('Structure', 'BibTeX')
* Convert a bibtexParser.FieldValue to a string
* @param field the bibtexParser.FieldValue to parse
*/
function fieldValueToString(field: bibtexParser.FieldValue, abbreviations: {[abbr: string]: string}): string {
export function fieldValueToString(field: bibtexParser.FieldValue, abbreviations: {[abbr: string]: string}): string {
if (field.kind === 'concat') {
return field.content.map(value => fieldValueToString(value, abbreviations)).reduce((acc, cur) => {return acc + ' # ' + cur})
} else if (field.kind === 'abbreviation') {
Expand Down
2 changes: 2 additions & 0 deletions src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ export type FileCache = {
}[],
/** The set of the paths of `.bib` files referenced from the LaTeX file */
bibfiles: Set<string>,
/** The set of the paths of `.bib` files listed by `\GlsXtrLoadResources` to provide glossary entries */
glossarybibfiles: Set<string>,
/** A dictionary of external documents provided by `\externaldocument` of
* `xr` package. The value is its prefix `\externaldocument[prefix]{*}` */
external: {[filePath: string]: string},
Expand Down

0 comments on commit 5c4a45b

Please sign in to comment.