From 3651de60730604f6ea0c967e9b061a724aae1f95 Mon Sep 17 00:00:00 2001 From: Andre Wanlin Date: Sat, 27 Apr 2024 15:19:09 -0500 Subject: [PATCH 1/4] Added new dedicated module for the Linguist Tags Processor Signed-off-by: Andre Wanlin Ran prettier and dedupe Signed-off-by: Andre Wanlin Added missing packages Signed-off-by: Andre Wanlin Added tag processor to example app Signed-off-by: Andre Wanlin Fixed service to service auth error Signed-off-by: Andre Wanlin Ran dedupe Signed-off-by: Andre Wanlin Fixed type errors Signed-off-by: Andre Wanlin Ran prettier Signed-off-by: Andre Wanlin Updated API report Signed-off-by: Andre Wanlin --- .../linguist/.changeset/polite-pots-smile.md | 6 + .../linguist/packages/backend/package.json | 1 + .../linguist/packages/backend/src/index.ts | 5 + .../.eslintrc.js | 1 + .../README.md | 187 +++++ .../api-report.md | 58 ++ .../config.d.ts | 54 ++ .../package.json | 52 ++ .../src/index.ts | 8 + .../src/module.ts | 28 + .../processor/LinguistTagsProcessor.test.ts | 370 +++++++++ .../src/processor/LinguistTagsProcessor.ts | 294 ++++++++ .../LinguistTagsProcessor.test.ts.snap | 707 ++++++++++++++++++ .../src/processor/index.ts | 20 + .../plugins/linguist-backend/README.md | 116 --- .../plugins/linguist-backend/api-report.md | 2 +- .../src/processor/LinguistTagsProcessor.ts | 5 +- workspaces/linguist/yarn.lock | 210 +++++- 18 files changed, 1996 insertions(+), 128 deletions(-) create mode 100644 workspaces/linguist/.changeset/polite-pots-smile.md create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/.eslintrc.js create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts create mode 100644 
workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/index.ts create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/module.ts create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/__snapshots__/LinguistTagsProcessor.test.ts.snap create mode 100644 workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/index.ts diff --git a/workspaces/linguist/.changeset/polite-pots-smile.md b/workspaces/linguist/.changeset/polite-pots-smile.md new file mode 100644 index 0000000000..eb8bcbb14e --- /dev/null +++ b/workspaces/linguist/.changeset/polite-pots-smile.md @@ -0,0 +1,6 @@ +--- +'@backstage-community/plugin-linguist-backend': patch +'@backstage-community/plugin-catalog-backend-module-linguist-tags-processor': patch +--- + +Added new dedicated module for the Linguist Tags Processor and deprecated the version in the Linguist Backend diff --git a/workspaces/linguist/packages/backend/package.json b/workspaces/linguist/packages/backend/package.json index 4f3ce749f7..742897eb79 100644 --- a/workspaces/linguist/packages/backend/package.json +++ b/workspaces/linguist/packages/backend/package.json @@ -21,6 +21,7 @@ "build-image": "docker build ../.. 
-f Dockerfile --tag backstage" }, "dependencies": { + "@backstage-community/plugin-catalog-backend-module-linguist-tags-processor": "workspace:^", "@backstage-community/plugin-linguist-backend": "workspace:^", "@backstage/backend-common": "^0.23.2", "@backstage/backend-defaults": "^0.3.3", diff --git a/workspaces/linguist/packages/backend/src/index.ts b/workspaces/linguist/packages/backend/src/index.ts index a4ed6ca013..e2f3d027ad 100644 --- a/workspaces/linguist/packages/backend/src/index.ts +++ b/workspaces/linguist/packages/backend/src/index.ts @@ -26,6 +26,11 @@ backend.add(import('@backstage/plugin-catalog-backend/alpha')); backend.add( import('@backstage/plugin-catalog-backend-module-scaffolder-entity-model'), ); +backend.add( + import( + '@backstage-community/plugin-catalog-backend-module-linguist-tags-processor' + ), +); // permission plugin backend.add(import('@backstage/plugin-permission-backend/alpha')); diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/.eslintrc.js b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/.eslintrc.js new file mode 100644 index 0000000000..e2a53a6ad2 --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/.eslintrc.js @@ -0,0 +1 @@ +module.exports = require('@backstage/cli/config/eslint-factory')(__dirname); diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md new file mode 100644 index 0000000000..e97a1182d2 --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md @@ -0,0 +1,187 @@ +# Linguist Tags Processor backend module for the Catalog plugin + +## Overview + +The Linguist Tags Processor can be added into your catalog as a way to incorporate the language breakdown from Linguist as `metadata.tags` on your entities. 
Doing so enables the ability to easily filter for entities in your catalog index based on the language of the source repository. + +## Setup + +To set up the Linguist Tags Processor you'll need to first run this command to add the package: + +```sh +# From your Backstage root directory +yarn --cwd packages/backend add @backstage-community/plugin-catalog-backend-module-linguist-tags-processor +``` + +Then in your `/packages/backend/src/index.ts` file you simply add the following line: + +```diff + import { createBackend } from '@backstage/backend-defaults'; + + const backend = createBackend(); + + // ... other feature additions + ++ backend.add(import('@backstage-community/plugin-catalog-backend-module-linguist-tags-processor')); + + backend.start(); +``` + +### Legacy Setup + +To set up the Linguist Tags Processor when using the legacy backend you'll first need to run this command to add the package: + +```sh +# From your Backstage root directory +yarn --cwd packages/backend add @backstage-community/plugin-catalog-backend-module-linguist-tags-processor +``` + +Then you will need to make the following changes in your `/packages/backend/src/plugins/catalog.ts` file: + +```ts +import { LinguistTagsProcessor } from '@backstage-community/plugin-catalog-backend-module-linguist-tags-processor'; +// ... +export default async function createPlugin( + // ... 
+ builder.addProcessor( + LinguistTagsProcessor.fromConfig(env.config, { + logger: env.logger, + discovery: env.discovery, + }) + ); +``` + +```diff + import { CatalogBuilder } from '@backstage/plugin-catalog-backend'; + import { ScaffolderEntitiesProcessor } from '@backstage/plugin-catalog-backend-module-scaffolder-entity-model'; + import { Router } from 'express'; + import { PluginEnvironment } from '../types'; ++ import { LinguistTagsProcessor } from '@backstage-community/plugin-catalog-backend-module-linguist-tags-processor'; + + export default async function createPlugin( + env: PluginEnvironment, + ): Promise<Router> { + const builder = await CatalogBuilder.create(env); + builder.addProcessor(new ScaffolderEntitiesProcessor()); ++ builder.addProcessor(LinguistTagsProcessor.fromConfig(env.config, { logger: env.logger, discovery: env.discovery })); + const { processingEngine, router } = await builder.build(); + await processingEngine.start(); + return router; + } +``` + +### Processor Options + +The processor can be configured in `app-config.yaml`; here is an example Linguist Tags Processor configuration: + +```yaml +linguist: + tagsProcessor: + bytesThreshold: 1000 + languageTypes: ['programming', 'markup'] + languageMap: + Dockerfile: '' + TSX: 'react' + tagPrefix: 'lang:' + cacheTTL: + hours: 24 +``` + +#### `languageMap` + +The `languageMap` option allows you to build a custom map of linguist languages to how you want them to show up as tags. The keys should be exact matches to languages in the [linguist dataset](https://github.com/github-linguist/linguist/blob/master/lib/linguist/languages.yml) and the values should be how they render as backstage tags. These values will be used "as is" and will not be further transformed. + +Keep in mind that backstage has [character requirements for tags](https://backstage.io/docs/features/software-catalog/descriptor-format#tags-optional). 
If your map emits an invalid tag, it will cause an error during processing and your entity will not be processed. + +If you map a key to `''`, it will not be emitted as a tag. This can be useful if you want to ignore some of the linguist languages. + +```yaml +linguist: + tagsProcessor: + languageMap: + # You don't want dockerfile to show up as a tag + Dockerfile: '' + # Be more specific about what the file is + HCL: terraform + # A more casual tag for a formal name + Protocol Buffer: protobuf +``` + +#### `tagPrefix` + +The `tagPrefix` option allows you to provide a prefix to all tags created by linguist. Keep in mind that backstage has [character requirements for tags](https://backstage.io/docs/features/software-catalog/descriptor-format#tags-optional). If your prefix emits an invalid tag, it will cause an error during processing and your entity will not be processed. + +As an example, use the following config to get tags like `lang:java` instead of just `java`. + +```yaml +linguist: + tagsProcessor: + tagPrefix: 'lang:' +``` + +#### `cacheTTL` + +The `cacheTTL` option allows you to determine for how long this processor will cache languages for an `entityRef` before refreshing from the linguist backend. As this processor will run continuously, this cache is supplied to limit the load done on the linguist DB and API. + +By default, this processor will cache languages for 30 minutes before refreshing from the linguist database. + +You can optionally disable the cache entirely by passing in a `cacheTTL` duration of 0 minutes. + +```yaml +linguist: + tagsProcessor: + cacheTTL: { minutes: 0 } +``` + +#### `bytesThreshold` + +The `bytesThreshold` option allows you to control a number of bytes threshold which must be surpassed before a language tag will be emitted by this processor. 
As an example, some repositories may have short build scripts written in Bash, but you may only want the main language of the project emitted (an alternate way to control this is to use the `languageMap` to map `Shell` languages to `''`). + +```yaml +linguist: + tagsProcessor: + # Ignore languages with less than 5000 bytes in a repo. + bytesThreshold: 5000 +``` + +#### `languageTypes` + +The `languageTypes` option allows you to control what categories of linguist languages are automatically added as tags. By default, this will only include language tags of type `programming`, but you can pass in a custom array here to allow adding other language types. + +You can see the full breakdown of linguist supported languages [in their repo](https://github.com/github-linguist/linguist/blob/master/lib/linguist/languages.yml). + +For example, you may want to also include languages of type `data`: + +```yaml +linguist: + tagsProcessor: + languageTypes: + - programming + - data +``` + +#### `shouldProcessEntity` + +The `shouldProcessEntity` option is a function you can pass into the processor which determines which entities should have language tags fetched from linguist and added to the entity. By default, this will only run on entities of `kind: Component`; however, this function lets you fully customize which entities should be processed. + +> Note: this is not currently supported with the new backend system + +As an example, you may choose to extend this to support both `Component` and `Resource` kinds along with allowing an opt-in annotation on the entity which entity authors can use. + +As this option is a function, it cannot be configured in `app-config.yaml`. You must pass this as an option within typescript. 
+ +```ts +LinguistTagsProcessor.fromConfig(env.config, { + logger: env.logger, + discovery: env.discovery, + shouldProcessEntity: (entity: Entity) => { + if ( + ['Component', 'Resource'].includes(entity.kind) && + entity.metadata.annotations?.['some-custom-annotation'] + ) { + return true; + } + return false; + }, +}); +``` diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md new file mode 100644 index 0000000000..2de880e6ed --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md @@ -0,0 +1,58 @@ +## API Report File for "@backstage-community/plugin-catalog-backend-module-linguist-tags-processor" + +> Do not edit this file. It is a report generated by [API Extractor](https://api-extractor.com/). + +```ts +import { AuthService } from '@backstage/backend-plugin-api'; +import { BackendFeature } from '@backstage/backend-plugin-api'; +import { CatalogProcessor } from '@backstage/plugin-catalog-node'; +import { CatalogProcessorCache } from '@backstage/plugin-catalog-node'; +import { Config } from '@backstage/config'; +import { DiscoveryService } from '@backstage/backend-plugin-api'; +import { Entity } from '@backstage/catalog-model'; +import { HumanDuration } from '@backstage/types'; +import { LanguageType } from '@backstage-community/plugin-linguist-common'; +import { LoggerService } from '@backstage/backend-plugin-api'; + +// @public (undocumented) +const catalogModuleLinguistTagsProcessor: () => BackendFeature; +export default catalogModuleLinguistTagsProcessor; + +// @public +export class LinguistTagsProcessor implements CatalogProcessor { + constructor(options: LinguistTagsProcessorOptions); + // (undocumented) + static fromConfig( + config: Config, + options: LinguistTagsProcessorOptions, + ): LinguistTagsProcessor; + // (undocumented) + getProcessorName(): string; + 
preProcessEntity( + entity: Entity, + _: any, + __: any, + ___: any, + cache: CatalogProcessorCache, + ): Promise; +} + +// @public +export interface LinguistTagsProcessorOptions { + // (undocumented) + auth: AuthService; + bytesThreshold?: number; + cacheTTL?: HumanDuration; + // (undocumented) + discovery: DiscoveryService; + languageMap?: Record; + languageTypes?: LanguageType[]; + // (undocumented) + logger: LoggerService; + shouldProcessEntity?: ShouldProcessEntity; + tagPrefix?: string; +} + +// @public +export type ShouldProcessEntity = (entity: Entity) => boolean; +``` diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts new file mode 100644 index 0000000000..e6ba743d7b --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts @@ -0,0 +1,54 @@ +/* + * Copyright 2023 The Backstage Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { TaskScheduleDefinition } from '@backstage/backend-tasks'; +import { HumanDuration } from '@backstage/types'; +import { Options as LinguistJsOptions } from 'linguist-js/dist/types'; + +export interface Config { + /** Configuration options for the linguist plugin */ + linguist?: { + /** Options for the tags processor */ + tagsProcessor?: { + /** + * Determines how many bytes of a language should be in a repo + * for it to be added as an entity tag. Defaults to 0. + */ + bytesThreshold?: number; + /** + * The types of linguist languages that should be processed. Can be + * any of "programming", "data", "markup", "prose". Defaults to ["programming"]. + */ + languageTypes?: string[]; + /** + * A custom mapping of linguist languages to how they should be rendered as entity tags. + * If a language is mapped to '' it will not be included as a tag. + */ + languageMap?: { + [language: string]: string | undefined; + }; + /** + * How long to cache entity languages for in memory. Used to avoid constant db hits during + * processing. Defaults to 30 minutes. 
+ */ + cacheTTL?: HumanDuration; + /** + * An optional prefix to apply to all created tags from linguist + */ + tagPrefix?: string; + }; + }; +} diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json new file mode 100644 index 0000000000..5aa4903d80 --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json @@ -0,0 +1,52 @@ +{ + "name": "@backstage-community/plugin-catalog-backend-module-linguist-tags-processor", + "description": "The linguist-tags-processor backend module for the catalog plugin.", + "version": "0.1.0", + "main": "src/index.ts", + "types": "src/index.ts", + "license": "Apache-2.0", + "private": true, + "publishConfig": { + "access": "public", + "main": "dist/index.cjs.js", + "types": "dist/index.d.ts" + }, + "repository": { + "type": "git", + "url": "https://github.com/backstage/community-plugins", + "directory": "workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor" + }, + "backstage": { + "role": "backend-plugin-module" + }, + "scripts": { + "start": "backstage-cli package start", + "build": "backstage-cli package build", + "lint": "backstage-cli package lint", + "test": "backstage-cli package test", + "clean": "backstage-cli package clean", + "prepack": "backstage-cli package prepack", + "postpack": "backstage-cli package postpack" + }, + "dependencies": { + "@backstage-community/plugin-linguist-common": "workspace:^", + "@backstage/backend-common": "^0.21.7", + "@backstage/backend-plugin-api": "^0.6.17", + "@backstage/catalog-model": "^1.4.5", + "@backstage/config": "^1.2.0", + "@backstage/plugin-catalog-node": "^1.11.1", + "@backstage/types": "^1.1.1", + "node-fetch": "^2.6.7" + }, + "devDependencies": { + "@backstage/backend-tasks": "^0.5.22", + "@backstage/backend-test-utils": "^0.3.7", + "@backstage/cli": "^0.26.3", + "js-yaml": 
"^4.1.0", + "linguist-js": "^2.5.3" + }, + "files": [ + "dist" + ], + "configSchema": "config.d.ts" +} diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/index.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/index.ts new file mode 100644 index 0000000000..4b31d5be5b --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/index.ts @@ -0,0 +1,8 @@ +/** + * The Linguist Tags Processor backend module for the Catalog plugin. + * + * @packageDocumentation + */ + +export { catalogModuleLinguistTagsProcessor as default } from './module'; +export * from './processor'; diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/module.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/module.ts new file mode 100644 index 0000000000..7640b32416 --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/module.ts @@ -0,0 +1,28 @@ +import { + coreServices, + createBackendModule, +} from '@backstage/backend-plugin-api'; +import { catalogProcessingExtensionPoint } from '@backstage/plugin-catalog-node/alpha'; +import { LinguistTagsProcessor } from './processor'; + +/** @public */ +export const catalogModuleLinguistTagsProcessor = createBackendModule({ + pluginId: 'catalog', + moduleId: 'linguist-tags-processor', + register(reg) { + reg.registerInit({ + deps: { + catalog: catalogProcessingExtensionPoint, + config: coreServices.rootConfig, + logger: coreServices.logger, + discovery: coreServices.discovery, + auth: coreServices.auth, + }, + async init({ catalog, config, logger, discovery, auth }) { + catalog.addProcessor( + LinguistTagsProcessor.fromConfig(config, { logger, discovery, auth }), + ); + }, + }); + }, +}); diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts 
b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts new file mode 100644 index 0000000000..52d061373f --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts @@ -0,0 +1,370 @@ +/* + * Copyright 2023 The Backstage Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import { + LinguistTagsProcessor, + LinguistTagsProcessorOptions, + sanitizeTag, +} from './LinguistTagsProcessor'; +import { ConfigReader } from '@backstage/config'; +import { getVoidLogger } from '@backstage/backend-common'; +import { CatalogProcessorCache } from '@backstage/plugin-catalog-node'; +import { Entity, makeValidator } from '@backstage/catalog-model'; +import { DiscoveryService } from '@backstage/backend-plugin-api'; +import fetch, { Response } from 'node-fetch'; +import * as path from 'path'; +import yaml from 'js-yaml'; +import * as fs from 'fs'; +import { mockServices } from '@backstage/backend-test-utils'; + +const { isValidTag } = makeValidator(); + +jest.mock('node-fetch', () => jest.fn()); +const mockedFetch: jest.MockedFunction = + fetch as jest.MockedFunction; + +const discovery: DiscoveryService = { + getBaseUrl: jest.fn().mockResolvedValue('http://example.com/api/linguist'), + getExternalBaseUrl: jest.fn(), +}; + +const auth = mockServices.auth.mock({ + getPluginRequestToken: async () => ({ token: 'abc123' }), 
+}); + +let state: Record = {}; +const mockCacheGet = jest + .fn() + .mockImplementation(async (key: string) => state[key]); +const mockCacheSet = jest.fn().mockImplementation((key: string, value: any) => { + state[key] = value; +}); +const cache: CatalogProcessorCache = { + get: mockCacheGet, + set: mockCacheSet, +}; + +describe('sanitizeTag', () => { + const linguistDataSet = yaml.load( + fs.readFileSync( + path.resolve(require.resolve('linguist-js'), '../../ext/languages.yml'), + 'utf-8', + ), + ) as Object; + const languages = Object.keys(linguistDataSet); + test('Should clean up all linguist languages', () => { + const invalid = languages + .map(sanitizeTag) + .filter(lang => !isValidTag(lang)); + expect(invalid).toStrictEqual([]); + // Keep a snapshot here so that as new languages are added to linguist, + // we can spot check them to make sure the transformer for them makes sense. + expect(languages.map(sanitizeTag)).toMatchSnapshot(); + }); +}); + +describe('LinguistTagsProcessor', () => { + afterEach(() => { + mockedFetch.mockReset(); + mockCacheGet.mockClear(); + mockCacheSet.mockClear(); + state = {}; + }); + + test('Should construct fromConfig', () => { + const config = new ConfigReader({ + linguist: {}, + }); + expect(() => { + return LinguistTagsProcessor.fromConfig(config, { + logger: getVoidLogger(), + discovery, + auth, + }); + }).not.toThrow(); + }); + + test('Should assign valid language tags', async () => { + const processor = buildProcessor({}); + + mockFetchImplementation(); + const entity = baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual([ + 'c++', + 'asp-dot-net', + 'java', + 'common-lisp', + ]); + + entity.metadata.tags?.forEach(tag => { + expect(isValidTag(tag)).toBeTruthy(); + }); + }); + + test('Should use tag prefix if provided', async () => { + const processor = buildProcessor({ tagPrefix: 'lang:' }); + + 
mockFetchImplementation(); + const entity = baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual([ + 'lang:c++', + 'lang:asp-dot-net', + 'lang:java', + 'lang:common-lisp', + ]); + + entity.metadata.tags?.forEach(tag => { + expect(isValidTag(tag)).toBeTruthy(); + }); + }); + + test('Should not duplicate existing tags', async () => { + const processor = buildProcessor({}); + + mockFetchImplementation(); + const entity = baseEntity(); + entity.metadata.tags = ['existing', 'tags', 'java']; + + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual([ + 'existing', + 'tags', + 'java', + 'c++', + 'asp-dot-net', + 'common-lisp', + ]); + }); + + test('Should not process Resource entities by default', async () => { + const processor = buildProcessor({}); + + mockFetchImplementation(); + const entity = baseEntity(); + entity.kind = 'Resource'; + + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(0); + expect(entity.metadata.tags).toStrictEqual(undefined); + }); + + test('Can process Resource entities by overriding shouldProcessEntity', async () => { + const processor = buildProcessor({ + shouldProcessEntity: (entity: Entity) => { + return entity.kind === 'Resource'; + }, + }); + + mockFetchImplementation(); + const entity = baseEntity(); + entity.kind = 'Resource'; + + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual([ + 'c++', + 'asp-dot-net', + 'java', + 'common-lisp', + ]); + }); + + test('Can omit languages using languageMap', async () => { + const processor = buildProcessor({ + languageMap: { + Java: '', + 'ASP.net': '', + }, + }); + + mockFetchImplementation(); + const entity 
= baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual(['c++', 'common-lisp']); + }); + + test('Can rewrite langs using languageMap', async () => { + const processor = buildProcessor({ + languageMap: { + Java: 'notjava', + }, + }); + + mockFetchImplementation(); + const entity = baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual([ + 'c++', + 'asp-dot-net', + 'notjava', + 'common-lisp', + ]); + }); + + test('Can omit languages less than bytesThreshold', async () => { + const processor = buildProcessor({ + bytesThreshold: 5000, + }); + + mockFetchImplementation(); + const entity = baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual(['java', 'common-lisp']); + }); + + test('Can include languages that arent programming', async () => { + const processor = buildProcessor({ + languageTypes: ['data'], + }); + + mockFetchImplementation(); + const entity = baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(entity.metadata.tags).toStrictEqual(['yaml', 'json']); + }); + + test('Refetches from API when cache disabled', async () => { + const processor = buildProcessor({ + cacheTTL: { minutes: 0 }, + }); + + mockFetchImplementation(); + const entity = baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(mockCacheGet).toHaveBeenCalledTimes(0); + expect(mockCacheSet).toHaveBeenCalledTimes(0); + mockedFetch.mockClear(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + 
expect(mockCacheGet).toHaveBeenCalledTimes(0); + expect(mockCacheSet).toHaveBeenCalledTimes(0); + }); + + test('Caches across runs with cache enabled', async () => { + const processor = buildProcessor({ + cacheTTL: { minutes: 5 }, + }); + + mockFetchImplementation(); + const entity = baseEntity(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(1); + expect(mockCacheGet).toHaveBeenCalledTimes(1); + expect(mockCacheSet).toHaveBeenCalledTimes(1); + + mockedFetch.mockClear(); + mockCacheGet.mockClear(); + mockCacheSet.mockClear(); + await processor.preProcessEntity(entity, null, null, null, cache); + expect(mockedFetch).toHaveBeenCalledTimes(0); + expect(mockCacheGet).toHaveBeenCalledTimes(1); + expect(mockCacheSet).toHaveBeenCalledTimes(0); + }); +}); + +function mockFetchImplementation(): void { + mockedFetch.mockResolvedValue({ + json: jest.fn().mockResolvedValue({ + languageCount: 6, + totalBytes: 43823, + processedDate: '2023-06-20T21:37:48.337Z', + breakdown: [ + { + name: 'YAML', + percentage: 2.23, + bytes: 979, + type: 'data', + color: '#cb171e', + }, + { + name: 'JSON', + percentage: 1.31, + bytes: 574, + type: 'data', + color: '#292929', + }, + { + name: 'C++', + percentage: 5.25, + bytes: 2300, + type: 'programming', + color: '#f34b7d', + }, + { + name: 'ASP.net', + percentage: 6.97, + bytes: 3053, + type: 'programming', + color: '#178600', + }, + { + name: 'Java', + percentage: 12.79, + bytes: 5603, + type: 'programming', + color: '#b07219', + }, + { + name: 'Common Lisp', + percentage: 71.46, + bytes: 31314, + type: 'programming', + color: '#3fb68b', + }, + ], + }), + } as unknown as Response); +} + +function baseEntity(): Entity { + return { + apiVersion: 'backstage.io/v1alpha1', + kind: 'Component', + metadata: { + name: 'foo', + }, + }; +} + +function buildProcessor(options: Partial) { + const config = new ConfigReader({ + linguist: { + tagsProcessor: { + bytesThreshold: 
options.bytesThreshold, + languageTypes: options.languageTypes, + languageMap: options.languageMap, + cacheTTL: options.cacheTTL, + tagPrefix: options.tagPrefix, + }, + }, + }); + return LinguistTagsProcessor.fromConfig(config, { + logger: getVoidLogger(), + discovery, + auth, + shouldProcessEntity: options.shouldProcessEntity, + }); +} diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts new file mode 100644 index 0000000000..76272ca355 --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts @@ -0,0 +1,294 @@ +/* + * Copyright 2023 The Backstage Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import { Entity, stringifyEntityRef } from '@backstage/catalog-model'; +import { + CatalogProcessor, + CatalogProcessorCache, +} from '@backstage/plugin-catalog-node'; +import { + AuthService, + DiscoveryService, + LoggerService, +} from '@backstage/backend-plugin-api'; +import { + Languages, + LanguageType, +} from '@backstage-community/plugin-linguist-common'; +import fetch from 'node-fetch'; +import { durationToMilliseconds, HumanDuration } from '@backstage/types'; +import { Config } from '@backstage/config'; + +/** + * A function which given an entity, determines if it should be processed for linguist tags. + * @public + */ +export type ShouldProcessEntity = (entity: Entity) => boolean; + +interface CachedData { + [key: string]: number | string[]; + languages: string[]; + cachedTime: number; +} + +/** + * The constructor options for building the LinguistTagsProcessor + * @public + */ +export interface LinguistTagsProcessorOptions { + logger: LoggerService; + discovery: DiscoveryService; + auth: AuthService; + /** + * Optional map that gives full control over which linguist languages should be included as tags and + * how they should be represented. The keys should be exact matches to languages in the linguist + * and the values should be how they render as backstage tags. Keep in mind that backstage has character + * requirements for tags. If you map a key to a falsey value, it will not be emitted as a tag. + */ + languageMap?: Record; + /** + * A function which determines which entities should be processed by the LinguistTagProcessor. + * + * The default is to process all entities of kind=Component + */ + shouldProcessEntity?: ShouldProcessEntity; + /** + * Determines how long to cache language breakdowns for entities in the processor. Considering + * how often this processor runs, caching can help move some read traffic off of the linguist DB. + * + * If this caching is using up too much memory, you can disable it by setting cacheTTL to 0. 
+ */ + cacheTTL?: HumanDuration; + /** + * How many bytes must exist of a language in a repo before we consider it for adding a tag to + * the entity. This can be used if some repos have short utility scripts that may not be the primary + * language for the repo. + */ + bytesThreshold?: number; + /** + * Which linguist file types to process tags for. + */ + languageTypes?: LanguageType[]; + /** + * An optional prefix to apply to all created tags from linguist + */ + tagPrefix?: string; +} + +/** + * This processor will fetch the language breakdown from the linguist API and + * add the languages to the entity as searchable tags. + * + * @public + * */ +export class LinguistTagsProcessor implements CatalogProcessor { + private logger: LoggerService; + private discovery: DiscoveryService; + private auth: AuthService; + private loggerMeta = { plugin: 'LinguistTagsProcessor' }; + private languageMap: Record = {}; + private tagPrefix: string = ''; + private shouldProcessEntity: ShouldProcessEntity = (entity: Entity) => { + return entity.kind === 'Component'; + }; + private cacheTTLMilliseconds: number; + private bytesThreshold = 0; + private languageTypes: LanguageType[] = ['programming']; + + getProcessorName(): string { + return 'LinguistTagsProcessor'; + } + + constructor(options: LinguistTagsProcessorOptions) { + this.logger = options.logger; + this.discovery = options.discovery; + this.auth = options.auth; + if (options.shouldProcessEntity) { + this.shouldProcessEntity = options.shouldProcessEntity; + } + this.cacheTTLMilliseconds = durationToMilliseconds( + options.cacheTTL || { minutes: 30 }, + ); + if (options.bytesThreshold) { + this.bytesThreshold = options.bytesThreshold; + } + if (options.languageTypes) { + this.languageTypes = options.languageTypes; + } + if (options.languageMap) { + this.languageMap = options.languageMap; + } + if (options.tagPrefix) { + this.tagPrefix = options.tagPrefix; + } + } + + static fromConfig( + config: Config, + options: 
LinguistTagsProcessorOptions, + ): LinguistTagsProcessor { + const c = config.getOptionalConfig('linguist.tagsProcessor'); + if (c) { + options.bytesThreshold ??= c.getOptionalNumber('bytesThreshold'); + options.languageTypes ??= c.getOptionalStringArray( + 'languageTypes', + ) as LanguageType[]; + options.languageMap ??= c.getOptional('languageMap'); + options.cacheTTL ??= c.getOptional('cacheTTL'); + options.tagPrefix ??= c.getOptional('tagPrefix'); + } + + return new LinguistTagsProcessor(options); + } + + /** + * Given an entity ref, fetches the language breakdown from the Linguist backend HTTP API. + * @param entityRef - stringified entity ref + * @returns The language breakdown + */ + private async getLanguagesFromLinguistAPI( + entityRef: string, + ): Promise { + this.logger.debug(`Fetching languages from linguist API`, { + ...this.loggerMeta, + entityRef, + }); + const { token } = await this.auth.getPluginRequestToken({ + onBehalfOf: await this.auth.getOwnServiceCredentials(), + targetPluginId: 'linguist', + }); + const baseUrl = await this.discovery.getBaseUrl('linguist'); + const linguistApi = new URL(`${baseUrl}/entity-languages`); + linguistApi.searchParams.append('entityRef', entityRef); + const linguistData = await fetch(linguistApi, { + headers: { + 'Content-Type': 'application/json', + Authorization: `Bearer ${token}`, + }, + }).then(res => res.json() as Promise); + if (!linguistData || !linguistData.processedDate) { + return []; + } + + return linguistData.breakdown + .filter( + b => + this.languageTypes.includes(b.type) && b.bytes > this.bytesThreshold, + ) + .map(b => b.name); + } + + /** + * Cached wrapper around getLanguagesFromLinguistAPI + * @param cache - The CatalogProcessorCache + * @param entityRef - Stringified entity references + * + * @returns List of languages + */ + private async getCachedLanguages( + cache: CatalogProcessorCache, + entityRef: string, + ): Promise { + let cachedData = (await cache.get(entityRef)) as CachedData | 
undefined; + if (!cachedData || this.isExpired(cachedData)) { + const languages = await this.getLanguagesFromLinguistAPI(entityRef); + cachedData = { languages, cachedTime: Date.now() }; + await cache.set(entityRef, cachedData); + } + this.logger.debug(`Fetched cached languages ${cachedData.languages}`, { + ...this.loggerMeta, + entityRef, + }); + return cachedData.languages; + } + + /** + * Determines if cached data is expired based on TTL + * + * @param cachedData - The cached data for this entity + * @returns True if data is expired + */ + private isExpired(cachedData: CachedData): boolean { + const elapsed = Date.now() - (cachedData.cachedTime || 0); + return elapsed > this.cacheTTLMilliseconds; + } + + /** + * This pre-processor will fetch linguist data for a Component and convert the language breakdown + * into entity tags which will be appended to the entity. + * + * @public + */ + async preProcessEntity( + entity: Entity, + _: any, + __: any, + ___: any, + cache: CatalogProcessorCache, + ): Promise { + if (!this.shouldProcessEntity(entity)) { + return entity; + } + const entityRef = stringifyEntityRef(entity); + this.logger.debug(`Processing ${entityRef}`, { + ...this.loggerMeta, + entityRef, + }); + + const languages = + this.cacheTTLMilliseconds > 0 + ? await this.getCachedLanguages(cache, entityRef) + : await this.getLanguagesFromLinguistAPI(entityRef); + + const tags = (entity.metadata.tags ||= []); + const originalTagCount = tags.length; + + languages.forEach(lang => { + const cleanedUpLangTag = + this.tagPrefix + + (lang in this.languageMap ? 
this.languageMap[lang] : sanitizeTag(lang)); + if (cleanedUpLangTag && !tags.includes(cleanedUpLangTag)) { + tags.push(cleanedUpLangTag); + } + }); + + const addedCount = tags.length - originalTagCount; + + this.logger.debug(`Added ${addedCount} language tags from linguist`, { + ...this.loggerMeta, + entityRef, + }); + + return entity; + } +} + +/** + * Converts language tags from linguist to something acceptable by + * the tag requirements for backstage + * + * @param tag - A language tag from linguist + * @returns Cleaned up language tag + * @internal + */ +export function sanitizeTag(tag: string): string { + return tag + .toLowerCase() + .replace(/\.net/g, '-dot-net') + .replace(/[^a-z0-9:+#-]+/g, '-') + .replace(/-{2,}/g, '-') + .replace(/^-+|-+$/g, ''); +} diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/__snapshots__/LinguistTagsProcessor.test.ts.snap b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/__snapshots__/LinguistTagsProcessor.test.ts.snap new file mode 100644 index 0000000000..4eb428f61a --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/__snapshots__/LinguistTagsProcessor.test.ts.snap @@ -0,0 +1,707 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`sanitizeTag Should clean up all linguist languages 1`] = ` +[ + "1c-enterprise", + "2-dimensional-array", + "4d", + "abap", + "abap-cds", + "abnf", + "ags-script", + "aidl", + "al", + "ampl", + "antlr", + "api-blueprint", + "apl", + "asl", + "asn-1", + "asp-dot-net", + "ats", + "actionscript", + "ada", + "adblock-filter-list", + "adobe-font-metrics", + "agda", + "alloy", + "alpine-abuild", + "altium-designer", + "angelscript", + "ant-build-system", + "antlers", + "apacheconf", + "apex", + "apollo-guidance-computer", + "applescript", + "arc", + "asciidoc", + "aspectj", + "assembly", + "astro", + "asymptote", + "augeas", + "autohotkey", + "autoit", 
+ "avro-idl", + "awk", + "basic", + "ballerina", + "batchfile", + "beef", + "befunge", + "berry", + "bibtex", + "bicep", + "bikeshed", + "bison", + "bitbake", + "blade", + "blitzbasic", + "blitzmax", + "bluespec", + "boo", + "boogie", + "brainfuck", + "brighterscript", + "brightscript", + "browserslist", + "c", + "c#", + "c++", + "c-objdump", + "c2hs-haskell", + "cap-cds", + "cil", + "clips", + "cmake", + "cobol", + "codeowners", + "collada", + "cson", + "css", + "csv", + "cue", + "cweb", + "cabal-config", + "cadence", + "cairo", + "cameligo", + "cap-n-proto", + "cartocss", + "ceylon", + "chapel", + "charity", + "checksums", + "chuck", + "circom", + "cirru", + "clarion", + "clarity", + "classic-asp", + "clean", + "click", + "clojure", + "closure-templates", + "cloud-firestore-security-rules", + "conll-u", + "codeql", + "coffeescript", + "coldfusion", + "coldfusion-cfc", + "common-lisp", + "common-workflow-language", + "component-pascal", + "cool", + "coq", + "cpp-objdump", + "creole", + "crystal", + "csound", + "csound-document", + "csound-score", + "cuda", + "cue-sheet", + "curry", + "cycript", + "cypher", + "cython", + "d", + "d-objdump", + "d2", + "digital-command-language", + "dm", + "dns-zone", + "dtrace", + "dafny", + "darcs-patch", + "dart", + "dataweave", + "debian-package-control-file", + "denizenscript", + "dhall", + "diff", + "directx-3d-file", + "dockerfile", + "dogescript", + "dotenv", + "dylan", + "e", + "e-mail", + "ebnf", + "ecl", + "eclipse", + "ejs", + "eq", + "eagle", + "earthly", + "easybuild", + "ecere-projects", + "ecmarkup", + "editorconfig", + "edje-data-collection", + "eiffel", + "elixir", + "elm", + "elvish", + "elvish-transcript", + "emacs-lisp", + "emberscript", + "erlang", + "euphoria", + "f#", + "f", + "figlet-font", + "flux", + "factor", + "fancy", + "fantom", + "faust", + "fennel", + "filebench-wml", + "filterscript", + "fluent", + "formatted", + "forth", + "fortran", + "fortran-free-form", + "freebasic", + "freemarker", + "frege", + 
"futhark", + "g-code", + "gaml", + "gams", + "gap", + "gcc-machine-description", + "gdb", + "gdscript", + "gedcom", + "glsl", + "gn", + "gsc", + "game-maker-language", + "gemfile-lock", + "gemini", + "genero", + "genero-forms", + "genie", + "genshi", + "gentoo-ebuild", + "gentoo-eclass", + "gerber-image", + "gettext-catalog", + "gherkin", + "git-attributes", + "git-config", + "git-revision-list", + "gleam", + "glyph", + "glyph-bitmap-distribution-format", + "gnuplot", + "go", + "go-checksums", + "go-module", + "go-workspace", + "godot-resource", + "golo", + "gosu", + "grace", + "gradle", + "grammatical-framework", + "graph-modeling-language", + "graphql", + "graphviz-dot", + "groovy", + "groovy-server-pages", + "haproxy", + "hcl", + "hlsl", + "hocon", + "html", + "html+ecr", + "html+eex", + "html+erb", + "html+php", + "html+razor", + "http", + "hxml", + "hack", + "haml", + "handlebars", + "harbour", + "haskell", + "haxe", + "hiveql", + "holyc", + "hosts-file", + "hy", + "hyphy", + "idl", + "igor-pro", + "ini", + "irc-log", + "idris", + "ignore-list", + "imagej-macro", + "imba", + "inform-7", + "ink", + "inno-setup", + "io", + "ioke", + "isabelle", + "isabelle-root", + "j", + "jar-manifest", + "jcl", + "jflex", + "json", + "json-with-comments", + "json5", + "jsonld", + "jsoniq", + "janet", + "jasmin", + "java", + "java-properties", + "java-server-pages", + "javascript", + "javascript+erb", + "jest-snapshot", + "jetbrains-mps", + "jinja", + "jison", + "jison-lex", + "jolie", + "jsonnet", + "julia", + "jupyter-notebook", + "just", + "krl", + "kaitai-struct", + "kakounescript", + "kerboscript", + "kicad-layout", + "kicad-legacy-layout", + "kicad-schematic", + "kickstart", + "kit", + "kotlin", + "kusto", + "lfe", + "llvm", + "lolcode", + "lsl", + "ltspice-symbol", + "labview", + "lark", + "lasso", + "latte", + "lean", + "less", + "lex", + "ligolang", + "lilypond", + "limbo", + "linker-script", + "linux-kernel-module", + "liquid", + "literate-agda", + 
"literate-coffeescript", + "literate-haskell", + "livescript", + "logos", + "logtalk", + "lookml", + "loomscript", + "lua", + "m", + "m4", + "m4sugar", + "matlab", + "maxscript", + "mdx", + "mlir", + "mql4", + "mql5", + "mtml", + "muf", + "macaulay2", + "makefile", + "mako", + "markdown", + "marko", + "mask", + "mathematica", + "maven-pom", + "max", + "mercury", + "mermaid", + "meson", + "metal", + "microsoft-developer-studio-project", + "microsoft-visual-studio-solution", + "minid", + "miniyaml", + "mint", + "mirah", + "modelica", + "modula-2", + "modula-3", + "module-management-system", + "monkey", + "monkey-c", + "moocode", + "moonscript", + "motoko", + "motorola-68k-assembly", + "move", + "muse", + "mustache", + "myghty", + "nasl", + "ncl", + "neon", + "nl", + "npm-config", + "nsis", + "nwscript", + "nasal", + "nearley", + "nemerle", + "netlinx", + "netlinx+erb", + "netlogo", + "newlisp", + "nextflow", + "nginx", + "nim", + "ninja", + "nit", + "nix", + "nu", + "numpy", + "nunjucks", + "nushell", + "oasv2-json", + "oasv2-yaml", + "oasv3-json", + "oasv3-yaml", + "ocaml", + "objdump", + "object-data-instance-notation", + "objectscript", + "objective-c", + "objective-c++", + "objective-j", + "odin", + "omgrofl", + "opa", + "opal", + "open-policy-agent", + "openapi-specification-v2", + "openapi-specification-v3", + "opencl", + "openedge-abl", + "openqasm", + "openrc-runscript", + "openscad", + "openstep-property-list", + "opentype-feature-file", + "option-list", + "org", + "ox", + "oxygene", + "oz", + "p4", + "pddl", + "peg-js", + "php", + "plsql", + "plpgsql", + "pov-ray-sdl", + "pact", + "pan", + "papyrus", + "parrot", + "parrot-assembly", + "parrot-internal-representation", + "pascal", + "pawn", + "pep8", + "perl", + "pic", + "pickle", + "picolisp", + "piglatin", + "pike", + "plantuml", + "pod", + "pod-6", + "pogoscript", + "polar", + "pony", + "portugol", + "postcss", + "postscript", + "powerbuilder", + "powershell", + "prisma", + "processing", + "procfile", + 
"proguard", + "prolog", + "promela", + "propeller-spin", + "protocol-buffer", + "protocol-buffer-text-format", + "public-key", + "pug", + "puppet", + "pure-data", + "purebasic", + "purescript", + "pyret", + "python", + "python-console", + "python-traceback", + "q#", + "qml", + "qmake", + "qt-script", + "quake", + "r", + "raml", + "rbs", + "rdoc", + "realbasic", + "rexx", + "rmarkdown", + "rpc", + "rpgle", + "rpm-spec", + "runoff", + "racket", + "ragel", + "raku", + "rascal", + "raw-token-data", + "rescript", + "readline-config", + "reason", + "reasonligo", + "rebol", + "record-jar", + "red", + "redcode", + "redirect-rules", + "regular-expression", + "ren-py", + "renderscript", + "rich-text-format", + "ring", + "riot", + "robotframework", + "roff", + "roff-manpage", + "rouge", + "routeros-script", + "ruby", + "rust", + "sas", + "scss", + "selinux-policy", + "smt", + "sparql", + "sqf", + "sql", + "sqlpl", + "srecode-template", + "ssh-config", + "star", + "stl", + "ston", + "svg", + "swig", + "sage", + "saltstack", + "sass", + "scala", + "scaml", + "scenic", + "scheme", + "scilab", + "self", + "shaderlab", + "shell", + "shellcheck-config", + "shellsession", + "shen", + "sieve", + "simple-file-verification", + "singularity", + "slash", + "slice", + "slim", + "smpl", + "smali", + "smalltalk", + "smarty", + "smithy", + "snakemake", + "solidity", + "soong", + "sourcepawn", + "spline-font-database", + "squirrel", + "stan", + "standard-ml", + "starlark", + "stata", + "stringtemplate", + "stylus", + "subrip-text", + "sugarss", + "supercollider", + "svelte", + "sway", + "swift", + "systemverilog", + "ti-program", + "tl-verilog", + "tla", + "toml", + "tsql", + "tsv", + "tsx", + "txl", + "talon", + "tcl", + "tcsh", + "tex", + "tea", + "terra", + "texinfo", + "text", + "textmate-properties", + "textile", + "thrift", + "turing", + "turtle", + "twig", + "type-language", + "typescript", + "unified-parallel-c", + "unity3d-asset", + "unix-assembly", + "uno", + "unrealscript", + 
"urweb", + "v", + "vba", + "vbscript", + "vcl", + "vhdl", + "vala", + "valve-data-format", + "velocity-template-language", + "verilog", + "vim-help-file", + "vim-script", + "vim-snippet", + "visual-basic-dot-net", + "visual-basic-6-0", + "volt", + "vue", + "vyper", + "wdl", + "wgsl", + "wavefront-material", + "wavefront-object", + "web-ontology-language", + "webassembly", + "webassembly-interface-type", + "webidl", + "webvtt", + "wget-config", + "whiley", + "wikitext", + "win32-message-file", + "windows-registry-entries", + "witcher-script", + "wollok", + "world-of-warcraft-addon-data", + "wren", + "x-bitmap", + "x-font-directory-index", + "x-pixmap", + "x10", + "xc", + "xcompose", + "xml", + "xml-property-list", + "xpages", + "xproc", + "xquery", + "xs", + "xslt", + "xojo", + "xonsh", + "xtend", + "yaml", + "yang", + "yara", + "yasnippet", + "yacc", + "yul", + "zap", + "zil", + "zeek", + "zenscript", + "zephir", + "zig", + "zimpl", + "curl-config", + "desktop", + "dircolors", + "ec", + "edn", + "fish", + "hoon", + "jq", + "kvlang", + "mirc-script", + "mcfunction", + "mupad", + "nanorc", + "nesc", + "ooc", + "q", + "restructuredtext", + "robots-txt", + "sed", + "wisp", + "xbase", +] +`; diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/index.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/index.ts new file mode 100644 index 0000000000..b56618b203 --- /dev/null +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/index.ts @@ -0,0 +1,20 @@ +/* + * Copyright 2023 The Backstage Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +export type { + LinguistTagsProcessorOptions, + ShouldProcessEntity, +} from './LinguistTagsProcessor'; +export { LinguistTagsProcessor } from './LinguistTagsProcessor'; diff --git a/workspaces/linguist/plugins/linguist-backend/README.md b/workspaces/linguist/plugins/linguist-backend/README.md index 9ffd7c2c63..6d779e1b16 100644 --- a/workspaces/linguist/plugins/linguist-backend/README.md +++ b/workspaces/linguist/plugins/linguist-backend/README.md @@ -172,122 +172,6 @@ export default async function createPlugin( ); ``` -### Processor Options - -The processor accepts configurations either directly as options when constructing using `fromConfig()`, or can also be configured in `app-config.yaml` with the same fields. - -Example linguist processor configuration: - -```yaml -linguist: - tagsProcessor: - bytesThreshold: 1000 - languageTypes: ['programming', 'markup'] - languageMap: - Dockerfile: '' - TSX: 'react' - tagPrefix: 'lang:' - cacheTTL: - hours: 24 -``` - -#### `languageMap` - -The `languageMap` option allows you to build a custom map of linguist languages to how you want them to show up as tags. The keys should be exact matches to languages in the [linguist dataset](https://github.com/github-linguist/linguist/blob/master/lib/linguist/languages.yml) and the values should be how they render as backstage tags. These values will be used "as is" and will not be further transformed. - -Keep in mind that backstage has [character requirements for tags](https://backstage.io/docs/features/software-catalog/descriptor-format#tags-optional). 
If your map emits an invalid tag, it will cause an error during processing and your entity will not be processed. - -If you map a key to `''`, it will not be emitted as a tag. This can be useful if you want to ignore some of the linguist languages. - -```yaml -linguist: - tagsProcessor: - languageMap: - # You don't want dockerfile to show up as a tag - Dockerfile: '' - # Be more specific about what the file is - HCL: terraform - # A more casual tag for a formal name - Protocol Buffer: protobuf -``` - -#### `tagPrefix` - -The `tagPrefix` option allows you to provide a prefix to all tags created by linguist. Keep in mind that backstage has [character requirements for tags](https://backstage.io/docs/features/software-catalog/descriptor-format#tags-optional). If your prefix emits an invalid tag, it will cause an error during processing and your entity will not be processed. - -As an example, use the following config to get tags like `lang:java` instead of just `java`. - -```yaml -linguist: - tagsProcessor: - tagPrefix: 'lang:' -``` - -#### `cacheTTL` - -The `cacheTTL` option allows you to determine for how long this processor will cache languages for an `entityRef` before refreshing from the linguist backend. As this processor will run continuously, this cache is supplied to limit the load done on the linguist DB and API. - -By default, this processor will cache languages for 30 minutes before refreshing from the linguist database. - -You can optionally disable the cache entirely by passing in a `cacheTTL` duration of 0 minutes. - -```yaml -linguist: - tagsProcessor: - cacheTTL: { minutes: 0 } -``` - -#### `bytesThreshold` - -The `bytesThreshold` option allows you to control a number of bytes threshold which must be surpassed before a language tag will be emitted by this processor. 
As an example, some repositories may have short build scripts written in Bash, but you may only want the main language of the project emitted (an alternate way to control this is to use the `languageMap` to map `Shell` languages to `undefined`). - -```yaml -linguist: - tagsProcessor: - # Ignore languages with less than 5000 bytes in a repo. - bytesThreshold: 5000 -``` - -#### `languageTypes` - -The `languageTypes` option allows you to control what categories of linguist languages are automatically added as tags. By default, this will only include language tags of type `programming`, but you can pass in a custom array here to allow adding other language types. - -You can see the full breakdown of linguist supported languages [in their repo](https://github.com/github-linguist/linguist/blob/master/lib/linguist/languages.yml). - -For example, you may want to also include languages of type `data` - -```yaml -linguist: - tagsProcessor: - languageTypes: - - programming - - data -``` - -#### `shouldProcessEntity` - -The `shouldProcessEntity` is a function you can pass into the processor which determines which entities should have language tags fetched from linguist and added to the entity. By default, this will only run on entities of `kind: Component`, however this function let's you fully customize which entities should be processed. - -As an example, you may choose to extend this to support both `Component` and `Resource` kinds along with allowing an opt-in annotation on the entity which entity authors can use. - -As this option is a function, it cannot be configured in `app-config.yaml`. You must pass this as an option within typescript. 
- -```ts -LinguistLanguageTagsProcessor.fromConfig(env.config, { - logger: env.logger, - discovery: env.discovery, - shouldProcessEntity: (entity: Entity) => { - if ( - ['Component', 'Resource'].includes(entity.kind) && - entity.metadata.annotations?.['some-custom-annotation'] - ) { - return true; - } - return false; - }, -}); -``` - ## Links - [Frontend part of the plugin](https://github.com/backstage/backstage/tree/master/plugins/linguist) diff --git a/workspaces/linguist/plugins/linguist-backend/api-report.md b/workspaces/linguist/plugins/linguist-backend/api-report.md index 5c7b1a5371..43718d1484 100644 --- a/workspaces/linguist/plugins/linguist-backend/api-report.md +++ b/workspaces/linguist/plugins/linguist-backend/api-report.md @@ -46,7 +46,7 @@ export interface LinguistBackendApi { const linguistPlugin: BackendFeatureCompat; export default linguistPlugin; -// @public +// @public @deprecated export class LinguistTagsProcessor implements CatalogProcessor { constructor(options: LinguistTagsProcessorOptions); // (undocumented) diff --git a/workspaces/linguist/plugins/linguist-backend/src/processor/LinguistTagsProcessor.ts b/workspaces/linguist/plugins/linguist-backend/src/processor/LinguistTagsProcessor.ts index 93f80fdbc9..17d7e2fc68 100644 --- a/workspaces/linguist/plugins/linguist-backend/src/processor/LinguistTagsProcessor.ts +++ b/workspaces/linguist/plugins/linguist-backend/src/processor/LinguistTagsProcessor.ts @@ -88,7 +88,10 @@ export interface LinguistTagsProcessorOptions { * add the languages to the entity as searchable tags. 
* * @public - * */ + * + * @deprecated Use `@backstage-community/plugin-catalog-backend-module-linguist-tags-processor` instead, + * see {@link https://github.com/backstage/community-plugins/tree/main/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor} + */ export class LinguistTagsProcessor implements CatalogProcessor { private logger: LoggerService; private discovery: DiscoveryService; diff --git a/workspaces/linguist/yarn.lock b/workspaces/linguist/yarn.lock index 9621573a9e..a9300a6f68 100644 --- a/workspaces/linguist/yarn.lock +++ b/workspaces/linguist/yarn.lock @@ -2729,6 +2729,26 @@ __metadata: languageName: node linkType: hard +"@backstage-community/plugin-catalog-backend-module-linguist-tags-processor@workspace:^, @backstage-community/plugin-catalog-backend-module-linguist-tags-processor@workspace:plugins/catalog-backend-module-linguist-tags-processor": + version: 0.0.0-use.local + resolution: "@backstage-community/plugin-catalog-backend-module-linguist-tags-processor@workspace:plugins/catalog-backend-module-linguist-tags-processor" + dependencies: + "@backstage-community/plugin-linguist-common": "workspace:^" + "@backstage/backend-common": ^0.21.7 + "@backstage/backend-plugin-api": ^0.6.17 + "@backstage/backend-tasks": ^0.5.22 + "@backstage/backend-test-utils": ^0.3.7 + "@backstage/catalog-model": ^1.4.5 + "@backstage/cli": ^0.26.3 + "@backstage/config": ^1.2.0 + "@backstage/plugin-catalog-node": ^1.11.1 + "@backstage/types": ^1.1.1 + js-yaml: ^4.1.0 + linguist-js: ^2.5.3 + node-fetch: ^2.6.7 + languageName: unknown + linkType: soft + "@backstage-community/plugin-github-actions@npm:^0.6.16": version: 0.6.16 resolution: "@backstage-community/plugin-github-actions@npm:0.6.16" @@ -2874,7 +2894,7 @@ __metadata: languageName: node linkType: hard -"@backstage/backend-app-api@npm:^0.7.9": +"@backstage/backend-app-api@npm:^0.7.0, @backstage/backend-app-api@npm:^0.7.3, @backstage/backend-app-api@npm:^0.7.9": version: 0.7.9 resolution: 
"@backstage/backend-app-api@npm:0.7.9" dependencies: @@ -2920,6 +2940,146 @@ __metadata: languageName: node linkType: hard +"@backstage/backend-common@npm:^0.21.7": + version: 0.21.7 + resolution: "@backstage/backend-common@npm:0.21.7" + dependencies: + "@aws-sdk/abort-controller": ^3.347.0 + "@aws-sdk/client-codecommit": ^3.350.0 + "@aws-sdk/client-s3": ^3.350.0 + "@aws-sdk/credential-providers": ^3.350.0 + "@aws-sdk/types": ^3.347.0 + "@backstage/backend-app-api": ^0.7.0 + "@backstage/backend-dev-utils": ^0.1.4 + "@backstage/backend-plugin-api": ^0.6.17 + "@backstage/cli-common": ^0.1.13 + "@backstage/config": ^1.2.0 + "@backstage/config-loader": ^1.8.0 + "@backstage/errors": ^1.2.4 + "@backstage/integration": ^1.10.0 + "@backstage/integration-aws-node": ^0.1.12 + "@backstage/plugin-auth-node": ^0.4.12 + "@backstage/types": ^1.1.1 + "@google-cloud/storage": ^7.0.0 + "@keyv/memcache": ^1.3.5 + "@keyv/redis": ^2.5.3 + "@kubernetes/client-node": 0.20.0 + "@manypkg/get-packages": ^1.1.3 + "@octokit/rest": ^19.0.3 + "@types/cors": ^2.8.6 + "@types/dockerode": ^3.3.0 + "@types/express": ^4.17.6 + "@types/luxon": ^3.0.0 + "@types/webpack-env": ^1.15.2 + archiver: ^6.0.0 + base64-stream: ^1.0.0 + compression: ^1.7.4 + concat-stream: ^2.0.0 + cors: ^2.8.5 + dockerode: ^4.0.0 + express: ^4.17.1 + express-promise-router: ^4.1.0 + fs-extra: ^11.2.0 + git-url-parse: ^14.0.0 + helmet: ^6.0.0 + isomorphic-git: ^1.23.0 + jose: ^5.0.0 + keyv: ^4.5.2 + knex: ^3.0.0 + lodash: ^4.17.21 + logform: ^2.3.2 + luxon: ^3.0.0 + minimatch: ^9.0.0 + mysql2: ^3.0.0 + node-fetch: ^2.6.7 + p-limit: ^3.1.0 + pg: ^8.11.3 + raw-body: ^2.4.1 + tar: ^6.1.12 + uuid: ^9.0.0 + winston: ^3.2.1 + winston-transport: ^4.5.0 + yauzl: ^3.0.0 + yn: ^4.0.0 + peerDependencies: + pg-connection-string: ^2.3.0 + peerDependenciesMeta: + pg-connection-string: + optional: true + checksum: a774e8556d2286fe4648a669c96cece8f831db11b1d7c1075a6bf8da43318ce53e064543b173b7ecc347a23c738e2b52a74168d5f9403fc20fa14eaf2d1fc83b + 
languageName: node + linkType: hard + +"@backstage/backend-common@npm:^0.22.0": + version: 0.22.0 + resolution: "@backstage/backend-common@npm:0.22.0" + dependencies: + "@aws-sdk/abort-controller": ^3.347.0 + "@aws-sdk/client-codecommit": ^3.350.0 + "@aws-sdk/client-s3": ^3.350.0 + "@aws-sdk/credential-providers": ^3.350.0 + "@aws-sdk/types": ^3.347.0 + "@backstage/backend-app-api": ^0.7.3 + "@backstage/backend-dev-utils": ^0.1.4 + "@backstage/backend-plugin-api": ^0.6.18 + "@backstage/cli-common": ^0.1.13 + "@backstage/config": ^1.2.0 + "@backstage/config-loader": ^1.8.0 + "@backstage/errors": ^1.2.4 + "@backstage/integration": ^1.11.0 + "@backstage/integration-aws-node": ^0.1.12 + "@backstage/plugin-auth-node": ^0.4.13 + "@backstage/types": ^1.1.1 + "@google-cloud/storage": ^7.0.0 + "@keyv/memcache": ^1.3.5 + "@keyv/redis": ^2.5.3 + "@kubernetes/client-node": 0.20.0 + "@manypkg/get-packages": ^1.1.3 + "@octokit/rest": ^19.0.3 + "@types/cors": ^2.8.6 + "@types/dockerode": ^3.3.0 + "@types/express": ^4.17.6 + "@types/luxon": ^3.0.0 + "@types/webpack-env": ^1.15.2 + archiver: ^6.0.0 + base64-stream: ^1.0.0 + compression: ^1.7.4 + concat-stream: ^2.0.0 + cors: ^2.8.5 + dockerode: ^4.0.0 + express: ^4.17.1 + express-promise-router: ^4.1.0 + fs-extra: ^11.2.0 + git-url-parse: ^14.0.0 + helmet: ^6.0.0 + isomorphic-git: ^1.23.0 + jose: ^5.0.0 + keyv: ^4.5.2 + knex: ^3.0.0 + lodash: ^4.17.21 + logform: ^2.3.2 + luxon: ^3.0.0 + minimatch: ^9.0.0 + mysql2: ^3.0.0 + node-fetch: ^2.6.7 + p-limit: ^3.1.0 + pg: ^8.11.3 + raw-body: ^2.4.1 + tar: ^6.1.12 + uuid: ^9.0.0 + winston: ^3.2.1 + winston-transport: ^4.5.0 + yauzl: ^3.0.0 + yn: ^4.0.0 + peerDependencies: + pg-connection-string: ^2.3.0 + peerDependenciesMeta: + pg-connection-string: + optional: true + checksum: 289f5cbe2ef826bd5c1ae949090b24999a80622f023928f148286a445b2e326a8fb58d1f6861b1c40d7ca9ab404e35c9a073b53e8660c20726caf0d305aaa337 + languageName: node + linkType: hard + "@backstage/backend-common@npm:^0.23.2": 
version: 0.23.2 resolution: "@backstage/backend-common@npm:0.23.2" @@ -3098,7 +3258,7 @@ __metadata: languageName: node linkType: hard -"@backstage/backend-plugin-api@npm:^0.6.21": +"@backstage/backend-plugin-api@npm:^0.6.17, @backstage/backend-plugin-api@npm:^0.6.18, @backstage/backend-plugin-api@npm:^0.6.21": version: 0.6.21 resolution: "@backstage/backend-plugin-api@npm:0.6.21" dependencies: @@ -3117,7 +3277,7 @@ __metadata: languageName: node linkType: hard -"@backstage/backend-tasks@npm:^0.5.26": +"@backstage/backend-tasks@npm:^0.5.22, @backstage/backend-tasks@npm:^0.5.26": version: 0.5.26 resolution: "@backstage/backend-tasks@npm:0.5.26" dependencies: @@ -3138,6 +3298,35 @@ __metadata: languageName: node linkType: hard +"@backstage/backend-test-utils@npm:^0.3.7": + version: 0.3.8 + resolution: "@backstage/backend-test-utils@npm:0.3.8" + dependencies: + "@backstage/backend-app-api": ^0.7.3 + "@backstage/backend-common": ^0.22.0 + "@backstage/backend-plugin-api": ^0.6.18 + "@backstage/config": ^1.2.0 + "@backstage/errors": ^1.2.4 + "@backstage/plugin-auth-node": ^0.4.13 + "@backstage/plugin-events-node": ^0.3.4 + "@backstage/types": ^1.1.1 + better-sqlite3: ^9.0.0 + cookie: ^0.6.0 + express: ^4.17.1 + fs-extra: ^11.0.0 + knex: ^3.0.0 + msw: ^1.0.0 + mysql2: ^3.0.0 + pg: ^8.11.3 + testcontainers: ^10.0.0 + textextensions: ^5.16.0 + uuid: ^9.0.0 + peerDependencies: + "@types/jest": "*" + checksum: 76813c12afc21b0a02c323757dbd3cfc3f30108283bf3f5eee2e237bd9d69d4056ff801a027e5ac0464037266709f2e959e9e76fdf3d607d811f35e31a5c62fb + languageName: node + linkType: hard + "@backstage/backend-test-utils@npm:^0.4.3": version: 0.4.3 resolution: "@backstage/backend-test-utils@npm:0.4.3" @@ -3197,7 +3386,7 @@ __metadata: languageName: node linkType: hard -"@backstage/cli-common@npm:^0.1.14": +"@backstage/cli-common@npm:^0.1.13, @backstage/cli-common@npm:^0.1.14": version: 0.1.14 resolution: "@backstage/cli-common@npm:0.1.14" checksum: 
6c5031ae31f08b405e5e59105d98e43dc6d865f960e5d016067267ecabccd5a892ab65d59d5b9e31850dccddb9eb29e06bf360ab6be8f7949991561ddb163fcb @@ -3220,7 +3409,7 @@ __metadata: languageName: node linkType: hard -"@backstage/cli@npm:^0.26.10": +"@backstage/cli@npm:^0.26.10, @backstage/cli@npm:^0.26.3": version: 0.26.10 resolution: "@backstage/cli@npm:0.26.10" dependencies: @@ -3355,7 +3544,7 @@ __metadata: languageName: node linkType: hard -"@backstage/config-loader@npm:^1.8.1": +"@backstage/config-loader@npm:^1.8.0, @backstage/config-loader@npm:^1.8.1": version: 1.8.1 resolution: "@backstage/config-loader@npm:1.8.1" dependencies: @@ -3608,7 +3797,7 @@ __metadata: languageName: node linkType: hard -"@backstage/integration@npm:^1.10.0, @backstage/integration@npm:^1.12.0": +"@backstage/integration@npm:^1.10.0, @backstage/integration@npm:^1.11.0, @backstage/integration@npm:^1.12.0": version: 1.12.0 resolution: "@backstage/integration@npm:1.12.0" dependencies: @@ -3978,7 +4167,7 @@ __metadata: languageName: node linkType: hard -"@backstage/plugin-auth-node@npm:^0.4.16": +"@backstage/plugin-auth-node@npm:^0.4.12, @backstage/plugin-auth-node@npm:^0.4.13, @backstage/plugin-auth-node@npm:^0.4.16": version: 0.4.16 resolution: "@backstage/plugin-auth-node@npm:0.4.16" dependencies: @@ -4151,7 +4340,7 @@ __metadata: languageName: node linkType: hard -"@backstage/plugin-catalog-node@npm:^1.12.3": +"@backstage/plugin-catalog-node@npm:^1.11.1, @backstage/plugin-catalog-node@npm:^1.12.3": version: 1.12.3 resolution: "@backstage/plugin-catalog-node@npm:1.12.3" dependencies: @@ -4242,7 +4431,7 @@ __metadata: languageName: node linkType: hard -"@backstage/plugin-events-node@npm:^0.3.7": +"@backstage/plugin-events-node@npm:^0.3.4, @backstage/plugin-events-node@npm:^0.3.7": version: 0.3.7 resolution: "@backstage/plugin-events-node@npm:0.3.7" dependencies: @@ -14052,6 +14241,7 @@ __metadata: version: 0.0.0-use.local resolution: "backend@workspace:packages/backend" dependencies: + 
"@backstage-community/plugin-catalog-backend-module-linguist-tags-processor": "workspace:^" "@backstage-community/plugin-linguist-backend": "workspace:^" "@backstage/backend-common": ^0.23.2 "@backstage/backend-defaults": ^0.3.3 From a96f2221f9cc90fb2030b1af7650510104361406 Mon Sep 17 00:00:00 2001 From: Andre Wanlin Date: Fri, 5 Jul 2024 14:56:42 -0500 Subject: [PATCH 2/4] Updates based on feedback Signed-off-by: Andre Wanlin --- .../README.md | 69 ------------------- .../api-report.md | 5 +- .../config.d.ts | 3 +- .../processor/LinguistTagsProcessor.test.ts | 22 ------ .../src/processor/LinguistTagsProcessor.ts | 9 --- 5 files changed, 3 insertions(+), 105 deletions(-) diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md index e97a1182d2..7a79e7ec74 100644 --- a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/README.md @@ -27,49 +27,6 @@ Then in your `/packages/backend/src/index.ts` file you simply add the following backend.start(); ``` -### Legacy Setup - -To setup the Linguist Tags Processor when using the legacy backend you'll first need to run this command to add the package: - -```sh -# From your Backstage root directory -yarn --cwd packages/backend add @backstage-community/plugin-catalog-backend-module-linguist-tags-processor -``` - -Then you will need to make the following changes in your `/packages/backend/src/plugins/catalog.ts` file: - -```ts -import { LinguistTagsProcessor } from '@backstage-community/plugin-linguist-backend'; -// ... -export default async function createPlugin( - // ... 
- builder.addProcessor( - LinguistTagsProcessor.fromConfig(env.config, { - logger: env.logger, - discovery: env.discovery, - }) - ); -``` - -```diff - import { CatalogBuilder } from '@backstage/plugin-catalog-backend'; - import { ScaffolderEntitiesProcessor } from '@backstage/plugin-catalog-backend-module-scaffolder-entity-model'; - import { Router } from 'express'; - import { PluginEnvironment } from '../types'; -+ import { LinguistTagsProcessor } from '@backstage-community/plugin-catalog-backend-module-linguist-tags-processor'; - - export default async function createPlugin( - env: PluginEnvironment, - ): Promise { - const builder = await CatalogBuilder.create(env); - builder.addProcessor(new ScaffolderEntitiesProcessor()); -+ builder.addProcessor(LinguistTagsProcessor.fromConfig(env.config, { logger: env.logger, discovery: env.discovery }); - const { processingEngine, router } = await builder.build(); - await processingEngine.start(); - return router; - } -``` - ### Processor Options The processor can be configured in `app-config.yaml`, here is an example Linguist Tag Processor configuration: @@ -159,29 +116,3 @@ linguist: - programming - data ``` - -#### `shouldProcessEntity` - -The `shouldProcessEntity` is a function you can pass into the processor which determines which entities should have language tags fetched from linguist and added to the entity. By default, this will only run on entities of `kind: Component`, however this function let's you fully customize which entities should be processed. - -> Note: this is not currently supported with the new backend system - -As an example, you may choose to extend this to support both `Component` and `Resource` kinds along with allowing an opt-in annotation on the entity which entity authors can use. - -As this option is a function, it cannot be configured in `app-config.yaml`. You must pass this as an option within typescript. 
- -```ts -LinguistLanguageTagsProcessor.fromConfig(env.config, { - logger: env.logger, - discovery: env.discovery, - shouldProcessEntity: (entity: Entity) => { - if ( - ['Component', 'Resource'].includes(entity.kind) && - entity.metadata.annotations?.['some-custom-annotation'] - ) { - return true; - } - return false; - }, -}); -``` diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md index 2de880e6ed..1c52acc17a 100644 --- a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/api-report.md @@ -4,7 +4,7 @@ ```ts import { AuthService } from '@backstage/backend-plugin-api'; -import { BackendFeature } from '@backstage/backend-plugin-api'; +import { BackendFeatureCompat } from '@backstage/backend-plugin-api'; import { CatalogProcessor } from '@backstage/plugin-catalog-node'; import { CatalogProcessorCache } from '@backstage/plugin-catalog-node'; import { Config } from '@backstage/config'; @@ -15,7 +15,7 @@ import { LanguageType } from '@backstage-community/plugin-linguist-common'; import { LoggerService } from '@backstage/backend-plugin-api'; // @public (undocumented) -const catalogModuleLinguistTagsProcessor: () => BackendFeature; +const catalogModuleLinguistTagsProcessor: BackendFeatureCompat; export default catalogModuleLinguistTagsProcessor; // @public @@ -49,7 +49,6 @@ export interface LinguistTagsProcessorOptions { languageTypes?: LanguageType[]; // (undocumented) logger: LoggerService; - shouldProcessEntity?: ShouldProcessEntity; tagPrefix?: string; } diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts index e6ba743d7b..ef5fd3f273 100644 --- 
a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts @@ -14,9 +14,8 @@ * limitations under the License. */ -import { TaskScheduleDefinition } from '@backstage/backend-tasks'; + import { HumanDuration } from '@backstage/types'; -import { Options as LinguistJsOptions } from 'linguist-js/dist/types'; export interface Config { /** Configuration options for the linguist plugin */ diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts index 52d061373f..0ff327a1d1 100644 --- a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.test.ts @@ -166,27 +166,6 @@ describe('LinguistTagsProcessor', () => { expect(entity.metadata.tags).toStrictEqual(undefined); }); - test('Can process Resource entities by overriding shouldProcessEntity', async () => { - const processor = buildProcessor({ - shouldProcessEntity: (entity: Entity) => { - return entity.kind === 'Resource'; - }, - }); - - mockFetchImplementation(); - const entity = baseEntity(); - entity.kind = 'Resource'; - - await processor.preProcessEntity(entity, null, null, null, cache); - expect(mockedFetch).toHaveBeenCalledTimes(1); - expect(entity.metadata.tags).toStrictEqual([ - 'c++', - 'asp-dot-net', - 'java', - 'common-lisp', - ]); - }); - test('Can omit languages using languageMap', async () => { const processor = buildProcessor({ languageMap: { @@ -365,6 +344,5 @@ function buildProcessor(options: Partial) { logger: getVoidLogger(), discovery, auth, - shouldProcessEntity: options.shouldProcessEntity, }); } diff --git 
a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts index 76272ca355..6a909f9da2 100644 --- a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/src/processor/LinguistTagsProcessor.ts @@ -59,12 +59,6 @@ export interface LinguistTagsProcessorOptions { * requirements for tags. If you map a key to a falsey value, it will not be emitted as a tag. */ languageMap?: Record; - /** - * A function which determines which entities should be processed by the LinguistTagProcessor. - * - * The default is to process all entities of kind=Component - */ - shouldProcessEntity?: ShouldProcessEntity; /** * Determines how long to cache language breakdowns for entities in the processor. Considering * how often this processor runs, caching can help move some read traffic off of the linguist DB. 
@@ -116,9 +110,6 @@ export class LinguistTagsProcessor implements CatalogProcessor { this.logger = options.logger; this.discovery = options.discovery; this.auth = options.auth; - if (options.shouldProcessEntity) { - this.shouldProcessEntity = options.shouldProcessEntity; - } this.cacheTTLMilliseconds = durationToMilliseconds( options.cacheTTL || { minutes: 30 }, ); From dcce5b1a4be15104dba78c374c557d9c057aaaa0 Mon Sep 17 00:00:00 2001 From: Andre Wanlin Date: Fri, 5 Jul 2024 15:04:48 -0500 Subject: [PATCH 3/4] Fixed prettier error Signed-off-by: Andre Wanlin --- .../catalog-backend-module-linguist-tags-processor/config.d.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts index ef5fd3f273..a452e61dd4 100644 --- a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/config.d.ts @@ -14,7 +14,6 @@ * limitations under the License. 
*/ - import { HumanDuration } from '@backstage/types'; export interface Config { From 3c0afa4be11259e925677b4f5d8300033253a070 Mon Sep 17 00:00:00 2001 From: Andre Wanlin Date: Fri, 5 Jul 2024 15:09:56 -0500 Subject: [PATCH 4/4] Removed private flag Signed-off-by: Andre Wanlin --- .../catalog-backend-module-linguist-tags-processor/package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json index 5aa4903d80..8a63203331 100644 --- a/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json +++ b/workspaces/linguist/plugins/catalog-backend-module-linguist-tags-processor/package.json @@ -5,7 +5,6 @@ "main": "src/index.ts", "types": "src/index.ts", "license": "Apache-2.0", - "private": true, "publishConfig": { "access": "public", "main": "dist/index.cjs.js",