diff --git a/.gitignore b/.gitignore index 111dd6b33..f6aff726f 100644 --- a/.gitignore +++ b/.gitignore @@ -13,3 +13,5 @@ src/scripts/jsonld/* /nohup.out src/*.gmt downloads/* +public/img/pathways/* +!/**/.gitkeep \ No newline at end of file diff --git a/Dockerfile.syblars b/Dockerfile.syblars new file mode 100644 index 000000000..9b5e766c9 --- /dev/null +++ b/Dockerfile.syblars @@ -0,0 +1,36 @@ +FROM node:14.21.3 + +# Environment variables +ENV NODE_OPTIONS=--max_old_space_size=8192 + +# Create an unprivileged user w/ home directory +RUN groupadd appuser \ + && useradd --gid appuser --shell /bin/bash --create-home appuser + +# Create app directory +RUN mkdir -p /home/appuser/app + +# Copy in source code +RUN cd /home/appuser/app && git clone https://github.com/iVis-at-Bilkent/syblars.git +WORKDIR /home/appuser/app/syblars + +# Install app dependencies +# Puppeteer requirements +RUN wget -q -O - https://dl-ssl.google.com/linux/linux_signing_key.pub | apt-key add - \ + && sh -c 'echo "deb [arch=amd64] http://dl.google.com/linux/chrome/deb/ stable main" >> /etc/apt/sources.list.d/google.list' \ + && apt-get update \ + && apt-get install -y google-chrome-stable fonts-ipafont-gothic fonts-wqy-zenhei fonts-thai-tlwg fonts-kacst fonts-freefont-ttf libxss1 libxtst6 gconf-service libasound2 libatk1.0-0 libatk-bridge2.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget \ + --no-install-recommends + +RUN npm clean-install + +# Expose port +EXPOSE 3000 + +# Change ownership of the app to the unprivileged user +RUN chown appuser:appuser -R /home/appuser/app +USER appuser + +# set server start as entry point +ENTRYPOINT npm run 
start + diff --git a/README.md b/README.md index 1520f6896..67bf083c7 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,8 @@ The following environment variables can be used to configure the server (also do - `PC_URL`: Pathway Commons homepage URL (default: 'http://www.pathwaycommons.org/'; cPath2 service should be there available at /pc2/ path) - `NCBI_API_KEY`: NCBI E-Utilities API key ([read more](https://ncbiinsights.ncbi.nlm.nih.gov/2017/11/02/new-api-keys-for-the-e-utilities/)) - `FACTOID_URL`: the Factoid app URL (default: 'http://unstable.factoid.baderlab.org/') +- `SBGN_IMG_SERVICE_BASE_URL`: URL for service that converts SBGN to an image (i.e. [Syblars](http://syblars.cs.bilkent.edu.tr/); default is `http://localhost:9090/`) +- `SBGN_IMG_PATH`: cli tool `snapshot` output folder for images (default: `public/img/pathways`) ## Run targets @@ -66,7 +68,7 @@ docker build --build-arg NODE_ENV=production -t app-ui . Run the container: ``` -docker run -it --rm -p 12345:3000 -e "NODE_ENV=production" --name "app-ui" app-ui +docker run -it --rm -p 3000:3000 -e "NODE_ENV=production" --name "app-ui" app-ui ``` Notes: @@ -93,25 +95,15 @@ PC repository on Docker Hub). To run the app using the pathwaycommons/app-ui:master image, execute: ```sh -docker-compose up -d +docker-compose up -d webapp ``` -Access the app instance at port `9090` (can be specified in the docker-compose.yml). +Access the app instance at port `3000` (can be specified in the docker-compose.yml). Notes: - References: - [Getting started with Docker Compose](https://docs.docker.com/compose/gettingstarted/) -### Custom build/rebuild/run with Docker Compose - -Create .env file in this directory and define there yours: NODE_ENV, PC_URL, FACTOID_URL, PORT options; -execute: - -```sh -docker-compose -f dev-compose.yml build -docker-compose -f dev-compose.yml up -d -``` - ## Testing @@ -121,6 +113,26 @@ can run `npm run test ./test/path/to/test` to run specific tests. 
[Chai](http://chaijs.com/) is included to make the tests easier to read and write. +## Scripts + +### Command line tools + +The `src/scripts/cli.js` file contains app-ui command line tools: + - `source`: Download and extract a file to `downloads` folder + - `snapshot`: Generate PNG images for pathways listed in a PC GMT-formatted file + - Requires an instance of [Syblars](http://syblars.cs.bilkent.edu.tr/) accessible at a location defined by the configuration variable `SBGN_IMG_SERVICE_BASE_URL` (see `docker-compose.yml` service `syblars`) + - Images will be placed in directory `SBGN_IMG_PATH` (default: `public/img/pathways`) + +Usage: To generate a PNG of an SBGN representation for each pathway declared in the GMT file at `downloads/PathwayCommons12.All.hgnc.gmt`: + +```sh +$ docker-compose up -d syblars +$ SERVER_FETCH_TIMEOUT="60000" node src/scripts/cli.js snapshot --file PathwayCommons12.All.hgnc.gmt +``` +NB: The default timeout of fetch is normally quite brief (5 seconds). + +In this way, images will be served via expressJS at `img/pathways/:id`, where `id` is the lowercased pathway URI with every character that is not a letter (a-z) or digit (0-9) replaced by an underscore (`_`). 
+ ## Developing a feature and making a pull request Students who work on the repo should follow these instructions for each feature that they work on: diff --git a/docker-compose.yml b/docker-compose.yml index b2041fedc..83c999dc1 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,6 +32,14 @@ services: FACTOID_URL: networks: - app-ui-network + syblars: + image: pathwaycommons/syblars:${SYBLARS_IMAGE_TAG:-latest} + restart: unless-stopped + container_name: syblars + ports: + - "${SYBLARS_PORT:-9090}:3000" + networks: + - app-ui-network networks: app-ui-network: diff --git a/package-lock.json b/package-lock.json index ce395bab8..199aed641 100644 --- a/package-lock.json +++ b/package-lock.json @@ -401,6 +401,15 @@ "integrity": "sha512-jp/uFnooOiO+L211eZOoSyzpOITMXx1rBITauYykG3BRYPu8h0UcxsPNB04RR5vo4Tyz3+ay17tR6JVf9qzYWg==", "dev": true }, + "async-retry": { + "version": "1.3.3", + "resolved": "https://registry.npmjs.org/async-retry/-/async-retry-1.3.3.tgz", + "integrity": "sha512-wfr/jstw9xNi/0teMHrRW7dsz3Lt5ARhYNZ2ewpadnhaIp5mbALhOAP+EAdsC7t4Z6wqsDVv9+W6gm1Dk9mEyw==", + "dev": true, + "requires": { + "retry": "0.13.1" + } + }, "asynckit": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", @@ -2274,9 +2283,10 @@ } }, "commander": { - "version": "2.14.0", - "resolved": "https://registry.npmjs.org/commander/-/commander-2.14.0.tgz", - "integrity": "sha512-okPpdvdJr6mUGi2XzupC+irQxzwGLVaBzacFC14hjLv8NColXEsxsU+QaeuSSXpQUak5g2K0vQ7WjA1e8svczg==" + "version": "11.1.0", + "resolved": "https://registry.npmjs.org/commander/-/commander-11.1.0.tgz", + "integrity": "sha512-yPVavfyCcRhmorC7rWlkHn15b4wDVgVmBA7kV4QVBsF7kv/9TKJAbAXVTxvTnwP8HHKjRCJDClKbciiYS7p0DQ==", + "dev": true }, "commondir": { "version": "1.0.1", @@ -2296,6 +2306,13 @@ "integrity": "sha1-QPM1MInWVGdpXLGIa0Xt1jfYzKg=", "requires": { "commander": "^2.9.0" + }, + "dependencies": { + "commander": { + "version": "2.20.3", + "resolved": 
"https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" + } } }, "concat-map": { @@ -10812,6 +10829,12 @@ "integrity": "sha512-TTlYpa+OL+vMMNG24xSlQGEJ3B/RzEfUlLct7b5G/ytav+wPrplCpVMFuwzXbkecJrb6IYo1iFb0S9v37754mg==", "dev": true }, + "retry": { + "version": "0.13.1", + "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", + "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", + "dev": true + }, "revalidator": { "version": "0.1.8", "resolved": "https://registry.npmjs.org/revalidator/-/revalidator-0.1.8.tgz", @@ -12109,6 +12132,11 @@ "swagger-parser": "^3.4.1" }, "dependencies": { + "commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==" + }, "js-yaml": { "version": "3.12.2", "resolved": "https://registry.npmjs.org/js-yaml/-/js-yaml-3.12.2.tgz", @@ -14435,6 +14463,12 @@ "supports-color": "^4.0.0" } }, + "commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", + "dev": true + }, "has-flag": { "version": "2.0.0", "resolved": "https://registry.npmjs.org/has-flag/-/has-flag-2.0.0.tgz", @@ -14699,6 +14733,12 @@ "validator": "^10.0.0" }, "dependencies": { + "commander": { + "version": "2.20.3", + "resolved": "https://registry.npmjs.org/commander/-/commander-2.20.3.tgz", + "integrity": "sha512-GpVkmM8vF2vQUkj2LvZmD35JxeJOLCwJ9cUkugyk2nuhbv3+mJvpLYYt+0+USMxE+oj+ey/lJEnhZw75x/OMcQ==", + "optional": true + }, "core-js": { "version": "2.6.5", "resolved": "https://registry.npmjs.org/core-js/-/core-js-2.6.5.tgz", diff --git a/package.json b/package.json 
index e9736e36a..3252ae3fd 100644
--- a/package.json
+++ b/package.json
@@ -94,6 +94,7 @@
     "winston": "^2.4.0"
   },
   "devDependencies": {
+    "async-retry": "^1.3.3",
     "babel-core": "^6.26.0",
     "babel-loader": "^7.1.2",
     "babel-plugin-transform-async-to-generator": "^6.24.1",
@@ -101,6 +102,7 @@
     "babel-preset-env": "^1.6.0",
     "babel-preset-react": "^6.24.1",
     "chai": "^4.1.2",
+    "commander": "^11.1.0",
     "cross-env": "^5.0.5",
     "cssnano": "^3.10.0",
     "echo-cli": "^1.0.8",
diff --git a/public/img/pathways/.gitkeep b/public/img/pathways/.gitkeep
new file mode 100644
index 000000000..e69de29bb
diff --git a/src/config.js b/src/config.js
index 042ce80c8..19fee7049 100644
--- a/src/config.js
+++ b/src/config.js
@@ -15,6 +15,8 @@ let defaults = {
   UNIPROT_API_BASE_URL: 'https://www.ebi.ac.uk/proteins/api',
   DOI_BASE_URL: 'https://doi.org/',
   ORCID_BASE_URL: 'https://orcid.org/',
+  SBGN_IMG_SERVICE_BASE_URL: 'http://localhost:9090/',
+  SBGN_IMG_PATH: 'public/img/pathways',
   PC_IMAGE_CACHE_MAX_SIZE: 10000,
   PC_CACHE_MAX_SIZE: 1000,
   PUB_CACHE_MAX_SIZE: 1000000,
diff --git a/src/scripts/cli.js b/src/scripts/cli.js
new file mode 100644
index 000000000..62a214f20
--- /dev/null
+++ b/src/scripts/cli.js
@@ -0,0 +1,291 @@
+const _ = require( 'lodash' );
+const stream = require( 'stream' );
+const path = require( 'path' );
+const { program } = require( 'commander' );
+const zlib = require( 'zlib' );
+const nodefetch = require( 'node-fetch' );
+const fs = require( 'fs' );
+const fsPromises = require('fs').promises;
+const readline = require( 'readline' );
+const retry = require( 'async-retry' );
+
+const logger = require( '../server/logger.js' );
+const {
+  DOWNLOADS_FOLDER_NAME,
+  SBGN_IMG_SERVICE_BASE_URL,
+  SBGN_IMG_PATH
+} = require( '../config.js' );
+const { fetch } = require( '../util/index.js' );
+const pc = require( '../server/external-services/pathway-commons.js' );
+
+// NOTE(review): presumably some of the modules required above expect a global fetch - confirm
+global.fetch = nodefetch;
+
+/**
+ * Source (download and extract) a file into the downloads folder
+ *
+ * @param {string} url The url for the file
+ * @param {object} options Command line opts
+ * @param {string} options.file Name of the output file, resolved against DOWNLOADS_FOLDER_NAME
+ * @param {string} [options.type] 'zip' or 'gzip' to decompress; any other value copies the bytes unchanged
+ * @returns {Promise} Resolves with the output stream once every byte has been written
+ */
+async function source( url, options ){
+  const { file, type } = options;
+  let extractor;
+  switch( type ) {
+    case 'zip':
+      // zlib's Unzip auto-detects gzip or deflate data; it does not read .zip archives
+      extractor = zlib.createUnzip();
+      break;
+    case 'gzip':
+      extractor = zlib.createGunzip();
+      break;
+    default:
+      extractor = new stream.PassThrough();
+  }
+  const outfile = path.resolve( DOWNLOADS_FOLDER_NAME, file );
+  const response = await fetch( url );
+  // Open the output only after the request succeeded so a failed fetch leaves no empty file
+  const outstream = fs.createWriteStream( outfile );
+
+  // Unlike pipe(), pipeline() reports an error from any stage and tears all streams down
+  return new Promise( ( resolve, reject ) => {
+    stream.pipeline( response.body, extractor, outstream, err => {
+      if( err ) return reject( err );
+      resolve( outstream );
+    });
+  });
+}
+
+/**
+ * Get a PNG image given SBGN-ML
+ *
+ * @param {string} sbgn The sbgn xml text
+ * @param {object} opts Options for the image service {@link https://github.com/iVis-at-Bilkent/syblars?tab=readme-ov-file#usage}
+ * @returns {Promise} Resolves with { data, mediatype }: the decoded image Buffer and its media type
+ */
+async function sbgn2image( sbgn, opts ){
+  // Split a data URL ("data:<mediatype>;<encoding>,<payload>") and decode its payload
+  const decodeBase64img = str => {
+    const match = typeof str === 'string' ? str.match(/^data:(?<mediatype>.*);(?<encoding>.*),(?<base64str>.*)$/) : null;
+    if( !match ) throw new Error( 'Image service did not return an image data URL' );
+    const { base64str, encoding, mediatype } = match.groups;
+    const data = Buffer.from( base64str, encoding );
+    return { data, mediatype };
+  };
+  const url = `${SBGN_IMG_SERVICE_BASE_URL}sbgnml`;
+  const defaults = {
+    layoutOptions: {
+      name: 'fcose',
+      randomize: true,
+      padding: 30
+    },
+    imageOptions: {
+      format: 'png',
+      background: 'transparent',
+      color: 'black_white'
+    }
+  };
+  // Merge into a fresh object so the caller's opts are never mutated
+  const imageOpts = _.defaults( {}, opts, defaults );
+  const imageOptsString = JSON.stringify( imageOpts );
+  // The request body is the SBGN-ML text immediately followed by the JSON options
+  const body = `${sbgn}${imageOptsString}`;
+  const fetchOpts = {
+    method: 'POST',
+    headers: {
+      'Content-Type': 'text/plain',
+      'Accept': 'application/json'
+    },
+    body
+  };
+
+  try {
+    const response = await fetch( url , fetchOpts );
+    const { image } = await response.json();
+    return decodeBase64img( image );
+
+  } catch ( err ) {
+    logger.error( err );
+    throw err;
+  }
+}
+
+/**
+ * Iterate over each line in a PC GMT file and retrieve an image, save to store.
+ *
+ * A line that fails with a FetchError stops the run immediately; any other failure is
+ * retried (3 retries) and, if it keeps failing, the error is rethrown. Blank lines are skipped.
+ *
+ * @param {string} fpath file path to the GMT file
+ * @param {object} store persistence via save function
+ * @param {object} get data retrieval
+ * @param {object} parse function to extract information from each line in GMT
+ * @param {object} convert function to map data to image
+ */
+async function imagesFromGmtFile( fpath, store, get, parse, convert ) {
+  const handleLine = async value => {
+    const { uri, meta, genes } = parse( value );
+    logger.info( `Handling pathway "${meta.name}" from ${meta.source}` );
+    const markup = await get({ uri, format: 'sbgn' });
+    const image = await convert( markup );
+    const item = _.assign( {}, { uri, genes, image }, meta );
+    await store.save( item );
+  };
+  let rl;
+  try {
+    const input = fs.createReadStream( fpath );
+    rl = readline.createInterface( { input, crlfDelay: Infinity });
+
+    for await ( const value of rl ) {
+      if( value.trim() === '' ) continue;
+
+      await retry(
+        async ( bail, count ) => {
+          logger.info( `------------------------------` );
+          logger.info(`Processing line: attempt ${count}`);
+          try {
+            await handleLine( value );
+          } catch ( err ) {
+            if( err.name === 'FetchError' ){
+              logger.error(`Fatal error processing`);
+              bail( err );
+              return;
+            }
+            // async-retry only schedules another attempt when this function rejects
+            logger.error( `Error processing line on attempt ${count}: ${err.message}` );
+            throw err;
+          }
+        },
+        { retries: 3 }
+      );
+    }
+
+  } catch ( err ) {
+    logger.error( err );
+    throw err;
+
+  } finally {
+    // rl is unset when the failure happened before the interface was created
+    if( rl ) rl.close();
+  }
+}
+
+/**
+ * Parse one line of a PC GMT file: tab-separated uri, description, then gene ids.
+ *
+ * The description is matched on its field labels instead of being split on ';' because
+ * the name is ambiguous: e.g.
+ * 'name: t(4;14) translocations of FGFR3; datasource: reactome; organism: 9606; idtype: hgnc symbol'
+ *
+ * @param {string} line A single GMT line
+ * @returns {object} { uri, meta, genes } where meta holds the named description fields
+ * @throws {Error} When the description does not have the expected layout
+ */
+const parsePCGmtLine = line => {
+  const values = line.split('\t');
+  const uri = values[0];
+  const match = ( values[1] || '' ).match(/^name:\s(?<name>.*);\sdatasource:\s(?<source>.*);\sorganism:\s(?<organism>.*);\sidtype:\s(?<idtype>.*)$/);
+  if( !match ) throw new Error( `Unexpected GMT description for ${uri}` );
+  const meta = match.groups;
+  const genes = values.slice(2);
+  return { uri, meta, genes };
+};
+
+// Create file safe names from a uri
+const uri2filename = s => s.replace(/[^a-z0-9]/gi, '_').toLowerCase();
+
+/**
+ * Create a store that writes each image to a file under SBGN_IMG_PATH
+ *
+ * @returns {Promise} Resolves with an object exposing save( item ) and close()
+ */
+async function getStore( ) {
+  // SBGN_IMG_PATH is configurable, so make sure the folder is there before the first save
+  await fsPromises.mkdir( path.resolve( SBGN_IMG_PATH ), { recursive: true } );
+
+  const store = {
+    async save( item ){
+      const { uri, image: { data, mediatype } } = item;
+      // e.g. 'image/png' -> 'png'
+      const ext = mediatype.split('/')[1];
+      const filename = uri2filename( uri );
+      const fpath = path.resolve( SBGN_IMG_PATH, `${filename}.${ext}` );
+      try {
+        await fsPromises.writeFile( fpath, data );
+        logger.info(`Saved item at ${uri} to file`);
+
+      } catch (err) {
+        logger.error(`Error saving data for ${uri}`);
+        logger.error(err);
+        throw err;
+      }
+    },
+
+    close(){
+      logger.info(`TODO: close store`);
+    }
+  };
+  return store;
+}
+
+/**
+ * Generate images for all pathways in a PC .gmt file
+ *
+ * @param {object} options Command line opts
+ * @param {string} options.file Name of the gmt file inside DOWNLOADS_FOLDER_NAME
+ */
+async function snapshot( options ){
+  const { file } = options;
+  const fpath = path.resolve( DOWNLOADS_FOLDER_NAME, file );
+  let store;
+  try {
+    store = await getStore();
+    await imagesFromGmtFile( fpath, store, pc.query, parsePCGmtLine, sbgn2image );
+
+  } catch ( err ) {
+    logger.error( err );
+    throw err;
+
+  } finally {
+    // Skip when getStore() itself failed, otherwise a TypeError would mask the real error
+    if( store ) await store.close();
+  }
+}
+
+/**
+ * Declare the commands and run the one named on the command line
+ */
+async function main () {
+  (program
+    .name( 'app-ui' )
+    .description( 'A CLI for processing pathway data' )
+  );
+
+  ( program.command( 'source' )
+    .description( 'Source (download and extract) a file' )
+    .argument( '<url>', 'URL of source file' )
+    .requiredOption( '-f, --file <name>', 'Name of output file' )
+    .option( '-t, --type <type>', 'Compression type', 'gzip' )
+    .action( source )
+  );
+
+  ( program.command( 'snapshot' )
+    .description( 'Generate images for all pathways in a PC gmt file' )
+    .requiredOption( '-f, --file <name>', 'Name of PC-formatted gmt source file' )
+    .action( snapshot )
+  );
+
+  await program.parseAsync();
+}
+
+// Only parse argv when run as a script, so requiring this module does not start the CLI
+if( require.main === module ){
+  main().catch( err => {
+    logger.error( err );
+    process.exitCode = 1;
+  });
+}
+
+module.exports = { snapshot, source };
\ No newline at end of file