Skip to content

Commit

Permalink
refactor config and proxy-server into separate files
Browse files Browse the repository at this point in the history
Proxy-server needs to be started before malware-scanner
  • Loading branch information
nielm committed Nov 24, 2023
1 parent 8988aa4 commit 560b7d4
Show file tree
Hide file tree
Showing 5 changed files with 265 additions and 171 deletions.
16 changes: 9 additions & 7 deletions cloudrun-malware-scanner/bootstrap.sh
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,6 @@ apt-get -qqy install --no-install-recommends clamav-daemon clamav-freshclam
export PATH="$PATH:$HOME/.local/bin" # add pipx locations to path.
pipx install cvdupdate

# The node server includes a reverse proxy which adds authentication
# to requests to GCS REST API, allowing freshclam to access the GCS
# CVD mirror as if it was an unauthenticated HTTP server
#
export PROXY_PORT=8888
PROXY_SERVER_ADDRESS=127.0.0.1:${PROXY_PORT}

# Ensure clamav services are shut down, as we do not have the config files set up yet.
service clamav-daemon stop &
service clamav-freshclam stop &
Expand Down Expand Up @@ -82,6 +75,15 @@ if ! gsutil ls "gs://${CVD_MIRROR_BUCKET}/" > /dev/null ; then
exit 1
fi

# Start the reverse proxy which adds authentication
# to requests to GCS REST API, allowing freshclam to access the GCS
# CVD mirror bucket as if it was an unauthenticated HTTP server
#
export PROXY_PORT=8888
PROXY_SERVER_ADDRESS=127.0.0.1:${PROXY_PORT}
# "start-proxy" is a script defined in package.json, not a built-in npm
# command, so it has to be invoked through "npm run". The "--" separator
# forwards the config file path to the script as its first argument.
npm run start-proxy -- "${CONFIG_FILE}" &
# Give the background proxy process a few seconds to start before continuing.
sleep 5

# This function is used to update clam and freshclam config files.
# Use by specifying the config file on the command line and
# piping the config file updates in.
Expand Down
136 changes: 136 additions & 0 deletions cloudrun-malware-scanner/config.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
/*
* Copyright 2022 Google LLC
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* https://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

const {Storage} = require('@google-cloud/storage');
const {logger} = require('./logger.js');
const pkgJson = require('./package.json');


/**
 * Configuration object.
 *
 * Values are read from the JSON configuration file.
 * See {@link readAndVerifyConfig}.
 *
 * `buckets` lists one entry per scanning pipeline, each naming its
 * unscanned, clean and quarantined bucket. `ClamCvdMirrorBucket` names the
 * bucket checked alongside them during config verification.
 *
 * The runtime value of this constant is null: it exists only so that the
 * typedef has a name which other modules can import for JSDoc annotations.
 *
 * @typedef {{
 *    buckets: Array<
 *      {
 *        unscanned: string,
 *        clean: string,
 *        quarantined: string
 *      }>,
 *    ClamCvdMirrorBucket: string
 *  }}
 */
const Config = null;

// Cloud Storage client used for the bucket checks in this module. The user
// agent is built from the package name and version found in package.json.
const storage = new Storage({userAgent: `${pkgJson.name}/${pkgJson.version}`});

/**
* Read configuration from JSON configuration file.
* and store in BUCKET_CONFIG global
*
* @async
* @param {string} configFile
* @return {Config}
*/
async function readAndVerifyConfig(configFile) {
logger.info(`Using configuration file: ${configFile}`);


/** @type {Config} */
let config;

try {
config = require(configFile);
delete config.comments;
} catch (e) {
logger.fatal(
{err: e},
`Unable to read JSON file from ${configFile}`);
throw new Error(`Invalid configuration ${configFile}`);
}

if (config.buckets.length === 0) {
logger.fatal(`No buckets configured for scanning in ${configFile}`);
throw new Error('No buckets configured');
}

logger.info('BUCKET_CONFIG: '+JSON.stringify(config, null, 2));

// Check buckets are specified and exist.
let success = true;
for (let x = 0; x < config.buckets.length; x++) {
const buckets = BUCKET_CONFIG.buckets[x];
for (const bucketType of ['unscanned', 'clean', 'quarantined']) {
if ( !(await checkBucketExists(
buckets[bucketType],
`config.buckets[${x}].${bucketType}`))) {
success=false;
}
}
if (buckets.unscanned === buckets.clean ||
buckets.unscanned === buckets.quarantined ||
buckets.clean === buckets.quarantined) {
logger.fatal(
`Error in ${configFile} buckets[${x}]: bucket names are not unique`);
success = false;
}
}
if ( !(await checkBucketExists(
config.ClamCvdMirrorBucket,
'ClamCvdMirrorBucket'))) {
success=false;
}

if (!success) {
throw new Error('Invalid configuration');
}
return config;
}


/**
 * Check that the given bucket is defined and readable.
 *
 * Never throws: every failure is logged and reported as `false`, so the
 * caller can collect all configuration errors before giving up.
 *
 * @async
 * @param {string} bucketName Name of the bucket to check.
 * @param {string} configName Name of the config property the bucket name
 *     came from; only used in log messages.
 * @return {Promise<boolean>} true if the bucket's files can be listed,
 *     false if the name is empty or the listing fails.
 */
async function checkBucketExists(bucketName, configName) {
  if (!bucketName) {
    logger.fatal(`Error in config: no "${configName}" bucket defined`);
    // Report the failure directly: there is no bucket to query.
    return false;
  }
  // Check for bucket existence by listing files in bucket, will throw
  // an exception if the bucket is not readable.
  // This is used in place of Bucket.exists() to avoid the need for
  // Project/viewer permission.
  try {
    await storage.bucket(bucketName).getFiles(
        {maxResults: 1, prefix: 'zzz', autoPaginate: false});
    return true;
  } catch (e) {
    logger.fatal(
        `Error in config: cannot view files in "${
          configName}" : ${bucketName} : ${e.message}`);
    logger.debug({err: e});
    return false;
  }
}

// Public API of this module. Config is exported only as a type name for
// JSDoc annotations in other modules (its runtime value is null).
exports.Config = Config;
exports.readAndVerifyConfig = readAndVerifyConfig;
113 changes: 113 additions & 0 deletions cloudrun-malware-scanner/gcs-proxy-server.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
/*
* Copyright 2022 Google LLC
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* https://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

const {GoogleAuth} = require('google-auth-library');
const {logger} = require('./logger.js');
// eslint-disable-next-line no-unused-vars
const {Config, readAndVerifyConfig} = require('./config.js');
const httpProxy = require('http-proxy');

// Auth client used to obtain the OAuth2 access tokens for the GCS proxy.
const googleAuth = new GoogleAuth();

// access token for GCS requests - will be refreshed every 50 mins
let accessToken;
// Token refresh period in milliseconds (50 minutes).
const ACCESS_TOKEN_REFRESH_INTERVAL = 50*60*1000;

/**
 * Set up a reverse proxy to add authentication to HTTP requests from
 * freshclam and proxy it to the GCS API
 *
 * Only requests whose path starts with `/<clamCvdMirrorBucket>/` get the
 * Authorization header; any other path is answered with a 404.
 *
 * The proxy listens on the port given by the PROXY_PORT environment
 * variable (default 8888) on the IPv4 loopback address.
 *
 * @async
 * @param {string} clamCvdMirrorBucket Name of the bucket that requests are
 *     allowed to access through the proxy.
 * @return {Promise<void>} Resolves once the first access token has been
 *     obtained and the proxy has been told to listen.
 */
async function setupGcsReverseProxy(clamCvdMirrorBucket) {
  // Get an OAuth2 access token and refresh it every 50mins.
  accessToken = await googleAuth.getAccessToken();

  setInterval(async () => {
    logger.info(`Refreshing Oauth2 Access Token for GCS proxy.`);
    try {
      accessToken = await googleAuth.getAccessToken();
    } catch (e) {
      // A failed refresh must not become an unhandled promise rejection.
      // Keep using the previous token; the next interval tick retries.
      logger.error(`Failed to refresh Oauth2 Access Token for GCS proxy: ${e}`);
    }
  }, ACCESS_TOKEN_REFRESH_INTERVAL);

  const proxy = httpProxy.createProxyServer({
    target: 'https://storage.googleapis.com/',
    changeOrigin: true,
    autoRewrite: true,
    secure: true,
    ws: false,
  });

  // Error handling...
  proxy.on('error', function(err, req, res) {
    let statusCode = 500;
    if (res && res.statusCode && res.statusCode !== 200) {
      statusCode = res.statusCode;
    }
    logger.error(`Failed to proxy to GCS for path ${req.url}, returning code ${
      statusCode}: ${err}`);
    if (!res || res.headersSent) {
      // The response has already been started (for example by the 404 sent
      // for a denied path), so headers cannot be written again. Close an
      // unfinished response rather than throwing inside this handler.
      if (res && !res.writableEnded) {
        res.destroy();
      }
      return;
    }
    res.writeHead(statusCode, {
      'Content-Type': 'text/plain',
    });
    res.end(`Failed to proxy to GCS: status ${statusCode}\n`);
  });

  // Add auth header/
  proxy.on('proxyReq', function(proxyReq, req, res) {
    if (proxyReq.path.startsWith(
        '/' + clamCvdMirrorBucket + '/')) {
      logger.info(`Proxying request for ${proxyReq.path} to GCS`);
      proxyReq.setHeader('Authorization', 'Bearer ' + accessToken);
    } else {
      logger.error(
          `Denying Proxy request for ${proxyReq.path} to GCS - invalid path`);
      res.writeHead(404, {
        'Content-Type': 'text/plain',
      });
      res.end('Failed to proxy to GCS - invalid path: status 404\n');
    }
  });

  const PROXY_PORT = process.env.PROXY_PORT || 8888;

  // Bind to the IPv4 loopback address explicitly: bootstrap.sh addresses the
  // proxy as 127.0.0.1:${PROXY_PORT}, whereas 'localhost' may resolve to the
  // IPv6 loopback (::1) instead.
  proxy.listen(PROXY_PORT, '127.0.0.1');
  logger.info(
      `GCS authenticating reverse proxy listenting on port ${PROXY_PORT}`);
}

/**
 * Entry point of the proxy process: load and verify the configuration,
 * then start the GCS reverse proxy for the configured CVD mirror bucket.
 *
 * The configuration file path is taken from the first command line
 * argument; './config.json' is used when no argument is given.
 *
 * @async
 * @return {Promise<void>}
 */
async function run() {
  // argv[0] is the node binary and argv[1] the script, so the first
  // user-supplied argument is the third element.
  const [, , configFile = './config.json'] = process.argv;

  /** @type {Config} */
  const verifiedConfig = await readAndVerifyConfig(configFile);
  const {ClamCvdMirrorBucket: mirrorBucket} = verifiedConfig;

  await setupGcsReverseProxy(mirrorBucket);
}

/**
 * Log a fatal startup error and terminate the process with exit code 1.
 *
 * @param {*} err The reason run() was rejected with.
 */
function exitOnFailure(err) {
  logger.fatal(err);
  logger.fatal('Exiting');
  process.exit(1);
}

// Start the service, exiting on error.
run().catch(exitOnFailure);
3 changes: 2 additions & 1 deletion cloudrun-malware-scanner/package.json
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
{
"name": "gcs-malware-scanner",
"version": "2.2.0",
"version": "2.3.0",
"description": "Service to scan GCS documents for the malware and move the analyzed documents to appropriate buckets",
"main": "index.js",
"scripts": {
"start": "node server.js",
"start-proxy": "node gcs-proxy-server.js",
"test": "echo \"Error: no test specified\" && exit 1",
"eslint": "eslint *.js",
"eslint-fix": "eslint --fix *.js"
Expand Down
Loading

0 comments on commit 560b7d4

Please sign in to comment.