diff --git a/cypress/e2e/ua.cy.ts b/cypress/e2e/ua.cy.ts new file mode 100644 index 000000000..6df99b8ec --- /dev/null +++ b/cypress/e2e/ua.cy.ts @@ -0,0 +1,14 @@ +/// +import { start } from '../support/setup' + +describe('User Agent Blocking', () => { + it('should pick up that our automated cypress tests are indeed bot traffic', async () => { + cy.skipOn('windows') + start({}) + + cy.window().then((win) => { + const isLikelyBot = win.eval('window.posthog._is_bot()') + expect(isLikelyBot).to.eql(true) + }) + }) +}) diff --git a/cypress/support/commands.ts b/cypress/support/commands.ts index 2c82d7f45..11a219bb2 100644 --- a/cypress/support/commands.ts +++ b/cypress/support/commands.ts @@ -13,6 +13,7 @@ Cypress.Commands.add('posthogInit', (options) => { $captures.push(event) $fullCaptures.push(eventData) }, + opt_out_useragent_filter: true, ...options, }) }) diff --git a/cypress/support/e2e.ts b/cypress/support/e2e.ts index 429a80322..abff144f2 100644 --- a/cypress/support/e2e.ts +++ b/cypress/support/e2e.ts @@ -1,4 +1,5 @@ import './commands' +import '@cypress/skip-test/support' // Add console errors into cypress logs. Cypress.on('window:before:load', (win) => { diff --git a/cypress/support/index.ts b/cypress/support/index.ts index 22e18cfb6..82dec1097 100644 --- a/cypress/support/index.ts +++ b/cypress/support/index.ts @@ -2,6 +2,7 @@ import { PostHog } from '../../src/posthog-core' import { PostHogConfig } from '../../src/types' +import '@cypress/skip-test' declare global { // eslint-disable-next-line @typescript-eslint/no-namespace diff --git a/cypress/support/setup.ts b/cypress/support/setup.ts index 2e31e880c..655b3e070 100644 --- a/cypress/support/setup.ts +++ b/cypress/support/setup.ts @@ -39,7 +39,10 @@ export const start = ({ cy.visit(url) if (initPosthog) { - cy.posthogInit(options) + cy.posthogInit({ + opt_out_useragent_filter: true, // we ARE a bot, so we need to enable this opt-out + ...options, + }) } if (resetOnInit) { diff --git a/cypress/tsconfig.json b/cypress/tsconfig.json index 02963d41b..3592f862f 100644 --- a/cypress/tsconfig.json +++ b/cypress/tsconfig.json @@ -1,7 +1,7 @@ { "compilerOptions": { "target": "es2015", - "lib": ["es5", "dom"], + "lib": ["es5", "dom", "es2015"], "types": ["cypress", "node"], "moduleResolution": "node" }, diff --git a/package.json b/package.json index 56efc1c7e..7650e4e18 100644 --- a/package.json +++ b/package.json @@ -43,6 +43,7 @@ "@babel/plugin-transform-react-jsx": "^7.23.4", "@babel/preset-env": "7.18.9", "@babel/preset-typescript": "^7.18.6", + "@cypress/skip-test": "^2.6.1", "@jest/globals": "^27.5.1", "@rollup/plugin-babel": "^6.0.4", "@rollup/plugin-json": "^6.1.0", @@ -64,8 +65,8 @@ "babel-eslint": "10.1.0", "babel-jest": "^26.6.3", "compare-versions": "^6.1.0", - "cypress": "13.6.3", - "cypress-localstorage-commands": "^2.2.5", + "cypress": "13.13.2", + "cypress-localstorage-commands": "^2.2.6", "date-fns": "^3.6.0", "eslint": "8.56.0", "eslint-config-posthog-js": "link:eslint-rules", diff --git a/playground/nextjs/package.json b/playground/nextjs/package.json index 557149376..4811df568 100644 --- a/playground/nextjs/package.json +++ b/playground/nextjs/package.json @@ -4,7 +4,7 @@ "private": true, "scripts": { "clean-react": "cd ../../react && rm -rf ./node_modules/", - "dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev --experimental-https", + "dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev", "dev-crossdomain": "pnpm run link-posthog-js && pnpm run clean-react && NEXT_PUBLIC_CROSSDOMAIN=1 next dev --experimental-https", "build": "pnpm run build-posthog-js && pnpm run link-posthog-js && pnpm run clean-react && next build", "start": "next start", diff --git a/playground/nextjs/pages/ua.tsx b/playground/nextjs/pages/ua.tsx new file mode 100644 index 000000000..96f749112 --- /dev/null +++ b/playground/nextjs/pages/ua.tsx @@ -0,0 +1,32 @@ +import { useEffect, useState } from 'react' + +// Try this page with some of the following commands: +// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000 +// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000 --user-agent="RealHuman" + +export default function Home() { + const [isClient, setIsClient] = useState(false) + useEffect(() => { + setIsClient(true) + }, []) + if (!isClient) { + return
Not client
+ } + return ( +
+
UA
+
+ {navigator.userAgent} +
+
WebDriver
+
+ {String(navigator.webdriver)} +
+
NavigatorUAData brands
+
+ {/* eslint-disable-next-line compat/compat */} + {JSON.stringify((navigator as any).userAgentData?.brands)} +
+
+ ) +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b5384b46a..0a6f9e8f9 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -36,6 +36,9 @@ devDependencies: '@babel/preset-typescript': specifier: ^7.18.6 version: 7.18.6(@babel/core@7.18.9) + '@cypress/skip-test': + specifier: ^2.6.1 + version: 2.6.1 '@jest/globals': specifier: ^27.5.1 version: 27.5.1 @@ -100,11 +103,11 @@ devDependencies: specifier: ^6.1.0 version: 6.1.0 cypress: - specifier: 13.6.3 - version: 13.6.3 + specifier: 13.13.2 + version: 13.13.2 cypress-localstorage-commands: - specifier: ^2.2.5 - version: 2.2.5(cypress@13.6.3) + specifier: ^2.2.6 + version: 2.2.6(cypress@13.13.2) date-fns: specifier: ^3.6.0 version: 3.6.0 @@ -1656,6 +1659,10 @@ packages: uuid: 8.3.2 dev: true + /@cypress/skip-test@2.6.1: + resolution: {integrity: sha512-X+ibefBiuOmC5gKG91wRIT0/OqXeETYvu7zXktjZ3yLeO186Y8ia0K7/gQUpAwuUi28DuqMd1+7tBQVtPkzbPA==} + dev: true + /@cypress/xvfb@1.2.4(supports-color@8.1.1): resolution: {integrity: sha512-skbBzPggOVYCbnGgV+0dmBdW/s77ZkAOXIC1knS8NagwDjBrNC1LuXtQJeiN6l+m7lzmHtaoUw/ctJKdqkG57Q==} dependencies: @@ -4474,17 +4481,17 @@ packages: resolution: {integrity: sha512-d4ZVpCW31eWwCMe1YT3ur7mUDnTXbgwyzaL320DrcRT45rfjYxkt5QWLrmOJ+/UEAI2+fQgKe/fCjR8l4TpRgw==} dev: true - /cypress-localstorage-commands@2.2.5(cypress@13.6.3): - resolution: {integrity: sha512-07zpwzWdY+uPi1NEHFhWQNylIJqRxR78Ile05L6WT8h1Gz0OaxgBSZRuzp+pqUni/3Pk4d2ieq/cSh++ZmujEA==} + /cypress-localstorage-commands@2.2.6(cypress@13.13.2): + resolution: {integrity: sha512-l3nZ+Lu6YbBWk4UIfNrRkNK56BkF8zVbCrqzCf35x4Nlx2wA2r0spBOZXnKjbutQZgo6qDqtS8uXoSqV36YM1Q==} engines: {node: '>=14.0.0'} peerDependencies: cypress: '>=2.1.0' dependencies: - cypress: 13.6.3 + cypress: 13.13.2 dev: true - /cypress@13.6.3: - resolution: {integrity: sha512-d/pZvgwjAyZsoyJ3FOsJT5lDsqnxQ/clMqnNc++rkHjbkkiF2h9s0JsZSyyH4QXhVFW3zPFg82jD25roFLOdZA==} + /cypress@13.13.2: + resolution: {integrity: sha512-PvJQU33933NvS1StfzEb8/mu2kMy4dABwCF+yd5Bi7Qly1HOVf+Bufrygee/tlmty/6j5lX+KIi8j9Q3JUMbhA==} engines: {node: ^16.0.0 || ^18.0.0 || >=20.0.0} hasBin: true requiresBuild: true @@ -4528,7 +4535,7 @@ packages: request-progress: 3.0.0 semver: 7.5.4 supports-color: 8.1.1 - tmp: 0.2.1 + tmp: 0.2.3 untildify: 4.0.0 yauzl: 2.10.0 dev: true @@ -10243,11 +10250,9 @@ packages: rimraf: 2.7.1 dev: true - /tmp@0.2.1: - resolution: {integrity: sha512-76SUhtfqR2Ijn+xllcI5P1oyannHNHByD80W1q447gU3mp9G9PSpGdWmjUOHRDPiHYacIk66W7ubDTuPF3BEtQ==} - engines: {node: '>=8.17.0'} - dependencies: - rimraf: 3.0.2 + /tmp@0.2.3: + resolution: {integrity: sha512-nZD7m9iCPC5g0pYmcaxogYKggSfLsdxl8of3Q/oIbqCqLLIO9IAF0GWjX1z9NZRHPiXv8Wex4yDCaZsgEw0Y8w==} + engines: {node: '>=14.14'} dev: true /tmpl@1.0.5: diff --git a/src/__tests__/posthog-core.ts b/src/__tests__/posthog-core.ts index 2ce103037..0662c9880 100644 --- a/src/__tests__/posthog-core.ts +++ b/src/__tests__/posthog-core.ts @@ -144,24 +144,28 @@ describe('posthog core', () => { }) it('respects opt_out_useragent_filter (default: false)', () => { - const originalUseragent = globals.userAgent - ;(globals as any)['userAgent'] = - 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36' - + const originalNavigator = globals.navigator + ;(globals as any).navigator = { + ...globals.navigator, + userAgent: + 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36', + } const hook = jest.fn() const posthog = posthogWith(defaultConfig, defaultOverrides) posthog._addCaptureHook(hook) posthog.capture(eventName, {}, {}) expect(hook).not.toHaveBeenCalledWith('$event') - ;(globals as any)['userAgent'] = originalUseragent + ;(globals as any)['navigator'] = originalNavigator }) it('respects opt_out_useragent_filter', () => { - const originalUseragent = globals.userAgent - - ;(globals as any)['userAgent'] = - 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36' + const originalNavigator = globals.navigator + ;(globals as any).navigator = { + ...globals.navigator, + userAgent: + 'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36', + } const hook = jest.fn() const posthog = posthogWith( @@ -184,7 +188,7 @@ describe('posthog core', () => { }) ) expect(event.properties['$browser_type']).toEqual('bot') - ;(globals as any)['userAgent'] = originalUseragent + ;(globals as any)['navigator'] = originalNavigator }) it('truncates long properties', () => { diff --git a/src/__tests__/utils.test.ts b/src/__tests__/utils.test.ts index cdbb5e300..a8ae87d6b 100644 --- a/src/__tests__/utils.test.ts +++ b/src/__tests__/utils.test.ts @@ -9,7 +9,8 @@ import { _copyAndTruncateStrings, isCrossDomainCookie, _base64Encode } from '../utils' import { Info } from '../utils/event-utils' -import { isBlockedUA, DEFAULT_BLOCKED_UA_STRS } from '../utils/blocked-uas' +import { isLikelyBot, DEFAULT_BLOCKED_UA_STRS, isBlockedUA, NavigatorUAData } from '../utils/blocked-uas' +import { expect } from '@jest/globals' function userAgentFor(botString: string) { const randOne = (Math.random() + 1).toString(36).substring(7) @@ -103,13 +104,13 @@ describe('utils', () => { }) }) - describe('user agent blocking', () => { + describe('isLikelyBot', () => { it.each(DEFAULT_BLOCKED_UA_STRS.concat('testington'))( 'blocks a bot based on the user agent %s', (botString) => { const randomisedUserAgent = userAgentFor(botString) - expect(isBlockedUA(randomisedUserAgent, ['testington'])).toBe(true) + expect(isLikelyBot({ userAgent: randomisedUserAgent } as Navigator, ['testington'])).toBe(true) } ) @@ -125,10 +126,93 @@ describe('utils', () => { [ 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.175 Safari/537.36 (compatible; Google-HotelAdsVerifier/2.0)', ], + [ + 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/122.0.0.0 Safari/537.36', + ], + [ + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cypress/13.6.3 Chrome/114.0.5735.289 Electron/25.8.4 Safari/537.36', + ], ])('blocks based on user agent', (botString) => { expect(isBlockedUA(botString, [])).toBe(true) expect(isBlockedUA(botString.toLowerCase(), [])).toBe(true) expect(isBlockedUA(botString.toUpperCase(), [])).toBe(true) + expect(isLikelyBot({ userAgent: botString } as Navigator, [])).toBe(true) + expect(isLikelyBot({ userAgent: botString.toLowerCase() } as Navigator, [])).toBe(true) + expect(isLikelyBot({ userAgent: botString.toUpperCase() } as Navigator, [])).toBe(true) + }) + + it.each([ + ['Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:129.0) Gecko/20100101 Firefox/129.0'], + [ + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36', + ], + [ + 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15', + ], + [ + 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) elec/1.0.0 Chrome/126.0.6478.127 Electron/31.2.1 Safari/537.36', + ], + ])('does not block based on non-bot user agent', (userAgent) => { + expect(isBlockedUA(userAgent, [])).toBe(false) + expect(isBlockedUA(userAgent.toLowerCase(), [])).toBe(false) + expect(isBlockedUA(userAgent.toUpperCase(), [])).toBe(false) + expect(isLikelyBot({ userAgent } as Navigator, [])).toBe(false) + expect(isLikelyBot({ userAgent: userAgent.toLowerCase() } as Navigator, [])).toBe(false) + expect(isLikelyBot({ userAgent: userAgent.toUpperCase() } as Navigator, [])).toBe(false) + }) + + it('blocks based on the webdriver property being set to true', () => { + expect(isLikelyBot({ webdriver: true } as Navigator, [])).toBe(true) + }) + + it('blocks based on userAgentData', () => { + const headlessUserAgentData: NavigatorUAData = { + brands: [ + { brand: 'Not)A;Brand', version: '99' }, + { brand: 'HeadlessChrome', version: '127' }, + { brand: 'Chromium', version: '127' }, + ], + } + expect( + isLikelyBot( + { + userAgentData: headlessUserAgentData, + } as Navigator, + [] + ) + ).toBe(true) + }) + + it('does not block a normal browser based of userAgentData', () => { + const realUserAgentData: NavigatorUAData = { + brands: [ + { brand: 'Not)A;Brand', version: '99' }, + { brand: 'Google Chrome', version: '127' }, + { brand: 'Chromium', version: '127' }, + ], + } + expect( + isLikelyBot( + { + userAgentData: realUserAgentData, + } as Navigator, + [] + ) + ).toBe(false) + }) + + it('does not crash if the type of navigatorUAData changes', () => { + // we're not checking the return values of these, only that they don't crash + // @ts-expect-error testing invalid data + isLikelyBot({ userAgentData: { brands: ['HeadlessChrome'] } } as Navigator, []) + // @ts-expect-error testing invalid data + isLikelyBot({ userAgentData: { brands: [() => 'HeadlessChrome'] } } as Navigator, []) + isLikelyBot({ userAgentData: { brands: () => ['HeadlessChrome'] } } as unknown as Navigator, []) + isLikelyBot({ userAgentData: 'HeadlessChrome' } as unknown as Navigator, []) + isLikelyBot({ userAgentData: {} } as unknown as Navigator, []) + isLikelyBot({ userAgentData: null } as unknown as Navigator, []) + isLikelyBot({ userAgentData: () => ['HeadlessChrome'] } as unknown as Navigator, []) + isLikelyBot({ userAgentData: true } as unknown as Navigator, []) }) }) diff --git a/src/posthog-core.ts b/src/posthog-core.ts index 16f21e721..d01ab5e9e 100644 --- a/src/posthog-core.ts +++ b/src/posthog-core.ts @@ -10,7 +10,7 @@ import { isCrossDomainCookie, isDistinctIdStringLike, } from './utils' -import { assignableWindow, document, location, userAgent, window } from './utils/globals' +import { assignableWindow, document, location, navigator, userAgent, window } from './utils/globals' import { PostHogFeatureFlags } from './posthog-featureflags' import { PostHogPersistence } from './posthog-persistence' import { @@ -66,7 +66,7 @@ import { import { Info } from './utils/event-utils' import { logger } from './utils/logger' import { SessionPropsManager } from './session-props' -import { isBlockedUA } from './utils/blocked-uas' +import { isLikelyBot } from './utils/blocked-uas' import { extendURLParams, request, SUPPORTS_REQUEST } from './request' import { Heatmaps } from './heatmaps' import { ScrollManager } from './scroll-manager' @@ -771,11 +771,7 @@ export class PostHog { return } - if ( - userAgent && - !this.config.opt_out_useragent_filter && - isBlockedUA(userAgent, this.config.custom_blocked_useragents) - ) { + if (!this.config.opt_out_useragent_filter && this._is_bot()) { return } @@ -930,9 +926,7 @@ export class PostHog { // this is only added when this.config.opt_out_useragent_filter is true, // or it would always add "browser" if (userAgent && this.config.opt_out_useragent_filter) { - properties['$browser_type'] = isBlockedUA(userAgent, this.config.custom_blocked_useragents) - ? 'bot' - : 'browser' + properties['$browser_type'] = this._is_bot() ? 'bot' : 'browser' } // note: extend writes to the first object, so lets make sure we @@ -2038,6 +2032,14 @@ export class PostHog { this._sync_opt_out_with_persistence() } + _is_bot(): boolean | undefined { + if (navigator) { + return isLikelyBot(navigator, this.config.custom_blocked_useragents) + } else { + return undefined + } + } + debug(debug?: boolean): void { if (debug === false) { window?.console.log("You've disabled debug mode.") diff --git a/src/utils/blocked-uas.ts b/src/utils/blocked-uas.ts index d01da0e56..fd0223d73 100644 --- a/src/utils/blocked-uas.ts +++ b/src/utils/blocked-uas.ts @@ -34,6 +34,11 @@ export const DEFAULT_BLOCKED_UA_STRS = [ 'yahoo! slurp', 'yandexbot', + // headless browsers + 'headlesschrome', + 'cypress', + // we don't block electron here, as many customers use posthog-js in electron apps + // a whole bunch of goog-specific crawlers // https://developers.google.com/search/docs/advanced/crawling/overview-google-crawlers 'Google-HotelAdsVerifier', @@ -66,3 +71,52 @@ export const isBlockedUA = function (ua: string, customBlockedUserAgents: string return uaLower.indexOf(blockedUaLower) !== -1 }) } + +// There's more in the type, but this is all we use. It's currently experimental, see +// https://developer.mozilla.org/en-US/docs/Web/API/Navigator/userAgentData +// if you're reading this in the future, when it's no longer experimental, please remove this type and use an official one. +// Be extremely defensive here to ensure backwards and *forwards* compatibility, and remove this defensiveness in the +// future when it is safe to do so. +export interface NavigatorUAData { + brands?: { + brand: string + version: string + }[] +} +declare global { + interface Navigator { + userAgentData?: NavigatorUAData + } +} + +export const isLikelyBot = function (navigator: Navigator | undefined, customBlockedUserAgents: string[]): boolean { + if (!navigator) { + return false + } + const ua = navigator.userAgent + if (ua) { + if (isBlockedUA(ua, customBlockedUserAgents)) { + return true + } + } + try { + // eslint-disable-next-line compat/compat + const uaData = navigator?.userAgentData as NavigatorUAData + if (uaData?.brands && uaData.brands.some((brandObj) => isBlockedUA(brandObj?.brand, customBlockedUserAgents))) { + return true + } + } catch { + // ignore the error, we were using experimental browser features + } + + return !!navigator.webdriver + + // There's some more enhancements we could make in this area, e.g. it's possible to check if Chrome dev tools are + // open, which will detect some bots that are trying to mask themselves and might get past the checks above. + // However, this would give false positives for actual humans who have dev tools open. + + // We could also use the data in navigator.userAgentData.getHighEntropyValues() to detect bots, but we should wait + // until this stops being experimental. The MDN docs imply that this might eventually require user permission. + // See https://developer.mozilla.org/en-US/docs/Web/API/NavigatorUAData/getHighEntropyValues + // It would be very bad if posthog-js caused a permission prompt to appear on every page load. +} diff --git a/testcafe/helpers.js b/testcafe/helpers.js index a4a239698..9dff4711e 100644 --- a/testcafe/helpers.js +++ b/testcafe/helpers.js @@ -57,6 +57,7 @@ export const initPosthog = (config) => { distinctID: 'automated-tester', // We set this to get around the ingestion delay for new distinctIDs isIdentifiedID: true, }, + opt_out_useragent_filter: true, }) }