Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Use NavigatorUAData and navigator.webdriver to improve bot detection #1359

Merged
merged 13 commits into from
Aug 20, 2024
14 changes: 14 additions & 0 deletions cypress/e2e/ua.cy.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/// <reference types="cypress" />
import { start } from '../support/setup'

describe('User Agent Blocking', () => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

❤️ this is lovely

// End-to-end check that the SDK's own bot detection flags the browser Cypress is driving.
// The callback is deliberately NOT async: it only enqueues cy commands, and returning a
// promise from a test that also invokes cy commands is a Cypress anti-pattern (it warns).
it('should pick up that our automated cypress tests are indeed bot traffic', () => {
    // this test is not run on Windows (skipOn comes from @cypress/skip-test)
    cy.skipOn('windows')
    start({})

    cy.window().then((win) => {
        // evaluate inside the application window so we call the posthog instance loaded on the page
        const isLikelyBot = win.eval('window.posthog._is_bot()')
        expect(isLikelyBot).to.eql(true)
    })
})
})
1 change: 1 addition & 0 deletions cypress/support/commands.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ Cypress.Commands.add('posthogInit', (options) => {
$captures.push(event)
$fullCaptures.push(eventData)
},
opt_out_useragent_filter: true,
...options,
})
})
Expand Down
1 change: 1 addition & 0 deletions cypress/support/e2e.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import './commands'
import '@cypress/skip-test/support'

// Add console errors into cypress logs.
Cypress.on('window:before:load', (win) => {
Expand Down
1 change: 1 addition & 0 deletions cypress/support/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

import { PostHog } from '../../src/posthog-core'
import { PostHogConfig } from '../../src/types'
import '@cypress/skip-test'

declare global {
// eslint-disable-next-line @typescript-eslint/no-namespace
Expand Down
5 changes: 4 additions & 1 deletion cypress/support/setup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,10 @@ export const start = ({
cy.visit(url)

if (initPosthog) {
cy.posthogInit(options)
cy.posthogInit({
opt_out_useragent_filter: true, // we ARE a bot, so we need to enable this opt-out
...options,
})
}

if (resetOnInit) {
Expand Down
2 changes: 1 addition & 1 deletion cypress/tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"compilerOptions": {
"target": "es2015",
"lib": ["es5", "dom"],
"lib": ["es5", "dom", "es2015"],
"types": ["cypress", "node"],
"moduleResolution": "node"
},
Expand Down
5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
"@babel/plugin-transform-react-jsx": "^7.23.4",
"@babel/preset-env": "7.18.9",
"@babel/preset-typescript": "^7.18.6",
"@cypress/skip-test": "^2.6.1",
"@jest/globals": "^27.5.1",
"@rollup/plugin-babel": "^6.0.4",
"@rollup/plugin-json": "^6.1.0",
Expand All @@ -64,8 +65,8 @@
"babel-eslint": "10.1.0",
"babel-jest": "^26.6.3",
"compare-versions": "^6.1.0",
"cypress": "13.6.3",
"cypress-localstorage-commands": "^2.2.5",
"cypress": "13.13.2",
"cypress-localstorage-commands": "^2.2.6",
"date-fns": "^3.6.0",
"eslint": "8.56.0",
"eslint-config-posthog-js": "link:eslint-rules",
Expand Down
2 changes: 1 addition & 1 deletion playground/nextjs/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"private": true,
"scripts": {
"clean-react": "cd ../../react && rm -rf ./node_modules/",
"dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev --experimental-https",
"dev": "pnpm run link-posthog-js && pnpm run clean-react && next dev",
"dev-crossdomain": "pnpm run link-posthog-js && pnpm run clean-react && NEXT_PUBLIC_CROSSDOMAIN=1 next dev --experimental-https",
"build": "pnpm run build-posthog-js && pnpm run link-posthog-js && pnpm run clean-react && next build",
"start": "next start",
Expand Down
32 changes: 32 additions & 0 deletions playground/nextjs/pages/ua.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import { useEffect, useState } from 'react'

// Try this page with some of the following commands:
// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000
// chrome --headless --disable-gpu --print-to-pdf http://localhost:3000/ua --virtual-time-budget=10000 --user-agent="RealHuman"

/**
 * Playground page that prints the signals visible on `navigator`:
 * the user agent string, the `webdriver` flag and the `userAgentData` brands.
 *
 * `navigator` is only read after the first effect has run; until then a
 * "Not client" placeholder is rendered instead.
 */
export default function Home() {
    const [hasMounted, setHasMounted] = useState(false)
    useEffect(() => {
        // flips once after the initial render, which triggers the real output below
        setHasMounted(true)
    }, [])

    if (!hasMounted) {
        return <pre>Not client</pre>
    }

    const { userAgent, webdriver } = navigator
    // userAgentData is accessed through `any`, with optional chaining in case it is absent
    // eslint-disable-next-line compat/compat
    const serialisedBrands = JSON.stringify((navigator as any).userAgentData?.brands)

    return (
        <dl>
            <dt>UA</dt>
            <dd>
                <code>{userAgent}</code>
            </dd>
            <dt>WebDriver</dt>
            <dd>
                <code>{String(webdriver)}</code>
            </dd>
            <dt>NavigatorUAData brands</dt>
            <dd>
                <code>{serialisedBrands}</code>
            </dd>
        </dl>
    )
}
35 changes: 20 additions & 15 deletions pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 14 additions & 10 deletions src/__tests__/posthog-core.ts
Original file line number Diff line number Diff line change
Expand Up @@ -144,24 +144,28 @@ describe('posthog core', () => {
})

it('respects opt_out_useragent_filter (default: false)', () => {
const originalUseragent = globals.userAgent
;(globals as any)['userAgent'] =
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'

const originalNavigator = globals.navigator
;(globals as any).navigator = {
...globals.navigator,
userAgent:
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36',
}
const hook = jest.fn()
const posthog = posthogWith(defaultConfig, defaultOverrides)
posthog._addCaptureHook(hook)

posthog.capture(eventName, {}, {})
expect(hook).not.toHaveBeenCalledWith('$event')
;(globals as any)['userAgent'] = originalUseragent
;(globals as any)['navigator'] = originalNavigator
})

it('respects opt_out_useragent_filter', () => {
const originalUseragent = globals.userAgent

;(globals as any)['userAgent'] =
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36'
const originalNavigator = globals.navigator
;(globals as any).navigator = {
...globals.navigator,
userAgent:
'Mozilla/5.0 AppleWebKit/537.36 (KHTML, like Gecko; compatible; Googlebot/2.1; +http://www.google.com/bot.html) Chrome/W.X.Y.Z Safari/537.36',
}

const hook = jest.fn()
const posthog = posthogWith(
Expand All @@ -184,7 +188,7 @@ describe('posthog core', () => {
})
)
expect(event.properties['$browser_type']).toEqual('bot')
;(globals as any)['userAgent'] = originalUseragent
;(globals as any)['navigator'] = originalNavigator
})

it('truncates long properties', () => {
Expand Down
90 changes: 87 additions & 3 deletions src/__tests__/utils.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

import { _copyAndTruncateStrings, isCrossDomainCookie, _base64Encode } from '../utils'
import { Info } from '../utils/event-utils'
import { isBlockedUA, DEFAULT_BLOCKED_UA_STRS } from '../utils/blocked-uas'
import { isLikelyBot, DEFAULT_BLOCKED_UA_STRS, isBlockedUA, NavigatorUAData } from '../utils/blocked-uas'
import { expect } from '@jest/globals'

function userAgentFor(botString: string) {
const randOne = (Math.random() + 1).toString(36).substring(7)
Expand Down Expand Up @@ -103,13 +104,13 @@ describe('utils', () => {
})
})

describe('user agent blocking', () => {
describe('isLikelyBot', () => {
it.each(DEFAULT_BLOCKED_UA_STRS.concat('testington'))(
'blocks a bot based on the user agent %s',
(botString) => {
const randomisedUserAgent = userAgentFor(botString)

expect(isBlockedUA(randomisedUserAgent, ['testington'])).toBe(true)
expect(isLikelyBot({ userAgent: randomisedUserAgent } as Navigator, ['testington'])).toBe(true)
}
)

Expand All @@ -125,10 +126,93 @@ describe('utils', () => {
[
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.6422.175 Safari/537.36 (compatible; Google-HotelAdsVerifier/2.0)',
],
[
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) HeadlessChrome/122.0.0.0 Safari/537.36',
],
[
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Cypress/13.6.3 Chrome/114.0.5735.289 Electron/25.8.4 Safari/537.36',
],
])('blocks based on user agent', (botString) => {
expect(isBlockedUA(botString, [])).toBe(true)
expect(isBlockedUA(botString.toLowerCase(), [])).toBe(true)
expect(isBlockedUA(botString.toUpperCase(), [])).toBe(true)
expect(isLikelyBot({ userAgent: botString } as Navigator, [])).toBe(true)
expect(isLikelyBot({ userAgent: botString.toLowerCase() } as Navigator, [])).toBe(true)
expect(isLikelyBot({ userAgent: botString.toUpperCase() } as Navigator, [])).toBe(true)
})

it.each([
['Mozilla/5.0 (Macintosh; Intel Mac OS X 10.15; rv:129.0) Gecko/20100101 Firefox/129.0'],
[
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/127.0.0.0 Safari/537.36',
],
[
'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.5 Safari/605.1.15',
],
[
'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) elec/1.0.0 Chrome/126.0.6478.127 Electron/31.2.1 Safari/537.36',
],
])('does not block based on non-bot user agent', (userAgent) => {
expect(isBlockedUA(userAgent, [])).toBe(false)
expect(isBlockedUA(userAgent.toLowerCase(), [])).toBe(false)
expect(isBlockedUA(userAgent.toUpperCase(), [])).toBe(false)
expect(isLikelyBot({ userAgent } as Navigator, [])).toBe(false)
expect(isLikelyBot({ userAgent: userAgent.toLowerCase() } as Navigator, [])).toBe(false)
expect(isLikelyBot({ userAgent: userAgent.toUpperCase() } as Navigator, [])).toBe(false)
})

// A navigator exposing only `webdriver: true` (no user agent at all) must be treated as a bot.
it('blocks based on the webdriver property being set to true', () => {
    const automatedNavigator = { webdriver: true } as Navigator

    expect(isLikelyBot(automatedNavigator, [])).toBe(true)
})

// A navigator with no user agent string whose brand list advertises HeadlessChrome
// must be treated as a bot.
it('blocks based on userAgentData', () => {
    const userAgentData: NavigatorUAData = {
        brands: [
            { brand: 'Not)A;Brand', version: '99' },
            { brand: 'HeadlessChrome', version: '127' },
            { brand: 'Chromium', version: '127' },
        ],
    }
    const headlessNavigator = { userAgentData } as Navigator

    expect(isLikelyBot(headlessNavigator, [])).toBe(true)
})

// Counterpart to the HeadlessChrome case: a brand list without HeadlessChrome
// must not be classified as a bot.
it('does not block a normal browser based on userAgentData', () => {
    const realUserAgentData: NavigatorUAData = {
        brands: [
            { brand: 'Not)A;Brand', version: '99' },
            { brand: 'Google Chrome', version: '127' },
            { brand: 'Chromium', version: '127' },
        ],
    }
    expect(
        isLikelyBot(
            {
                userAgentData: realUserAgentData,
            } as Navigator,
            []
        )
    ).toBe(false)
})

// Feeds isLikelyBot a series of malformed `userAgentData` values.
it('does not crash if the type of navigatorUAData changes', () => {
    // we're not checking the return values of these, only that they don't crash
    const malformedUserAgentData: unknown[] = [
        { brands: ['HeadlessChrome'] },
        { brands: [() => 'HeadlessChrome'] },
        { brands: () => ['HeadlessChrome'] },
        'HeadlessChrome',
        {},
        null,
        () => ['HeadlessChrome'],
        true,
    ]

    malformedUserAgentData.forEach((userAgentData) => {
        // every entry is intentionally invalid, hence the double cast
        isLikelyBot({ userAgentData } as unknown as Navigator, [])
    })
})
})

Expand Down
Loading
Loading