Skip to content

Commit

Permalink
Combined export is processed in chunks to save memory. It also now reads from a database view.
Browse files Browse the repository at this point in the history
  • Loading branch information
ShootingStar91 committed Aug 15, 2024
1 parent 62dad97 commit 2a8b1bd
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 37 deletions.
82 changes: 47 additions & 35 deletions backend/src/services/locality.ts
Original file line number Diff line number Diff line change
Expand Up @@ -7,49 +7,61 @@ import { fixBigInt } from '../utils/common'
import { Role } from '../../../frontend/src/types'
import { AccessError } from '../middlewares/authorizer'
import { NOW_DB_NAME } from '../utils/config'
import { localityColumns, now_lsColumns, speciesColumns } from './combinedExportColumns'

export const getLocalitySpeciesList = async (lids: number[], user: User | undefined) => {
// Get localities separately with getAllLocalities to know which localities user has access to.
// Unoptimal, if it's slow try a different way.
const localityList = await getAllLocalities(user)

const lidsSet = new Set(lids)
const permittedLids = localityList.filter(loc => lidsSet.has(loc.lid)).map(loc => loc.lid)

const conn = await pool.getConnection()
const columns = [
...localityColumns.map(col => `${NOW_DB_NAME}.now_loc.${col} as ${col}`),
...now_lsColumns.map(col => `${NOW_DB_NAME}.now_ls.${col} as ${col}`),
...speciesColumns.map(col => `${NOW_DB_NAME}.com_species.${col} as ${col}`),
].join(', ')

const localitySpecies: { [index: string]: string | number | null | bigint | boolean }[] = await conn.query(
import { PoolConnection } from 'mariadb'

// Number of rows requested per query (passed as LIMIT, and used to compute OFFSET) when the export is fetched chunk by chunk.
const CHUNK_SIZE = 20000

/**
 * Fetches one page of export rows for the given locality ids.
 *
 * The view-based query selects every column from `now_v_export_locsp` for rows
 * whose `lid` is in `lids` (one `?` placeholder per id), ordered by `loc_name`,
 * and pages through them with `LIMIT limit OFFSET offset`.
 *
 * @param conn - open connection the query is run on
 * @param limit - maximum number of rows to return
 * @param offset - number of rows to skip before the first returned row
 * @param lids - locality ids to include; bound as query parameters
 * @param includeDrafts - when false, an extra filter is appended (see below)
 * @returns the rows returned by the query, as objects keyed by column name
 *
 * NOTE(review): as shown here the template literal holds two SELECT lines (a
 * joined query using `columns` and the view-based query) followed by two
 * parameter arrays (`[permittedLids]` and `[...lids]`). Only the view-based
 * SELECT with `[...lids]` matches this function's parameters — confirm against
 * the actual file.
 */
const getChunk = async (
conn: PoolConnection,
limit: number,
offset: number,
lids: number[],
includeDrafts: boolean
) => {
// Without drafts, keep only rows with loc_status = 0 whose lid is not linked
// through now_plr to a project that has proj_records = 1.
const excludeDraftsString = includeDrafts
? ''
: `AND (loc_status = 0 AND lid NOT IN (SELECT DISTINCT ${NOW_DB_NAME}.now_plr.lid FROM ${NOW_DB_NAME}.now_plr JOIN ${NOW_DB_NAME}.now_proj ON ${NOW_DB_NAME}.now_plr.pid = ${NOW_DB_NAME}.now_proj.pid WHERE ${NOW_DB_NAME}.now_proj.proj_records = 1))`
const result: { [index: string]: string | number | null | bigint | boolean }[] = await conn.query(
`
SELECT ${columns} FROM ${NOW_DB_NAME}.now_loc JOIN ${NOW_DB_NAME}.now_ls ON ${NOW_DB_NAME}.now_loc.lid = ${NOW_DB_NAME}.now_ls.lid JOIN ${NOW_DB_NAME}.com_species ON ${NOW_DB_NAME}.now_ls.species_id = ${NOW_DB_NAME}.com_species.species_id WHERE ${NOW_DB_NAME}.now_loc.lid IN (?)
SELECT * FROM ${NOW_DB_NAME}.now_v_export_locsp WHERE lid IN (${lids.map(() => '?').join(', ')}) ${excludeDraftsString} ORDER BY loc_name LIMIT ${limit} OFFSET ${offset}
`,
[permittedLids]
[...lids]
)
return result
}

await conn.end()
/**
 * Formats a single database value as a double-quoted CSV field.
 *
 * - `null` and `undefined` become an empty quoted field (`""`).
 * - `bigint` values are written with their exact decimal digits (no lossy
 *   conversion through `Number`).
 * - Everything else is converted with `String(...)`.
 * - Any double quote inside the value is doubled, so that a value containing
 *   `"` cannot terminate the field early and shift the remaining columns.
 *
 * @param val - raw cell value from a query result row
 * @returns the value wrapped in double quotes, safe to join with commas
 */
const formatValue = (val: unknown): string => {
  if (val == null) return `""`
  const text = typeof val === 'bigint' ? val.toString() : String(val)
  // CSV escapes an embedded double quote by doubling it.
  return `"${text.replace(/"/g, '""')}"`
}

const localityTitles = [...localityColumns, ...now_lsColumns, ...speciesColumns]
/**
 * Builds the export table for the given locality ids by fetching rows in
 * chunks of CHUNK_SIZE via getChunk until an empty chunk is returned.
 *
 * The column headers are taken from the keys of the first row of the first
 * non-empty chunk; each row's values are passed through formatValue.
 *
 * @param conn - open connection handed to getChunk for every query
 * @param lids - locality ids to export
 * @param includeDrafts - forwarded to getChunk
 * @returns an array whose first element is the header row, followed by one
 *   array of formatted values per data row
 *
 * NOTE(review): when the first chunk is empty, `columnHeaders` is still null,
 * so the result is `[null]` — callers that call `.join` on every row should
 * confirm they handle this.
 * NOTE(review): as shown here the body also contains a nested `formatValue`
 * definition and lines referencing `nowDb`, `localitySpecies` and
 * `speciesTitles`/`lsTitles` that the chunk loop does not use — confirm
 * against the actual file.
 */
const getExportList = async (conn: PoolConnection, lids: number[], includeDrafts: boolean) => {
const chunks = []

const formatValue = (value: string | number | boolean | null | object | bigint) => {
if (Array.isArray(value)) return `"${value.map((value: { synonym: string }) => value.synonym).join(', ')}"`
if (typeof value === 'object' && value !== null)
throw new Error('Internal error: Unexpected non-array object in export data')
if (typeof value === 'bigint') return Number(BigInt)
if (value === null) return `""`
return `"${value}"`
}
let columnHeaders: null | string[] = null

// Page through the result set; the offset of page i is i * CHUNK_SIZE.
for (let i = 0; ; i += 1) {
const chunk = await getChunk(conn, CHUNK_SIZE, i * CHUNK_SIZE, lids, includeDrafts)
if (chunk.length === 0) break

// Get column headers
if (!columnHeaders) columnHeaders = Object.keys(chunk[0])

const speciesTitles = Object.keys(nowDb.com_species.fields)
const lsTitles = Object.keys(nowDb.now_ls.fields)
const dataRows = localitySpecies.map(obj => Object.values(obj).map(value => formatValue(value as string))) as unknown
chunks.push(...chunk.map(c => Object.values(c).map(val => formatValue(val))))
}
return [columnHeaders, ...chunks]
}

const titleRow = [...localityTitles, ...speciesTitles, ...lsTitles]
return [titleRow, ...(dataRows as string[])]
/**
 * Returns the combined locality–species export for the given locality ids.
 *
 * Draft rows are included only when `user` is defined and has the
 * `Role.Admin` or `Role.EditUnrestricted` role; otherwise getExportList is
 * asked to exclude them.
 *
 * @param lids - locality ids to export
 * @param user - the requesting user, or undefined for an anonymous request
 * @returns the value produced by getExportList (header row followed by data rows)
 */
export const getLocalitySpeciesList = async (lids: number[], user: User | undefined) => {
  const includeDrafts = user !== undefined && [Role.Admin, Role.EditUnrestricted].includes(user.role)
  const conn = await pool.getConnection()
  try {
    return await getExportList(conn, lids, includeDrafts)
  } finally {
    // Always give the connection back, even when a chunk query throws;
    // otherwise every failed export would leak one pooled connection.
    await conn.end()
  }
}

const getIdsOfUsersProjects = async (user: User) => {
Expand Down
4 changes: 2 additions & 2 deletions frontend/src/components/Locality/LocalityTable.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ export const LocalityTable = ({ selectorFn }: { selectorFn?: (newObject: Localit
return
}

const limit = 4000
const limit = 99999999
if (lids.length > limit) {
notify(`Please filter the table more. Current rows: ${lids.length}. Limit: ${limit}`, 'error')
return
}
const result = await getLocalitySpeciesList(lids).unwrap()
const dataString = result.map(row => (row as Array<unknown>).join(',')).join('\n')
const dataString = result.map(row => row.join(',')).join('\n')

Check failure on line 54 in frontend/src/components/Locality/LocalityTable.tsx

View workflow job for this annotation

GitHub Actions / Lint & tsc backend

Unsafe return of an `any` typed value

Check failure on line 54 in frontend/src/components/Locality/LocalityTable.tsx

View workflow job for this annotation

GitHub Actions / Lint & tsc backend

Unsafe call of an `any` typed value
const blob = new Blob([dataString], { type: 'text/csv' })
const url = URL.createObjectURL(blob)
const a = document.createElement('a')
Expand Down

0 comments on commit 2a8b1bd

Please sign in to comment.