fix: make empty bucket use queue to remove underlying objects asynchronously #701

Status: Open · wants to merge 3 commits into master
2 changes: 2 additions & 0 deletions src/config.ts
@@ -97,6 +97,7 @@ type StorageConfigType = {
   responseSMaxAge: number
   anonKeyAsync: Promise<string>
   serviceKeyAsync: Promise<string>
+  emptyBucketMax: number
   storageBackendType: StorageBackendType
   tenantId: string
   requestUrlLengthLimit: number
@@ -312,6 +313,7 @@ export function getConfig(options?: { reload?: boolean }): StorageConfigType {
     ),
     // Storage
     storageBackendType: getOptionalConfigFromEnv('STORAGE_BACKEND') as StorageBackendType,
+    emptyBucketMax: parseInt(getOptionalConfigFromEnv('STORAGE_EMPTY_BUCKET_MAX') || '200000', 10),
 
     // Storage - File
     storageFilePath: getOptionalConfigFromEnv(
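
Reviewer note: `emptyBucketMax` caps how many objects `emptyBucket` will handle in one request (see `storage.ts` below). A minimal sketch of the knob, assuming only what this diff shows (`getConfig` and its `reload` option come from `src/config.ts`; the import path is assumed relative to `src/`):

```ts
import { getConfig } from './config'

// Default: parseInt('200000', 10) when STORAGE_EMPTY_BUCKET_MAX is unset.
process.env.STORAGE_EMPTY_BUCKET_MAX = '500000'
const { emptyBucketMax } = getConfig({ reload: true }) // reload re-reads the env
// emptyBucketMax === 500000
```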
8 changes: 8 additions & 0 deletions src/internal/errors/codes.ts
@@ -49,6 +49,14 @@ export const ERRORS = {
       message: `The bucket you tried to delete is not empty`,
       originalError: e,
     }),
+  UnableToEmptyBucket: (bucket: string, e?: Error) =>
+    new StorageBackendError({
+      code: ErrorCode.InvalidRequest,
+      resource: bucket,
+      httpStatusCode: 409,
+      message: `Unable to empty the bucket because it contains too many objects`,
+      originalError: e,
+    }),
   NoSuchBucket: (bucket: string, e?: Error) =>
     new StorageBackendError({
       code: ErrorCode.NoSuchBucket,
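
Like its `BucketNotEmpty` neighbor, the new factory is thrown at the point of failure; a sketch of the call-site pattern (the `@internal/errors` import alias is an assumption based on this repo's other `@internal` imports):

```ts
import { ERRORS } from '@internal/errors'

// Thrown by Storage.emptyBucket() before any deletion starts; surfaces as
// HTTP 409 with ErrorCode.InvalidRequest and the bucket id as the resource.
throw ERRORS.UnableToEmptyBucket(bucketId)
```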
2 changes: 1 addition & 1 deletion src/storage/database/adapter.ts
@@ -75,7 +75,7 @@ export interface Database {
     filters?: Filters
   ): Promise<Filters['dontErrorOnEmpty'] extends true ? Bucket | undefined : Bucket>
 
-  countObjectsInBucket(bucketId: string): Promise<number>
+  countObjectsInBucket(bucketId: string, limit?: number): Promise<number>
 
   deleteBucket(bucketId: string | string[]): Promise<Bucket[]>
 
54 changes: 32 additions & 22 deletions src/storage/database/knex.ts
@@ -160,17 +160,21 @@ export class StorageKnexDB implements Database {
     return result
   }
 
-  async countObjectsInBucket(bucketId: string) {
+  async countObjectsInBucket(bucketId: string, limit?: number): Promise<number> {
+    // if we have a limit use select to only scan up to that limit
+    if (limit !== undefined) {
+      const result = await this.runQuery('CountObjectsInBucketWithLimit', (knex) => {
+        return knex.from('objects').where('bucket_id', bucketId).limit(limit).select(knex.raw('1'))
+      })
+      return result.length
+    }
+
+    // do full count if there is no limit
     const result = await this.runQuery('CountObjectsInBucket', (knex) => {
-      return knex
-        .from<{ count: number }>('objects')
-        .where('bucket_id', bucketId)
-        .limit(10)
-        .count()
-        .first()
+      return knex.from('objects').where('bucket_id', bucketId).count().first<{ count: number }>()
     })
 
-    return (result?.count as number) || 0
+    return result?.count || 0
   }
 
   async deleteBucket(bucketId: string | string[]) {
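
Reviewer note on the two branches above: `select 1 ... limit n` lets Postgres stop scanning after n matching rows, so the capped branch costs at most `limit` row visits regardless of bucket size, while the unlimited branch is a genuine `count(*)`. (The old code applied `.limit(10)` to an aggregate, where it limited only the single result row, not the scan.) The intended call pattern, mirroring the `storage.ts` change in this PR:

```ts
// Ask for at most emptyBucketMax + 1 rows: if that many come back, the
// bucket holds more than emptyBucketMax objects and the exact total is
// irrelevant, so a full COUNT(*) is never needed on the hot path.
const count = await db.countObjectsInBucket(bucketId, emptyBucketMax + 1)
if (count > emptyBucketMax) {
  throw ERRORS.UnableToEmptyBucket(bucketId)
}
```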
@@ -612,7 +616,10 @@ export class StorageKnexDB implements Database {
   async mustLockObject(bucketId: string, objectName: string, version?: string) {
     return this.runQuery('MustLockObject', async (knex) => {
       const hash = hashStringToInt(`${bucketId}/${objectName}${version ? `/${version}` : ''}`)
-      const result = await knex.raw<any>(`SELECT pg_try_advisory_xact_lock(?);`, [hash])
+      const result = await knex.raw<{ rows: { pg_try_advisory_xact_lock: boolean }[] }>(
+        `SELECT pg_try_advisory_xact_lock(?);`,
+        [hash]
+      )
       const lockAcquired = result.rows.shift()?.pg_try_advisory_xact_lock || false
 
       if (!lockAcquired) {
@@ -631,7 +638,7 @@
   ) {
     return this.runQuery('WaitObjectLock', async (knex) => {
       const hash = hashStringToInt(`${bucketId}/${objectName}${version ? `/${version}` : ''}`)
-      const query = knex.raw<any>(`SELECT pg_advisory_xact_lock(?)`, [hash])
+      const query = knex.raw(`SELECT pg_advisory_xact_lock(?)`, [hash])
 
       if (opts?.timeout) {
         let timeoutInterval: undefined | NodeJS.Timeout
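
Context for the two typing changes above: `pg_try_advisory_xact_lock` returns a boolean row immediately (hence the explicit row type replacing `<any>`), while `pg_advisory_xact_lock` blocks until the lock is granted and returns nothing useful (hence no type parameter at all). Both release automatically at transaction end. A sketch, assuming a knex instance and the same integer hash as in this file:

```ts
// Non-blocking probe: one row, one boolean column.
const res = await knex.raw<{ rows: { pg_try_advisory_xact_lock: boolean }[] }>(
  'SELECT pg_try_advisory_xact_lock(?);',
  [hash]
)
const acquired = res.rows[0]?.pg_try_advisory_xact_lock ?? false

// Blocking wait: resolves once the lock is held; the payload is discarded.
await knex.raw('SELECT pg_advisory_xact_lock(?)', [hash])
```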
@@ -661,18 +668,21 @@
 
   async searchObjects(bucketId: string, prefix: string, options: SearchObjectOption) {
     return this.runQuery('SearchObjects', async (knex) => {
-      const result = await knex.raw('select * from storage.search(?,?,?,?,?,?,?,?)', [
-        prefix,
-        bucketId,
-        options.limit || 100,
-        prefix.split('/').length,
-        options.offset || 0,
-        options.search || '',
-        options.sortBy?.column ?? 'name',
-        options.sortBy?.order ?? 'asc',
-      ])
+      const result = await knex.raw<{ rows: Obj[] }>(
+        'select * from storage.search(?,?,?,?,?,?,?,?)',
+        [
+          prefix,
+          bucketId,
+          options.limit || 100,
+          prefix.split('/').length,
+          options.offset || 0,
+          options.search || '',
+          options.sortBy?.column ?? 'name',
+          options.sortBy?.order ?? 'asc',
+        ]
+      )
 
-      return (result as any).rows
+      return result.rows
     })
   }
 
@@ -807,7 +817,7 @@
 
     if (typeof columns === 'object') {
       value.forEach((column: string) => {
-        delete (columns as Record<any, any>)[column]
+        delete (columns as Record<string, object>)[column]
       })
     }
   }
1 change: 1 addition & 0 deletions src/storage/events/index.ts
@@ -4,6 +4,7 @@ export * from './lifecycle/object-created'
 export * from './lifecycle/object-updated'
 export * from './lifecycle/object-removed'
 export * from './lifecycle/object-admin-delete'
+export * from './lifecycle/object-admin-delete-batch'
 export * from './objects/backup-object'
 export * from './migrations/run-migrations'
 export * from './migrations/reset-migrations'
87 changes: 87 additions & 0 deletions src/storage/events/lifecycle/object-admin-delete-batch.ts
@@ -0,0 +1,87 @@
+import { BaseEvent } from '../base-event'
+import { getConfig } from '../../../config'
+import { Job, SendOptions, WorkOptions } from 'pg-boss'
+import { logger, logSchema } from '@internal/monitoring'
+import { Storage } from '../../index'
+import { BasePayload } from '@internal/queue'
+
+export interface ObjectDeleteBatchEvent extends BasePayload {
+  prefixes: string[]
+  bucketId: string
+}
+
+const { storageS3Bucket, adminDeleteQueueTeamSize, adminDeleteConcurrency } = getConfig()
+
+export class ObjectAdminDeleteBatch extends BaseEvent<ObjectDeleteBatchEvent> {
+  static queueName = 'object:admin:delete-batch'
+
+  static getWorkerOptions(): WorkOptions {
+    return {}
+  }
+
+  static getSendOptions(): SendOptions {
+    return {
+      priority: 10,
+      expireInSeconds: 30,
+    }
+  }
+
+  static async handle(job: Job<ObjectDeleteBatchEvent>) {
+    let storage: Storage | undefined = undefined
+
+    const { prefixes, bucketId } = job.data
+    if (prefixes.length < 1) {
+      return
+    }
+
+    try {
+      storage = await this.createStorage(job.data)
+
+      logSchema.event(logger, `[Admin]: ObjectAdminDeleteBatch ${bucketId} ${prefixes.length}`, {
+        jobId: job.id,
+        type: 'event',
+        event: 'ObjectAdminDeleteBatch',
+        payload: JSON.stringify(job.data),
+        objectPath: bucketId,
+        resources: prefixes,
+        tenantId: job.data.tenant.ref,
+        project: job.data.tenant.ref,
+        reqId: job.data.reqId,
+      })
+
+      await storage.backend.deleteObjects(storageS3Bucket, prefixes)
+    } catch (e) {
+      logger.error(
+        {
+          error: e,
+          jobId: job.id,
+          type: 'event',
+          event: 'ObjectAdminDeleteBatch',
+          payload: JSON.stringify(job.data),
+          objectPath: bucketId,
+          resources: prefixes,
+          tenantId: job.data.tenant.ref,
+          project: job.data.tenant.ref,
+          reqId: job.data.reqId,
+        },
+        `[Admin]: ObjectAdminDeleteBatch ${bucketId} ${prefixes.length} - FAILED`
+      )
+      throw e
+    } finally {
+      if (storage) {
+        const tenant = storage.db.tenant()
+        storage.db
+          .destroyConnection()
+          .then(() => {
+            // no-op
+          })
+          .catch((e) => {
+            logger.error(
+              { error: e },
+              `[Admin]: ObjectAdminDeleteBatch ${tenant.ref} - FAILED DISPOSING CONNECTION`
+            )
+          })
+      }
+    }
+  }
+}
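
A sketch of the producer side, matching the payload interface above (`send` is inherited from `BaseEvent` and routes through pg-boss using the `getSendOptions()` above; the `tenant` shape beyond `ref` is an assumption about `BasePayload`):

```ts
import { ObjectAdminDeleteBatch } from '@storage/events'

// Enqueue one batch of S3 keys; a worker picks the job up via handle() and
// calls storage.backend.deleteObjects(storageS3Bucket, prefixes).
await ObjectAdminDeleteBatch.send({
  prefixes: ['tenant-ref/bucket-id/object.png', 'tenant-ref/bucket-id/object.png.info'],
  bucketId: 'bucket-id',
  tenant: { ref: 'tenant-ref' }, // plus whatever else BasePayload requires
  reqId: 'request-id',
})
```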
2 changes: 2 additions & 0 deletions src/storage/events/workers.ts
@@ -6,10 +6,12 @@ import { BackupObjectEvent } from './objects/backup-object'
 import { ResetMigrationsOnTenant } from './migrations/reset-migrations'
 import { JwksCreateSigningSecret } from './jwks/jwks-create-signing-secret'
 import { UpgradePgBossV10 } from './pgboss/upgrade-v10'
+import { ObjectAdminDeleteBatch } from './lifecycle/object-admin-delete-batch'
 
 export function registerWorkers() {
   Queue.register(Webhook)
   Queue.register(ObjectAdminDelete)
+  Queue.register(ObjectAdminDeleteBatch)
   Queue.register(RunMigrationsOnTenants)
   Queue.register(BackupObjectEvent)
   Queue.register(ResetMigrationsOnTenant)
28 changes: 14 additions & 14 deletions src/storage/storage.ts
@@ -6,9 +6,9 @@
 import { getConfig } from '../config'
 import { ObjectStorage } from './object'
 import { InfoRenderer } from '@storage/renderer/info'
-import { logger, logSchema } from '@internal/monitoring'
+import { ObjectAdminDeleteBatch } from './events'
 
-const { requestUrlLengthLimit, storageS3Bucket } = getConfig()
+const { requestUrlLengthLimit, emptyBucketMax } = getConfig()
 
 /**
  * Storage
@@ -146,14 +146,6 @@ export class Storage {
     return this.db.updateBucket(id, bucketData)
   }
 
-  /**
-   * Counts objects in a bucket
-   * @param id
-   */
-  countObjects(id: string) {
-    return this.db.countObjectsInBucket(id)
-  }
-
   /**
    * Delete a specific bucket if empty
    * @param id
@@ -164,7 +156,7 @@
       forUpdate: true,
     })
 
-    const countObjects = await db.asSuperUser().countObjectsInBucket(id)
+    const countObjects = await db.asSuperUser().countObjectsInBucket(id, 1)
 
     if (countObjects && countObjects > 0) {
       throw ERRORS.BucketNotEmpty(id)
@@ -187,6 +179,11 @@
   async emptyBucket(bucketId: string) {
     await this.findBucket(bucketId, 'name')
 
+    const count = await this.db.countObjectsInBucket(bucketId, emptyBucketMax + 1)
+    if (count > emptyBucketMax) {
+      throw ERRORS.UnableToEmptyBucket(bucketId)
+    }
+
     while (true) {
       const objects = await this.db.listObjects(
         bucketId,
@@ -205,15 +202,18 @@
       )
 
       if (deleted && deleted.length > 0) {
-        const params = deleted.reduce((all, { name, version }) => {
+        const prefixes = deleted.reduce((all, { name, version }) => {
           const fileName = withOptionalVersion(`${this.db.tenantId}/${bucketId}/${name}`, version)
           all.push(fileName)
           all.push(fileName + '.info')
           return all
         }, [] as string[])
-        // delete files from s3 asynchronously
-        this.backend.deleteObjects(storageS3Bucket, params).catch((e) => {
-          logSchema.error(logger, 'Failed to delete objects from s3', { type: 's3', error: e })
+
+        await ObjectAdminDeleteBatch.send({
+          prefixes,
+          bucketId,
+          tenant: this.db.tenant(),
+          reqId: this.db.reqId,
+        })
       }
 
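
One detail worth calling out in the loop above: each deleted row fans out to two S3 keys, the object blob and its `.info` metadata sidecar. A self-contained sketch of that fan-out with hypothetical values (the exact `withOptionalVersion` formatting is assumed from its name and usage here):

```ts
// Hypothetical tenant, bucket, and deleted rows for illustration.
const tenantId = 't1'
const bucketId = 'b1'
const deleted = [{ name: 'a.png', version: 'v1' }]

const prefixes = deleted.reduce((all, { name, version }) => {
  // withOptionalVersion presumably appends "/<version>" when one is present.
  const base = `${tenantId}/${bucketId}/${name}` + (version ? `/${version}` : '')
  all.push(base) // e.g. "t1/b1/a.png/v1"
  all.push(base + '.info') // e.g. "t1/b1/a.png/v1.info"
  return all
}, [] as string[])
// prefixes is then sent as one ObjectAdminDeleteBatch job per page of results.
```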