Skip to content

Brotli decompression error #687

Closed
@rgaudin

Description

@rgaudin

Here's the log of a run for https://www.imagepond.net/davidmark that crashed, apparently while trying to decompress something using Brotli.

I notice there are several `net::ERR_CONNECTION_REFUSED` errors in this very short log.

[zimit::2024-09-10 08:26:24,462] INFO:Running browsertrix-crawler crawl: crawl --failOnFailedSeed --waitUntil load --depth -1 --timeout 90 --behaviors autoplay,autofetch,siteSpecific --behaviorTimeout 90 --sizeLimit 4294967296 --diskUtilization 90 --timeLimit 7200 --url https://www.imagepond.net/davidmark --userAgentSuffix zimit.kiwix.org+ [email protected] --mobileDevice Pixel 2 --cwd /output/.tmputm27tp6 --statsFilename /output/crawl.json
{"timestamp":"2024-09-10T08:26:26.837Z","logLevel":"info","context":"general","message":"Browsertrix-Crawler 1.3.0-beta.1 (with warcio.js 2.3.1)","details":{}}
{"timestamp":"2024-09-10T08:26:26.841Z","logLevel":"info","context":"general","message":"Seeds","details":[{"url":"https://www.imagepond.net/davidmark","scopeType":"prefix","include":["/^https?:\\/\\/www\\.imagepond\\.net\\//"],"exclude":[],"allowHash":false,"depth":-1,"sitemap":null,"auth":null,"_authEncoded":null,"maxExtraHops":0,"maxDepth":1000000}]}
{"timestamp":"2024-09-10T08:26:26.841Z","logLevel":"info","context":"general","message":"Behavior Options","details":{"message":"{\"autoplay\":true,\"autofetch\":true,\"siteSpecific\":true,\"log\":\"__bx_log\",\"startEarly\":true}"}}
{"timestamp":"2024-09-10T08:26:28.370Z","logLevel":"info","context":"worker","message":"Creating 1 workers","details":{}}
{"timestamp":"2024-09-10T08:26:28.373Z","logLevel":"info","context":"worker","message":"Worker starting","details":{"workerid":0}}
{"timestamp":"2024-09-10T08:26:29.671Z","logLevel":"info","context":"worker","message":"Starting page","details":{"workerid":0,"page":"https://www.imagepond.net/davidmark"}}
{"timestamp":"2024-09-10T08:26:29.676Z","logLevel":"info","context":"crawlStatus","message":"Crawl statistics","details":{"crawled":0,"total":1,"pending":1,"failed":0,"limit":{"max":0,"hit":false},"pendingPages":["{\"seedId\":0,\"started\":\"2024-09-10T08:26:28.379Z\",\"extraHops\":0,\"url\":\"https:\\/\\/www.imagepond.net\\/davidmark\",\"added\":\"2024-09-10T08:26:27.029Z\",\"depth\":0}"]}}
{"timestamp":"2024-09-10T08:26:30.224Z","logLevel":"info","context":"general","message":"Awaiting page load","details":{"page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:31.047Z","logLevel":"warn","context":"recorder","message":"Request failed","details":{"url":"https://www.googletagmanager.com/gtag/js?id=G-CWJKBWX8WJ","errorText":"net::ERR_CONNECTION_REFUSED","page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:34.051Z","logLevel":"info","context":"behavior","message":"Running behaviors","details":{"frames":1,"frameUrls":["https://www.imagepond.net/davidmark"],"page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:34.052Z","logLevel":"info","context":"behavior","message":"Run Script Started","details":{"frameUrl":"https://www.imagepond.net/davidmark","page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:36.511Z","logLevel":"warn","context":"recorder","message":"Request failed","details":{"url":"https://www.facebook.com/share.php?u=__url__","errorText":"net::ERR_CONNECTION_REFUSED","page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:38.891Z","logLevel":"info","context":"behavior","message":"Run Script Finished","details":{"frameUrl":"https://www.imagepond.net/davidmark","page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:38.892Z","logLevel":"info","context":"behavior","message":"Behaviors finished","details":{"finished":1,"page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:39.941Z","logLevel":"info","context":"pageStatus","message":"Page Finished","details":{"loadState":4,"page":"https://www.imagepond.net/davidmark","workerid":0}}
{"timestamp":"2024-09-10T08:26:39.983Z","logLevel":"info","context":"worker","message":"Starting page","details":{"workerid":0,"page":"https://www.imagepond.net/"}}
{"timestamp":"2024-09-10T08:26:39.985Z","logLevel":"info","context":"crawlStatus","message":"Crawl statistics","details":{"crawled":1,"total":79,"pending":1,"failed":0,"limit":{"max":0,"hit":false},"pendingPages":["{\"seedId\":0,\"started\":\"2024-09-10T08:26:39.981Z\",\"extraHops\":0,\"url\":\"https:\\/\\/www.imagepond.net\\/\",\"added\":\"2024-09-10T08:26:33.736Z\",\"depth\":1}"]}}
{"timestamp":"2024-09-10T08:26:40.160Z","logLevel":"info","context":"general","message":"Awaiting page load","details":{"page":"https://www.imagepond.net/","workerid":0}}
{"timestamp":"2024-09-10T08:26:41.748Z","logLevel":"info","context":"behavior","message":"Running behaviors","details":{"frames":1,"frameUrls":["https://www.imagepond.net/"],"page":"https://www.imagepond.net/","workerid":0}}
{"timestamp":"2024-09-10T08:26:41.748Z","logLevel":"info","context":"behavior","message":"Run Script Started","details":{"frameUrl":"https://www.imagepond.net/","page":"https://www.imagepond.net/","workerid":0}}
{"timestamp":"2024-09-10T08:26:41.795Z","logLevel":"warn","context":"recorder","message":"Request failed","details":{"url":"https://www.facebook.com/share.php?u=__url__","errorText":"net::ERR_CONNECTION_REFUSED","page":"https://www.imagepond.net/","workerid":0}}
{"timestamp":"2024-09-10T08:26:42.642Z","logLevel":"info","context":"behavior","message":"Run Script Finished","details":{"frameUrl":"https://www.imagepond.net/","page":"https://www.imagepond.net/","workerid":0}}
{"timestamp":"2024-09-10T08:26:42.643Z","logLevel":"info","context":"behavior","message":"Behaviors finished","details":{"finished":1,"page":"https://www.imagepond.net/","workerid":0}}
{"timestamp":"2024-09-10T08:26:43.647Z","logLevel":"info","context":"pageStatus","message":"Page Finished","details":{"loadState":4,"page":"https://www.imagepond.net/","workerid":0}}
{"timestamp":"2024-09-10T08:26:43.666Z","logLevel":"info","context":"worker","message":"Starting page","details":{"workerid":0,"page":"https://www.imagepond.net/upload"}}
{"timestamp":"2024-09-10T08:26:43.668Z","logLevel":"info","context":"crawlStatus","message":"Crawl statistics","details":{"crawled":2,"total":118,"pending":1,"failed":0,"limit":{"max":0,"hit":false},"pendingPages":["{\"seedId\":0,\"started\":\"2024-09-10T08:26:43.665Z\",\"extraHops\":0,\"url\":\"https:\\/\\/www.imagepond.net\\/upload\",\"added\":\"2024-09-10T08:26:33.738Z\",\"depth\":1}"]}}
node:events:497
      throw er; // Unhandled 'error' event
      ^

Error: unexpected end of file
    at genericNodeError (node:internal/errors:984:15)
    at wrappedFn (node:internal/errors:538:14)
    at BrotliDecoder.zlibOnError [as onerror] (node:zlib:191:17)
Emitted 'error' event on BrotliDecompress instance at:
    at emitErrorNT (node:internal/streams/destroy:169:8)
    at emitErrorCloseNT (node:internal/streams/destroy:128:3)
    at process.processTicksAndRejections (node:internal/process/task_queues:82:21) {
  errno: -5,
  code: 'Z_BUF_ERROR'
}

Node.js v20.17.0

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    Status

    Done!

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions