6
6
from abc import ABCMeta , abstractmethod
7
7
from contextlib import asynccontextmanager as actxmgr
8
8
from contextvars import ContextVar
9
- from typing import Any , AsyncIterator , Dict , Final , Optional , cast
9
+ from typing import Any , AsyncIterator , Dict , Final , Mapping , Optional , Sequence , cast
10
10
11
11
import aiohttp
12
12
import aiotools
@@ -268,7 +268,6 @@ async def _scan_tag(
268
268
image : str ,
269
269
tag : str ,
270
270
) -> None :
271
- manifests = {}
272
271
async with concurrency_sema .get ():
273
272
rqst_args ["headers" ]["Accept" ] = self .MEDIA_TYPE_DOCKER_MANIFEST_LIST
274
273
async with sess .get (
@@ -281,62 +280,163 @@ async def _scan_tag(
281
280
content_type = resp .headers ["Content-Type" ]
282
281
resp .raise_for_status ()
283
282
resp_json = await resp .json ()
284
- match content_type :
285
- case self .MEDIA_TYPE_DOCKER_MANIFEST_LIST :
286
- manifest_list = resp_json ["manifests" ]
287
- request_type = self .MEDIA_TYPE_DOCKER_MANIFEST
288
- case self .MEDIA_TYPE_OCI_INDEX :
289
- manifest_list = [
290
- item
291
- for item in resp_json ["manifests" ]
292
- if "annotations" not in item # skip attestation manifests
293
- ]
294
- request_type = self .MEDIA_TYPE_OCI_MANIFEST
295
- case _:
296
- log .warning ("Unknown content type: {}" , content_type )
297
- raise RuntimeError (
298
- "The registry does not support the standard way of "
299
- "listing multiarch images."
300
- )
301
- rqst_args ["headers" ]["Accept" ] = request_type
302
- for manifest in manifest_list :
303
- platform_arg = (
304
- f"{ manifest ["platform" ]["os" ]} /{ manifest ["platform" ]["architecture" ]} "
305
- )
306
- if variant := manifest ["platform" ].get ("variant" , None ):
307
- platform_arg += f"/{ variant } "
308
- architecture = manifest ["platform" ]["architecture" ]
309
- architecture = arch_name_aliases .get (architecture , architecture )
310
- async with sess .get (
311
- self .registry_url / f"v2/{ image } /manifests/{ manifest ["digest" ]} " , ** rqst_args
312
- ) as resp :
313
- data = await resp .json ()
314
- config_digest = data ["config" ]["digest" ]
315
- size_bytes = sum (layer ["size" ] for layer in data ["layers" ]) + data ["config" ]["size" ]
316
- async with sess .get (
317
- self .registry_url / f"v2/{ image } /blobs/{ config_digest } " , ** rqst_args
318
- ) as resp :
319
- resp .raise_for_status ()
320
- data = json .loads (await resp .read ())
321
- labels = {}
322
- # we should favor `config` instead of `container_config` since `config` can contain additional datas
323
- # set when commiting image via `--change` flag
324
- if _config_labels := data .get ("config" , {}).get ("Labels" ):
325
- labels = _config_labels
326
- elif _container_config_labels := data .get ("container_config" , {}).get ("Labels" ):
327
- labels = _container_config_labels
328
-
329
- if not labels :
330
- log .warning (
331
- "Labels section not found on image {}:{}/{}" , image , tag , architecture
332
- )
333
283
334
- manifests [architecture ] = {
335
- "size" : size_bytes ,
336
- "labels" : labels ,
337
- "digest" : config_digest ,
338
- }
339
- await self ._read_manifest (image , tag , manifests )
284
+ async with aiotools .TaskGroup () as tg :
285
+ match content_type :
286
+ case self .MEDIA_TYPE_DOCKER_MANIFEST :
287
+ await self ._process_docker_v2_image (
288
+ tg , sess , rqst_args , image , tag , resp_json
289
+ )
290
+ case self .MEDIA_TYPE_DOCKER_MANIFEST_LIST :
291
+ await self ._process_docker_v2_multiplatform_image (
292
+ tg , sess , rqst_args , image , tag , resp_json
293
+ )
294
+ case self .MEDIA_TYPE_OCI_INDEX :
295
+ await self ._process_oci_index (
296
+ tg , sess , rqst_args , image , tag , resp_json
297
+ )
298
+ case _:
299
+ log .warn ("Unknown content type: {}" , content_type )
300
+ raise RuntimeError (
301
+ "The registry does not support the standard way of "
302
+ "listing multiarch images."
303
+ )
304
+
305
+ async def _read_manifest_list (
306
+ self ,
307
+ sess : aiohttp .ClientSession ,
308
+ manifest_list : Sequence [Any ],
309
+ rqst_args : Mapping [str , Any ],
310
+ image : str ,
311
+ tag : str ,
312
+ ) -> None :
313
+ """
314
+ Understands images defined under [OCI image manifest](https://github.com/opencontainers/image-spec/blob/main/manifest.md#example-image-manifest) or
315
+ [Docker image manifest list](https://github.com/openshift/docker-distribution/blob/master/docs/spec/manifest-v2-2.md#example-manifest-list)
316
+ and imports Backend.AI compatible images.
317
+ """
318
+ manifests = {}
319
+ for manifest in manifest_list :
320
+ platform_arg = f"{ manifest ["platform" ]["os" ]} /{ manifest ["platform" ]["architecture" ]} "
321
+ if variant := manifest ["platform" ].get ("variant" , None ):
322
+ platform_arg += f"/{ variant } "
323
+ architecture = manifest ["platform" ]["architecture" ]
324
+ architecture = arch_name_aliases .get (architecture , architecture )
325
+
326
+ async with sess .get (
327
+ self .registry_url / f"v2/{ image } /manifests/{ manifest ["digest" ]} " ,
328
+ ** rqst_args ,
329
+ ) as resp :
330
+ manifest_info = await resp .json ()
331
+
332
+ manifests [architecture ] = await self ._preprocess_manifest (
333
+ sess , manifest_info , rqst_args , image
334
+ )
335
+
336
+ if not manifests [architecture ]["labels" ]:
337
+ log .warning ("Labels section not found on image {}:{}/{}" , image , tag , architecture )
338
+
339
+ await self ._read_manifest (image , tag , manifests )
340
+
341
async def _preprocess_manifest(
    self,
    sess: aiohttp.ClientSession,
    manifest: Mapping[str, Any],
    rqst_args: Mapping[str, Any],
    image: str,
) -> dict[str, Any]:
    """
    Extract the information required by Backend.AI from a
    [Docker image manifest](https://github.com/openshift/docker-distribution/blob/master/docs/spec/manifest-v2-2.md#example-image-manifest).

    The image config blob referenced by the manifest is downloaded from the
    registry to read the image labels.

    :param sess: The HTTP client session used to query the registry.
    :param manifest: The image manifest; must provide ``config.digest``,
        ``config.size`` and ``layers[].size``.
    :param rqst_args: Extra keyword arguments passed to ``sess.get()``.
    :param image: The image (repository) name.
    :returns: A dict with ``size`` (sum of the layer sizes plus the config size),
        ``labels`` (empty dict when no labels are found) and ``digest``
        (the config blob digest).
    :raises aiohttp.ClientResponseError: If fetching the config blob fails
        with an HTTP error status.
    """
    config_digest = manifest["config"]["digest"]
    layers_size = sum(layer["size"] for layer in manifest["layers"])
    size_bytes = layers_size + manifest["config"]["size"]

    blob_url = self.registry_url / f"v2/{image}/blobs/{config_digest}"
    async with sess.get(blob_url, **rqst_args) as resp:
        resp.raise_for_status()
        image_config = json.loads(await resp.read())

    # Favor `config` over `container_config`, since `config` can contain
    # additional data set when committing an image via the `--change` flag.
    labels = (
        image_config.get("config", {}).get("Labels")
        or image_config.get("container_config", {}).get("Labels")
        or {}
    )

    return {
        "size": size_bytes,
        "labels": labels,
        "digest": config_digest,
    }
375
+
376
async def _process_oci_index(
    self,
    tg: aiotools.TaskGroup,
    sess: aiohttp.ClientSession,
    rqst_args: Mapping[str, Any],
    image: str,
    tag: str,
    image_info: Mapping[str, Any],
) -> None:
    """
    Process an image published as an OCI image index.

    Entries carrying ``annotations`` are skipped as attestation manifests;
    the remaining entries are read via ``_read_manifest_list()`` while
    requesting the OCI manifest media type.

    :param tg: The task group of the caller (not used by this method).
    :param sess: The HTTP client session used to query the registry.
    :param rqst_args: Extra keyword arguments for ``sess.get()``; left unmodified.
    :param image: The image (repository) name.
    :param tag: The image tag.
    :param image_info: The decoded index document; must provide ``manifests``.
    """
    manifest_list = [
        item
        for item in image_info["manifests"]
        if "annotations" not in item  # skip attestation manifests
    ]
    # Override the Accept header on a private copy instead of writing into the
    # caller's mapping, so the change cannot leak into other requests that
    # reuse the same request arguments.
    manifest_rqst_args = {
        **rqst_args,
        "headers": {
            **(rqst_args.get("headers") or {}),
            "Accept": self.MEDIA_TYPE_OCI_MANIFEST,
        },
    }

    await self._read_manifest_list(sess, manifest_list, manifest_rqst_args, image, tag)
393
+
394
async def _process_docker_v2_multiplatform_image(
    self,
    tg: aiotools.TaskGroup,
    sess: aiohttp.ClientSession,
    rqst_args: Mapping[str, Any],
    image: str,
    tag: str,
    image_info: Mapping[str, Any],
) -> None:
    """
    Process an image published as a Docker v2 manifest list.

    All entries of the list are read via ``_read_manifest_list()`` while
    requesting the Docker v2 manifest media type.

    :param tg: The task group of the caller (not used by this method).
    :param sess: The HTTP client session used to query the registry.
    :param rqst_args: Extra keyword arguments for ``sess.get()``; left unmodified.
    :param image: The image (repository) name.
    :param tag: The image tag.
    :param image_info: The decoded manifest list document; must provide ``manifests``.
    """
    manifest_list = image_info["manifests"]
    # Override the Accept header on a private copy instead of writing into the
    # caller's mapping, so the change cannot leak into other requests that
    # reuse the same request arguments.
    manifest_rqst_args = {
        **rqst_args,
        "headers": {
            **(rqst_args.get("headers") or {}),
            "Accept": self.MEDIA_TYPE_DOCKER_MANIFEST,
        },
    }

    await self._read_manifest_list(
        sess,
        manifest_list,
        manifest_rqst_args,
        image,
        tag,
    )
413
+
414
async def _process_docker_v2_image(
    self,
    tg: aiotools.TaskGroup,
    sess: aiohttp.ClientSession,
    rqst_args: Mapping[str, Any],
    image: str,
    tag: str,
    image_info: Mapping[str, Any],
) -> None:
    """
    Process a single-platform image published as a Docker v2 image manifest.

    The manifest itself carries no platform information, so the architecture
    is read from the image config blob before the manifest is handed to
    ``_preprocess_manifest()`` and ``_read_manifest()``.

    :param tg: The task group of the caller (not used by this method).
    :param sess: The HTTP client session used to query the registry.
    :param rqst_args: Extra keyword arguments for ``sess.get()``; left unmodified.
    :param image: The image (repository) name.
    :param tag: The image tag.
    :param image_info: The decoded image manifest; must provide ``config.digest``.
    :raises aiohttp.ClientResponseError: If fetching the config blob fails
        with an HTTP error status.
    """
    config_digest = image_info["config"]["digest"]
    # Override the Accept header on a private copy instead of writing into the
    # caller's mapping, so the change cannot leak into other requests that
    # reuse the same request arguments.
    blob_rqst_args = {
        **rqst_args,
        "headers": {
            **(rqst_args.get("headers") or {}),
            "Accept": self.MEDIA_TYPE_DOCKER_MANIFEST,
        },
    }

    async with sess.get(
        self.registry_url / f"v2/{image}/blobs/{config_digest}",
        **blob_rqst_args,
    ) as resp:
        resp.raise_for_status()
        blob_data = json.loads(await resp.read())

    manifest_arch = blob_data["architecture"]
    architecture = arch_name_aliases.get(manifest_arch, manifest_arch)

    # NOTE(review): _preprocess_manifest() downloads the same config blob again
    # to read its labels; sharing the already-decoded blob would save a request.
    manifests = {
        architecture: await self._preprocess_manifest(sess, image_info, blob_rqst_args, image),
    }
    await self._read_manifest(image, tag, manifests)
340
440
341
441
async def _read_manifest (
342
442
self ,
@@ -345,6 +445,9 @@ async def _read_manifest(
345
445
manifests : dict [str , dict ],
346
446
skip_reason : Optional [str ] = None ,
347
447
) -> None :
448
+ """
449
+ Detects if image is compatible with Backend.AI and injects the matadata to database if it complies.
450
+ """
348
451
if not manifests :
349
452
if not skip_reason :
350
453
skip_reason = "missing/deleted"
0 commit comments