Skip to content

Commit a4b9b3c

Browse files
authored
refactor: Base ContainerRegistry's scan_tag and implement MEDIA_TYPE_DOCKER_MANIFEST type handling (#2620)
1 parent 7f968e7 commit a4b9b3c

File tree

3 files changed

+177
-69
lines changed

3 files changed

+177
-69
lines changed

changes/2620.feature.md

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Refactor Base ContainerRegistry's `scan_tag` and implement `MEDIA_TYPE_DOCKER_MANIFEST` type handling.

src/ai/backend/manager/container_registry/base.py

+160-57
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
from abc import ABCMeta, abstractmethod
77
from contextlib import asynccontextmanager as actxmgr
88
from contextvars import ContextVar
9-
from typing import Any, AsyncIterator, Dict, Final, Optional, cast
9+
from typing import Any, AsyncIterator, Dict, Final, Mapping, Optional, Sequence, cast
1010

1111
import aiohttp
1212
import aiotools
@@ -268,7 +268,6 @@ async def _scan_tag(
268268
image: str,
269269
tag: str,
270270
) -> None:
271-
manifests = {}
272271
async with concurrency_sema.get():
273272
rqst_args["headers"]["Accept"] = self.MEDIA_TYPE_DOCKER_MANIFEST_LIST
274273
async with sess.get(
@@ -281,62 +280,163 @@ async def _scan_tag(
281280
content_type = resp.headers["Content-Type"]
282281
resp.raise_for_status()
283282
resp_json = await resp.json()
284-
match content_type:
285-
case self.MEDIA_TYPE_DOCKER_MANIFEST_LIST:
286-
manifest_list = resp_json["manifests"]
287-
request_type = self.MEDIA_TYPE_DOCKER_MANIFEST
288-
case self.MEDIA_TYPE_OCI_INDEX:
289-
manifest_list = [
290-
item
291-
for item in resp_json["manifests"]
292-
if "annotations" not in item # skip attestation manifests
293-
]
294-
request_type = self.MEDIA_TYPE_OCI_MANIFEST
295-
case _:
296-
log.warning("Unknown content type: {}", content_type)
297-
raise RuntimeError(
298-
"The registry does not support the standard way of "
299-
"listing multiarch images."
300-
)
301-
rqst_args["headers"]["Accept"] = request_type
302-
for manifest in manifest_list:
303-
platform_arg = (
304-
f"{manifest["platform"]["os"]}/{manifest["platform"]["architecture"]}"
305-
)
306-
if variant := manifest["platform"].get("variant", None):
307-
platform_arg += f"/{variant}"
308-
architecture = manifest["platform"]["architecture"]
309-
architecture = arch_name_aliases.get(architecture, architecture)
310-
async with sess.get(
311-
self.registry_url / f"v2/{image}/manifests/{manifest["digest"]}", **rqst_args
312-
) as resp:
313-
data = await resp.json()
314-
config_digest = data["config"]["digest"]
315-
size_bytes = sum(layer["size"] for layer in data["layers"]) + data["config"]["size"]
316-
async with sess.get(
317-
self.registry_url / f"v2/{image}/blobs/{config_digest}", **rqst_args
318-
) as resp:
319-
resp.raise_for_status()
320-
data = json.loads(await resp.read())
321-
labels = {}
322-
# we should favor `config` instead of `container_config` since `config` can contain additional datas
323-
# set when commiting image via `--change` flag
324-
if _config_labels := data.get("config", {}).get("Labels"):
325-
labels = _config_labels
326-
elif _container_config_labels := data.get("container_config", {}).get("Labels"):
327-
labels = _container_config_labels
328-
329-
if not labels:
330-
log.warning(
331-
"Labels section not found on image {}:{}/{}", image, tag, architecture
332-
)
333283

334-
manifests[architecture] = {
335-
"size": size_bytes,
336-
"labels": labels,
337-
"digest": config_digest,
338-
}
339-
await self._read_manifest(image, tag, manifests)
284+
async with aiotools.TaskGroup() as tg:
285+
match content_type:
286+
case self.MEDIA_TYPE_DOCKER_MANIFEST:
287+
await self._process_docker_v2_image(
288+
tg, sess, rqst_args, image, tag, resp_json
289+
)
290+
case self.MEDIA_TYPE_DOCKER_MANIFEST_LIST:
291+
await self._process_docker_v2_multiplatform_image(
292+
tg, sess, rqst_args, image, tag, resp_json
293+
)
294+
case self.MEDIA_TYPE_OCI_INDEX:
295+
await self._process_oci_index(
296+
tg, sess, rqst_args, image, tag, resp_json
297+
)
298+
case _:
299+
log.warn("Unknown content type: {}", content_type)
300+
raise RuntimeError(
301+
"The registry does not support the standard way of "
302+
"listing multiarch images."
303+
)
304+
305+
async def _read_manifest_list(
306+
self,
307+
sess: aiohttp.ClientSession,
308+
manifest_list: Sequence[Any],
309+
rqst_args: Mapping[str, Any],
310+
image: str,
311+
tag: str,
312+
) -> None:
313+
"""
314+
Understands images defined under [OCI image manifest](https://github.com/opencontainers/image-spec/blob/main/manifest.md#example-image-manifest) or
315+
[Docker image manifest list](https://github.com/openshift/docker-distribution/blob/master/docs/spec/manifest-v2-2.md#example-manifest-list)
316+
and imports Backend.AI compatible images.
317+
"""
318+
manifests = {}
319+
for manifest in manifest_list:
320+
platform_arg = f"{manifest["platform"]["os"]}/{manifest["platform"]["architecture"]}"
321+
if variant := manifest["platform"].get("variant", None):
322+
platform_arg += f"/{variant}"
323+
architecture = manifest["platform"]["architecture"]
324+
architecture = arch_name_aliases.get(architecture, architecture)
325+
326+
async with sess.get(
327+
self.registry_url / f"v2/{image}/manifests/{manifest["digest"]}",
328+
**rqst_args,
329+
) as resp:
330+
manifest_info = await resp.json()
331+
332+
manifests[architecture] = await self._preprocess_manifest(
333+
sess, manifest_info, rqst_args, image
334+
)
335+
336+
if not manifests[architecture]["labels"]:
337+
log.warning("Labels section not found on image {}:{}/{}", image, tag, architecture)
338+
339+
await self._read_manifest(image, tag, manifests)
340+
341+
async def _preprocess_manifest(
342+
self,
343+
sess: aiohttp.ClientSession,
344+
manifest: Mapping[str, Any],
345+
rqst_args: Mapping[str, Any],
346+
image: str,
347+
) -> dict[str, Any]:
348+
"""
349+
Extracts information from
350+
[Docker image manifest](https://github.com/openshift/docker-distribution/blob/master/docs/spec/manifest-v2-2.md#example-image-manifest)
351+
required by Backend.AI.
352+
"""
353+
config_digest = manifest["config"]["digest"]
354+
size_bytes = sum(layer["size"] for layer in manifest["layers"]) + manifest["config"]["size"]
355+
356+
async with sess.get(
357+
self.registry_url / f"v2/{image}/blobs/{config_digest}", **rqst_args
358+
) as resp:
359+
resp.raise_for_status()
360+
data = json.loads(await resp.read())
361+
labels = {}
362+
363+
# we should favor `config` instead of `container_config` since `config` can contain additional data
364+
# set when committing image via `--change` flag
365+
if _config_labels := data.get("config", {}).get("Labels"):
366+
labels = _config_labels
367+
elif _container_config_labels := data.get("container_config", {}).get("Labels"):
368+
labels = _container_config_labels
369+
370+
return {
371+
"size": size_bytes,
372+
"labels": labels,
373+
"digest": config_digest,
374+
}
375+
376+
async def _process_oci_index(
377+
self,
378+
tg: aiotools.TaskGroup,
379+
sess: aiohttp.ClientSession,
380+
rqst_args: Mapping[str, Any],
381+
image: str,
382+
tag: str,
383+
image_info: Mapping[str, Any],
384+
) -> None:
385+
manifest_list = [
386+
item
387+
for item in image_info["manifests"]
388+
if "annotations" not in item # skip attestation manifests
389+
]
390+
rqst_args["headers"]["Accept"] = self.MEDIA_TYPE_OCI_MANIFEST
391+
392+
await self._read_manifest_list(sess, manifest_list, rqst_args, image, tag)
393+
394+
async def _process_docker_v2_multiplatform_image(
395+
self,
396+
tg: aiotools.TaskGroup,
397+
sess: aiohttp.ClientSession,
398+
rqst_args: Mapping[str, Any],
399+
image: str,
400+
tag: str,
401+
image_info: Mapping[str, Any],
402+
) -> None:
403+
manifest_list = image_info["manifests"]
404+
rqst_args["headers"]["Accept"] = self.MEDIA_TYPE_DOCKER_MANIFEST
405+
406+
await self._read_manifest_list(
407+
sess,
408+
manifest_list,
409+
rqst_args,
410+
image,
411+
tag,
412+
)
413+
414+
async def _process_docker_v2_image(
415+
self,
416+
tg: aiotools.TaskGroup,
417+
sess: aiohttp.ClientSession,
418+
rqst_args: Mapping[str, Any],
419+
image: str,
420+
tag: str,
421+
image_info: Mapping[str, Any],
422+
) -> None:
423+
config_digest = image_info["config"]["digest"]
424+
rqst_args["headers"]["Accept"] = self.MEDIA_TYPE_DOCKER_MANIFEST
425+
426+
async with sess.get(
427+
self.registry_url / f"v2/{image}/blobs/{config_digest}",
428+
**rqst_args,
429+
) as resp:
430+
resp.raise_for_status()
431+
blob_data = json.loads(await resp.read())
432+
433+
manifest_arch = blob_data["architecture"]
434+
architecture = arch_name_aliases.get(manifest_arch, manifest_arch)
435+
436+
manifests = {
437+
architecture: await self._preprocess_manifest(sess, image_info, rqst_args, image),
438+
}
439+
await self._read_manifest(image, tag, manifests)
340440

341441
async def _read_manifest(
342442
self,
@@ -345,6 +445,9 @@ async def _read_manifest(
345445
manifests: dict[str, dict],
346446
skip_reason: Optional[str] = None,
347447
) -> None:
448+
"""
449+
Detects if the image is compatible with Backend.AI and injects the metadata into the database if it complies.
450+
"""
348451
if not manifests:
349452
if not skip_reason:
350453
skip_reason = "missing/deleted"

src/ai/backend/manager/container_registry/harbor.py

+16-12
Original file line numberDiff line numberDiff line change
@@ -263,15 +263,15 @@ async def _scan_image(
263263
match image_info["manifest_media_type"]:
264264
case self.MEDIA_TYPE_OCI_INDEX:
265265
await self._process_oci_index(
266-
tg, sess, rqst_args, image, image_info
266+
tg, sess, rqst_args, image, tag, image_info
267267
)
268268
case self.MEDIA_TYPE_DOCKER_MANIFEST_LIST:
269269
await self._process_docker_v2_multiplatform_image(
270-
tg, sess, rqst_args, image, image_info
270+
tg, sess, rqst_args, image, tag, image_info
271271
)
272272
case self.MEDIA_TYPE_DOCKER_MANIFEST:
273273
await self._process_docker_v2_image(
274-
tg, sess, rqst_args, image, image_info
274+
tg, sess, rqst_args, image, tag, image_info
275275
)
276276
case _ as media_type:
277277
raise RuntimeError(
@@ -312,15 +312,19 @@ async def _scan_tag(
312312
resp.raise_for_status()
313313
resp_json = await resp.json()
314314
async with aiotools.TaskGroup() as tg:
315+
tag = resp_json["tags"][0]["name"]
316+
315317
match resp_json["manifest_media_type"]:
316318
case self.MEDIA_TYPE_OCI_INDEX:
317-
await self._process_oci_index(tg, sess, rqst_args, image, resp_json)
319+
await self._process_oci_index(tg, sess, rqst_args, image, tag, resp_json)
318320
case self.MEDIA_TYPE_DOCKER_MANIFEST_LIST:
319321
await self._process_docker_v2_multiplatform_image(
320-
tg, sess, rqst_args, image, resp_json
322+
tg, sess, rqst_args, image, tag, resp_json
321323
)
322324
case self.MEDIA_TYPE_DOCKER_MANIFEST:
323-
await self._process_docker_v2_image(tg, sess, rqst_args, image, resp_json)
325+
await self._process_docker_v2_image(
326+
tg, sess, rqst_args, image, tag, resp_json
327+
)
324328
case _ as media_type:
325329
raise RuntimeError(f"Unsupported artifact media-type: {media_type}")
326330

@@ -330,14 +334,14 @@ async def _process_oci_index(
330334
sess: aiohttp.ClientSession,
331335
_rqst_args: Mapping[str, Any],
332336
image: str,
337+
tag: str,
333338
image_info: Mapping[str, Any],
334339
) -> None:
335340
rqst_args = dict(_rqst_args)
336341
if not rqst_args.get("headers"):
337342
rqst_args["headers"] = {}
338343
rqst_args["headers"].update({"Accept": "application/vnd.oci.image.manifest.v1+json"})
339344
digests: list[tuple[str, str]] = []
340-
tag_name = image_info["tags"][0]["name"]
341345
for reference in image_info["references"]:
342346
if (
343347
reference["platform"]["os"] == "unknown"
@@ -355,7 +359,7 @@ async def _process_oci_index(
355359
rqst_args,
356360
image,
357361
digest=digest,
358-
tag=tag_name,
362+
tag=tag,
359363
architecture=architecture,
360364
)
361365
)
@@ -366,6 +370,7 @@ async def _process_docker_v2_multiplatform_image(
366370
sess: aiohttp.ClientSession,
367371
_rqst_args: Mapping[str, Any],
368372
image: str,
373+
tag: str,
369374
image_info: Mapping[str, Any],
370375
) -> None:
371376
rqst_args = dict(_rqst_args)
@@ -375,7 +380,6 @@ async def _process_docker_v2_multiplatform_image(
375380
"Accept": "application/vnd.docker.distribution.manifest.v2+json"
376381
})
377382
digests: list[tuple[str, str]] = []
378-
tag_name = image_info["tags"][0]["name"]
379383
for reference in image_info["references"]:
380384
if (
381385
reference["platform"]["os"] == "unknown"
@@ -393,7 +397,7 @@ async def _process_docker_v2_multiplatform_image(
393397
rqst_args,
394398
image,
395399
digest=digest,
396-
tag=tag_name,
400+
tag=tag,
397401
architecture=architecture,
398402
)
399403
)
@@ -404,6 +408,7 @@ async def _process_docker_v2_image(
404408
sess: aiohttp.ClientSession,
405409
_rqst_args: Mapping[str, Any],
406410
image: str,
411+
tag: str,
407412
image_info: Mapping[str, Any],
408413
) -> None:
409414
rqst_args = dict(_rqst_args)
@@ -414,14 +419,13 @@ async def _process_docker_v2_image(
414419
})
415420
if (reporter := progress_reporter.get()) is not None:
416421
reporter.total_progress += 1
417-
tag_name = image_info["tags"][0]["name"]
418422
async with aiotools.TaskGroup() as tg:
419423
tg.create_task(
420424
self._harbor_scan_tag_single_arch(
421425
sess,
422426
rqst_args,
423427
image,
424-
tag=tag_name,
428+
tag=tag,
425429
)
426430
)
427431

0 commit comments

Comments
 (0)