Skip to content

Commit 520c9d8

Browse files
authored
Merge pull request #40 from vkurpad/master
sample to demo passing images to skills
2 parents 68bbc09 + 2c30a93 commit 520c9d8

File tree

15 files changed

+1035
-0
lines changed

15 files changed

+1035
-0
lines changed

Image-Processing/BFR_Sample_Rest.ipynb

Lines changed: 658 additions & 0 deletions
Large diffs are not rendered by default.

Image-Processing/Microsoft.jpg

241 KB
Loading

Image-Processing/README.md

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
# Image Processing Sample
2+
3+
Cognitive Search can enrich images with text or images with other images. This sample demonstrates how to pass images to a custom skill and return images from the custom skill back to the skillset.
4+
5+
## Redacting PII information from images
6+
7+
This sample deploys a skill to obfuscate or redact phone numbers from images. The skillset contains three skills:
8+
1. OCR
9+
2. PII detection
10+
3. Custom Skill to redact PII
11+
12+
The skillset runs OCR on the images and passes the extracted text through the PII detection skill to identify PII. The custom skill then takes the image, the layout text from OCR, and the identified PII to obfuscate the image. The image with the PII obfuscated is then returned to the skillset and projected to the knowledge store.
13+
14+
## Configure the components
15+
16+
This sample contains an Azure function and a Jupyter Python3 .ipynb file. Start by deploying the Azure function and saving its URL and code.
17+
18+
The folder also contains a sample image with a phone number. Save this image to a container in a storage account. This container will be your data source for the enrichment pipeline.
19+
20+
Open the notebook in this folder and set the URL and other required variables in the first cell of the notebook. Then execute the cells of the notebook to configure and run the solution.
21+
22+
## Validation
23+
Once the indexer completes, you will see a container `obfuscated` in the knowledge store containing the image with the phone number redacted. For comparison, the original images are stored in a container `images`.
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
.git*
2+
.vscode
3+
local.settings.json
4+
test
5+
.venv
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
# Byte-compiled / optimized / DLL files
2+
__pycache__/
3+
*.py[cod]
4+
*$py.class
5+
6+
# C extensions
7+
*.so
8+
9+
# Distribution / packaging
10+
.Python
11+
build/
12+
develop-eggs/
13+
dist/
14+
downloads/
15+
eggs/
16+
.eggs/
17+
lib/
18+
lib64/
19+
parts/
20+
sdist/
21+
var/
22+
wheels/
23+
pip-wheel-metadata/
24+
share/python-wheels/
25+
*.egg-info/
26+
.installed.cfg
27+
*.egg
28+
MANIFEST
29+
30+
# PyInstaller
31+
# Usually these files are written by a python script from a template
32+
# before PyInstaller builds the exe, so as to inject date/other infos into it.
33+
*.manifest
34+
*.spec
35+
36+
# Installer logs
37+
pip-log.txt
38+
pip-delete-this-directory.txt
39+
40+
# Unit test / coverage reports
41+
htmlcov/
42+
.tox/
43+
.nox/
44+
.coverage
45+
.coverage.*
46+
.cache
47+
nosetests.xml
48+
coverage.xml
49+
*.cover
50+
.hypothesis/
51+
.pytest_cache/
52+
53+
# Translations
54+
*.mo
55+
*.pot
56+
57+
# Django stuff:
58+
*.log
59+
local_settings.py
60+
db.sqlite3
61+
62+
# Flask stuff:
63+
instance/
64+
.webassets-cache
65+
66+
# Scrapy stuff:
67+
.scrapy
68+
69+
# Sphinx documentation
70+
docs/_build/
71+
72+
# PyBuilder
73+
target/
74+
75+
# Jupyter Notebook
76+
.ipynb_checkpoints
77+
78+
# IPython
79+
profile_default/
80+
ipython_config.py
81+
82+
# pyenv
83+
.python-version
84+
85+
# pipenv
86+
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
87+
# However, in case of collaboration, if having platform-specific dependencies or dependencies
88+
# having no cross-platform support, pipenv may install dependencies that don’t work, or not
89+
# install all needed dependencies.
90+
#Pipfile.lock
91+
92+
# celery beat schedule file
93+
celerybeat-schedule
94+
95+
# SageMath parsed files
96+
*.sage.py
97+
98+
# Environments
99+
.env
100+
.venv
101+
env/
102+
venv/
103+
ENV/
104+
env.bak/
105+
venv.bak/
106+
107+
# Spyder project settings
108+
.spyderproject
109+
.spyproject
110+
111+
# Rope project settings
112+
.ropeproject
113+
114+
# mkdocs documentation
115+
/site
116+
117+
# mypy
118+
.mypy_cache/
119+
.dmypy.json
120+
dmypy.json
121+
122+
# Pyre type checker
123+
.pyre/
124+
125+
# Azure Functions artifacts
126+
bin
127+
obj
128+
appsettings.json
129+
local.settings.json
130+
.python_packages
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"recommendations": [
3+
"ms-azuretools.vscode-azurefunctions",
4+
"ms-python.python"
5+
]
6+
}
Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
{
2+
"version": "0.2.0",
3+
"configurations": [
4+
{
5+
"name": "Attach to Python Functions",
6+
"type": "python",
7+
"request": "attach",
8+
"port": 9091,
9+
"preLaunchTask": "func: host start"
10+
}
11+
]
12+
}
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"azureFunctions.deploySubpath": ".",
3+
"azureFunctions.scmDoBuildDuringDeployment": true,
4+
"azureFunctions.pythonVenv": ".venv",
5+
"azureFunctions.projectLanguage": "Python",
6+
"azureFunctions.projectRuntime": "~2",
7+
"debug.internalConsoleOptions": "neverOpen"
8+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"version": "2.0.0",
3+
"tasks": [
4+
{
5+
"type": "func",
6+
"command": "host start",
7+
"problemMatcher": "$func-python-watch",
8+
"isBackground": true,
9+
"dependsOn": "pipInstall"
10+
},
11+
{
12+
"label": "pipInstall",
13+
"type": "shell",
14+
"osx": {
15+
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
16+
},
17+
"windows": {
18+
"command": "${config:azureFunctions.pythonVenv}\\Scripts\\python -m pip install -r requirements.txt"
19+
},
20+
"linux": {
21+
"command": "${config:azureFunctions.pythonVenv}/bin/python -m pip install -r requirements.txt"
22+
},
23+
"problemMatcher": []
24+
}
25+
]
26+
}
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import logging
2+
import cv2
3+
import numpy as np
4+
import json
5+
import os
6+
import base64
7+
import logging
8+
import azure.functions as func
9+
10+
def base64EncodeImage(image):
    """Encode an image as JPEG and return the bytes as a base64 string.

    Args:
        image: Image array accepted by ``cv2.imencode``.

    Returns:
        The JPEG-encoded image as a base64 ``str``.

    Raises:
        ValueError: If OpenCV reports that the JPEG encoding failed.
    """
    is_success, im_buf_arr = cv2.imencode(".jpg", image)
    if not is_success:
        # Fail loudly rather than returning a payload that is not a valid image.
        raise ValueError("Failed to encode image as JPEG.")
    return base64.b64encode(im_buf_arr.tobytes()).decode('utf-8')
16+
17+
def obfuscate_data(image, factor=3.0):
    """Blur an image region with a Gaussian kernel sized relative to the region.

    The kernel is roughly ``1 / factor`` of the region's width and height, so
    a smaller ``factor`` produces a larger kernel and a stronger blur.

    Args:
        image: Image array whose first two dimensions are (height, width).
        factor: Divisor applied to the width and height to derive the kernel
            size.

    Returns:
        The blurred image as returned by ``cv2.GaussianBlur``.
    """
    (h, w) = image.shape[:2]
    kW = int(w / factor)
    kH = int(h / factor)
    # ensure the width of the kernel is odd
    if kW % 2 == 0:
        kW -= 1
    # ensure the height of the kernel is odd
    if kH % 2 == 0:
        kH -= 1
    # A region smaller than `factor` pixels gives 0 above, which the odd
    # adjustment turns into -1; GaussianBlur needs positive odd sizes, so
    # clamp to the smallest valid kernel.
    kW = max(kW, 1)
    kH = max(kH, 1)
    # apply a Gaussian blur to the input image using our computed
    # kernel size
    return cv2.GaussianBlur(image, (kW, kH), 0)
30+
31+
def main(req: func.HttpRequest) -> func.HttpResponse:
    """HTTP entry point: validate the request body and return the skill response.

    Args:
        req: Incoming HTTP request whose JSON body is expected to be an object
            with a ``values`` list of records.

    Returns:
        A JSON response built by ``compose_response`` on success, or a 400
        "Invalid body" response when the body is not JSON or does not have
        the expected shape.
    """
    logging.info('Python HTTP trigger function processed a request.')

    try:
        payload = req.get_json()
    except ValueError:
        return func.HttpResponse(
            "Invalid body",
            status_code=400
        )

    # A JSON `null`, array or scalar parses fine but is not a usable request,
    # so reject anything that is not an object carrying a `values` list.
    if not isinstance(payload, dict) or not isinstance(payload.get('values'), list):
        return func.HttpResponse(
            "Invalid body",
            status_code=400
        )

    # Deliberately log only the record count: the body carries the unredacted
    # image and the detected PII text, which must not end up in the logs.
    logging.info('Received %d record(s).', len(payload['values']))
    result = compose_response(json.dumps(payload))
    return func.HttpResponse(result, mimetype="application/json")
51+
52+
53+
def compose_response(json_data):
    """Transform every record in a request and serialize the results.

    Args:
        json_data: JSON string of an object with a ``values`` list of records.

    Returns:
        A JSON string of the form ``{"values": [...]}`` holding the output of
        ``transform_value`` for each record; records for which it returns
        ``None`` are omitted.

    Raises:
        KeyError: If the decoded object has no ``values`` key.
    """
    values = json.loads(json_data)['values']

    transformed = (transform_value(value) for value in values)
    results = {
        "values": [record for record in transformed if record is not None],
    }
    return json.dumps(results, ensure_ascii=False)
65+
66+
## Perform an operation on a record
67+
def _record_error(recordId, message):
    """Build the per-record error entry returned for a failed record."""
    return {
        "recordId": recordId,
        "errors": [{"message": "Error:" + message}]
    }


def transform_value(value):
    """Blur the phone numbers found in one record's image.

    The record's ``data`` is read as follows:
      * ``data["image"]["data"]``: base64-encoded image bytes.
      * ``data["pii_entities"]``: entities with ``type`` and ``text``; only
        those whose type is ``"Phone Number"`` are processed.
      * ``data["layoutText"]["lines"]``: lines with ``text`` and a
        ``boundingBox`` list of points with ``x``/``y``; points 0 and 2 are
        used as the start and end corners of the region to blur.

    Args:
        value: A single record with ``recordId`` and ``data``.

    Returns:
        ``None`` when the record has no ``recordId``; a dict with
        ``recordId`` and ``errors`` when the input is invalid; otherwise a
        dict with ``recordId`` and ``data`` holding ``slices`` (the encoded
        regions) and ``original`` (the encoded full image after blurring).
    """
    try:
        recordId = value['recordId']
    except (KeyError, TypeError):
        # Without a recordId the result cannot be matched to its input.
        return None

    # Validate the inputs and process the image. Any malformed record yields
    # a per-record error instead of failing the whole batch.
    try:
        if 'data' not in value:
            raise ValueError("'data' field is required.")
        data = value['data']
        inputBytes = base64.b64decode(data["image"]["data"].encode('utf-8'))
        jpg_as_np = np.frombuffer(inputBytes, dtype=np.uint8)
        # flags=1 is cv2.IMREAD_COLOR.
        originalImage = cv2.imdecode(jpg_as_np, flags=1)
        if originalImage is None:
            # imdecode signals undecodable data by returning None.
            raise ValueError("'image' could not be decoded.")

        slices = []
        for pii_entity in data["pii_entities"]:
            if pii_entity["type"] != "Phone Number":
                continue
            for line in data["layoutText"]["lines"]:
                if pii_entity["text"] not in line["text"]:
                    continue
                startX = line["boundingBox"][0]["x"]
                startY = line["boundingBox"][0]["y"]
                endX = line["boundingBox"][2]["x"]
                endY = line["boundingBox"][2]["y"]
                slicedImage = originalImage[startY:endY, startX:endX]
                if slicedImage.size > 0:
                    fuzzy = obfuscate_data(slicedImage)
                    originalImage[startY:endY, startX:endX] = fuzzy
                    # NOTE(review): slicedImage is a view into originalImage,
                    # so it now holds the blurred pixels and the slice below
                    # is the obfuscated region. Confirm that is intended.
                    slices.append({
                        "$type": "file",
                        "data": base64EncodeImage(slicedImage)
                    })

        encodedImage = base64EncodeImage(originalImage)
    except KeyError as error:
        return _record_error(recordId, "Missing required field " + str(error))
    except (ValueError, TypeError, IndexError, AttributeError) as error:
        return _record_error(recordId, str(error))
    except cv2.error as error:
        return _record_error(recordId, str(error))

    return {
        "recordId": recordId,
        "data": {
            "slices": slices,
            "original": {
                "$type": "file",
                "data": encodedImage
            }
        }
    }
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
{
2+
"scriptFile": "__init__.py",
3+
"bindings": [
4+
{
5+
"authLevel": "function",
6+
"type": "httpTrigger",
7+
"direction": "in",
8+
"name": "req",
9+
"methods": [
10+
"get",
11+
"post"
12+
]
13+
},
14+
{
15+
"type": "http",
16+
"direction": "out",
17+
"name": "$return"
18+
}
19+
]
20+
}
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
{
2+
"name": "Azure"
3+
}

Image-Processing/SplitImage/host.json

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"version": "2.0",
3+
"logging": {
4+
"applicationInsights": {
5+
"samplingSettings": {
6+
"isEnabled": true,
7+
"excludedTypes": "Request"
8+
}
9+
}
10+
},
11+
"extensionBundle": {
12+
"id": "Microsoft.Azure.Functions.ExtensionBundle",
13+
"version": "[1.*, 2.0.0)"
14+
}
15+
}
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
{
2+
"$schema": "http://json.schemastore.org/proxies",
3+
"proxies": {}
4+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
# DO NOT include azure-functions-worker in this file
2+
# The Python Worker is managed by Azure Functions platform
3+
# Manually managing azure-functions-worker may cause unexpected issues
4+
5+
azure-functions
6+
opencv-python

0 commit comments

Comments
 (0)