Skip to content

Commit ec4012c

Browse files
authored
Merge branch 'main' into separate-snowflake-source
2 parents 78695d1 + 88a0348 commit ec4012c

39 files changed

+754
-236
lines changed

docs/how-to-guides/feathr-configuration-and-env.md

Lines changed: 59 additions & 53 deletions
Large diffs are not rendered by default.

docs/samples/customer360/Customer360.ipynb

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -194,8 +194,8 @@
194194
" - 'REDIS_PASSWORD'\n",
195195
" - 'ADLS_ACCOUNT'\n",
196196
" - 'ADLS_KEY'\n",
197-
" - 'WASB_ACCOUNT'\n",
198-
" - 'WASB_KEY'\n",
197+
" - 'BLOB_ACCOUNT'\n",
198+
" - 'BLOB_KEY'\n",
199199
" - 'DATABRICKS_WORKSPACE_TOKEN_VALUE '\n",
200200
" \n",
201201
"offline_store:\n",
@@ -328,8 +328,8 @@
328328
"os.environ['REDIS_PASSWORD'] = ''\n",
329329
"os.environ['ADLS_ACCOUNT'] = ''\n",
330330
"os.environ['ADLS_KEY'] = ''\n",
331-
"os.environ['WASB_ACCOUNT'] = \"\"\n",
332-
"os.environ['WASB_KEY'] = ''\n",
331+
"os.environ['BLOB_ACCOUNT'] = \"\"\n",
332+
"os.environ['BLOB_KEY'] = ''\n",
333333
"os.environ['DATABRICKS_WORKSPACE_TOKEN_VALUE'] = ''"
334334
]
335335
},

feathr_project/feathr/client.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
from feathr.utils._file_utils import write_to_file
3333
from feathr.utils.feature_printer import FeaturePrinter
3434
from feathr.utils.spark_job_params import FeatureGenerationJobParams, FeatureJoinJobParams
35+
from feathr.definition.source import InputContext
3536

3637

3738
class FeathrClient(object):
@@ -633,8 +634,15 @@ def materialize_features(self, settings: MaterializationSettings, execution_conf
633634
allow_materialize_non_agg_feature: Materializing non-aggregated features (the features without WindowAggTransformation) doesn't output meaningful results so it's by default set to False, but if you really want to materialize non-aggregated features, set this to True.
634635
"""
635636
feature_list = settings.feature_names
636-
if len(feature_list) > 0 and not self._valid_materialize_keys(feature_list):
637-
raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.")
637+
if len(feature_list) > 0:
638+
if 'anchor_list' in dir(self):
639+
anchors = [anchor for anchor in self.anchor_list if isinstance(anchor.source, InputContext)]
640+
anchor_feature_names = set(feature.name for anchor in anchors for feature in anchor.features)
641+
for feature in feature_list:
642+
if feature in anchor_feature_names:
643+
raise RuntimeError(f"Materializing features that are defined on INPUT_CONTEXT is not supported. {feature} is defined on INPUT_CONTEXT so you should remove it from the feature list in MaterializationSettings.")
644+
if not self._valid_materialize_keys(feature_list):
645+
raise RuntimeError(f"Invalid materialization features: {feature_list}, since they have different keys. Currently Feathr only supports materializing features of the same keys.")
638646

639647
if not allow_materialize_non_agg_feature:
640648
# Check if there are non-aggregation features in the list

feathr_project/feathr/constants.py

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,6 @@
2828
TYPEDEF_ARRAY_DERIVED_FEATURE=f"array<feathr_derived_feature_{REGISTRY_TYPEDEF_VERSION}>"
2929
TYPEDEF_ARRAY_ANCHOR_FEATURE=f"array<feathr_anchor_feature_{REGISTRY_TYPEDEF_VERSION}>"
3030

31-
# Decouple Feathr MAVEN Version from Feathr Python SDK Version
32-
import os
33-
from feathr.version import __version__
34-
FEATHR_MAVEN_VERSION = os.environ.get("FEATHR_MAVEN_VERSION", __version__)
35-
FEATHR_MAVEN_ARTIFACT=f"com.linkedin.feathr:feathr_2.12:{FEATHR_MAVEN_VERSION}"
3631

3732
JOIN_CLASS_NAME="com.linkedin.feathr.offline.job.FeatureJoinJob"
3833
GEN_CLASS_NAME="com.linkedin.feathr.offline.job.FeatureGenJob"

feathr_project/feathr/registry/_feature_registry_purview.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -758,8 +758,7 @@ def upload_single_entity_to_purview(self,entity:Union[AtlasEntity,AtlasProcess])
758758
"""
759759
Try to find existing entity/process first, if found, return the existing entity's GUID
760760
"""
761-
id = self.get_entity_id(entity.qualifiedName)
762-
response = self.purview_client.get_entity(id)['entities'][0]
761+
response = self.purview_client.get_entity(qualifiedName=entity.qualifiedName)['entities'][0]
763762
j = entity.to_json()
764763
if j["typeName"] == response["typeName"]:
765764
if j["typeName"] == "Process":

feathr_project/feathr/spark_provider/_databricks_submission.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
from databricks_cli.runs.api import RunsApi
1616
from databricks_cli.sdk.api_client import ApiClient
1717
from feathr.constants import *
18+
from feathr.version import get_maven_artifact_fullname
1819
from feathr.spark_provider._abc import SparkJobLauncher
1920
from loguru import logger
2021
from requests.structures import CaseInsensitiveDict
@@ -166,8 +167,8 @@ def submit_feathr_job(self, job_name: str, main_jar_path: str, main_class_name:
166167

167168
# the feathr main jar file is anyway needed regardless it's pyspark or scala spark
168169
if not main_jar_path:
169-
logger.info(f"Main JAR file is not set, using default package '{FEATHR_MAVEN_ARTIFACT}' from Maven")
170-
submission_params['libraries'][0]['maven'] = { "coordinates": FEATHR_MAVEN_ARTIFACT }
170+
logger.info(f"Main JAR file is not set, using default package '{get_maven_artifact_fullname()}' from Maven")
171+
submission_params['libraries'][0]['maven'] = { "coordinates": get_maven_artifact_fullname() }
171172
else:
172173
submission_params['libraries'][0]['jar'] = self.upload_or_get_cloud_path(main_jar_path)
173174
# see here for the submission parameter definition https://docs.microsoft.com/en-us/azure/databricks/dev-tools/api/2.0/jobs#--request-structure-6

feathr_project/feathr/spark_provider/_localspark_submission.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from loguru import logger
1111
from pyspark import *
1212

13-
from feathr.constants import FEATHR_MAVEN_ARTIFACT
13+
from feathr.version import get_maven_artifact_fullname
1414
from feathr.spark_provider._abc import SparkJobLauncher
1515

1616

@@ -77,7 +77,7 @@ def submit_feathr_job(
7777

7878
# Get conf and package arguments
7979
cfg = configuration.copy() if configuration else {}
80-
maven_dependency = f"{cfg.pop('spark.jars.packages', self.packages)},{FEATHR_MAVEN_ARTIFACT}"
80+
maven_dependency = f"{cfg.pop('spark.jars.packages', self.packages)},{get_maven_artifact_fullname()}"
8181
spark_args = self._init_args(job_name=job_name, confs=cfg)
8282

8383
if not main_jar_path:
@@ -86,7 +86,7 @@ def submit_feathr_job(
8686
# This is a JAR job
8787
# Azure Synapse/Livy doesn't allow JAR job starts from Maven directly, we must have a jar file uploaded.
8888
# so we have to use a dummy jar as the main file.
89-
logger.info(f"Main JAR file is not set, using default package '{FEATHR_MAVEN_ARTIFACT}' from Maven")
89+
logger.info(f"Main JAR file is not set, using default package '{get_maven_artifact_fullname()}' from Maven")
9090
# Use the no-op jar as the main file
9191
# This is a dummy jar which contains only one `org.example.Noop` class with one empty `main` function
9292
# which does nothing

feathr_project/feathr/spark_provider/_synapse_submission.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222

2323
from feathr.spark_provider._abc import SparkJobLauncher
2424
from feathr.constants import *
25+
from feathr.version import get_maven_artifact_fullname
2526

2627
class LivyStates(Enum):
2728
""" Adapt LivyStates over to relax the dependency for azure-synapse-spark package.
@@ -114,12 +115,12 @@ def submit_feathr_job(self, job_name: str, main_jar_path: str = None, main_clas
114115
if not main_jar_path:
115116
# We don't have the main jar, use Maven
116117
# Add Maven dependency to the job configuration
117-
logger.info(f"Main JAR file is not set, using default package '{FEATHR_MAVEN_ARTIFACT}' from Maven")
118+
logger.info(f"Main JAR file is not set, using default package '{get_maven_artifact_fullname()}' from Maven")
118119
if "spark.jars.packages" in cfg:
119120
cfg["spark.jars.packages"] = ",".join(
120-
[cfg["spark.jars.packages"], FEATHR_MAVEN_ARTIFACT])
121+
[cfg["spark.jars.packages"], get_maven_artifact_fullname()])
121122
else:
122-
cfg["spark.jars.packages"] = FEATHR_MAVEN_ARTIFACT
123+
cfg["spark.jars.packages"] = get_maven_artifact_fullname()
123124

124125
if not python_files:
125126
# This is a JAR job

feathr_project/feathr/version.py

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,10 @@
1-
__version__ = "0.9.0-rc2"
1+
__version__ = "0.9.0-rc2"
2+
3+
def get_version():
4+
return __version__
5+
6+
# Decouple Feathr MAVEN Version from Feathr Python SDK Version
7+
import os
8+
def get_maven_artifact_fullname():
9+
maven_artifact_version = os.environ.get("MAVEN_ARTIFACT_VERSION", __version__)
10+
return f"com.linkedin.feathr:feathr_2.12:{maven_artifact_version}"

feathr_project/feathrcli/data/feathr_user_workspace/feathr_config.yaml

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ project_config:
2525
# the environment variables are optional, however you will need them if you want to use some of the services:
2626
- ADLS_ACCOUNT
2727
- ADLS_KEY
28-
- WASB_ACCOUNT
29-
- WASB_KEY
28+
- BLOB_ACCOUNT
29+
- BLOB_KEY
3030
- S3_ACCESS_KEY
3131
- S3_SECRET_KEY
3232
- JDBC_TABLE
@@ -41,7 +41,7 @@ offline_store:
4141
adls_enabled: true
4242

4343
# paths start with wasb:// or wasbs://
44-
# WASB_ACCOUNT and WASB_KEY should be set in environment variable
44+
# BLOB_ACCOUNT and BLOB_KEY should be set in environment variable
4545
wasb:
4646
wasb_enabled: true
4747

@@ -118,8 +118,8 @@ feature_registry:
118118
delimiter: "__"
119119
# controls whether the type system will be initialized or not. Usually this is only required to be executed once.
120120
type_system_initialization: false
121-
122-
121+
122+
123123
secrets:
124124
azure_key_vault:
125125
name: feathrazuretest3-kv

feathr_project/test/test_feature_materialization.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@
1818
from test_fixture import basic_test_setup
1919
from test_fixture import get_online_test_table_name
2020
from test_utils.constants import Constants
21+
from logging import raiseExceptions
22+
import pytest
2123

2224
def test_feature_materialization_config():
2325
backfill_time = BackfillTime(start=datetime(2020, 5, 20), end=datetime(2020, 5,20), step=timedelta(days=1))
@@ -255,4 +257,21 @@ def test_delete_feature_from_redis():
255257
res = client.get_online_features(online_test_table, '265', ['f_location_avg_fare'])
256258

257259
assert len(res) == 1
258-
assert res[0] == None
260+
assert res[0] == None
261+
262+
def test_feature_list_on_input_context():
263+
with pytest.raises(RuntimeError) as e_info:
264+
test_workspace_dir = Path(__file__).parent.resolve() / "test_user_workspace"
265+
266+
client: FeathrClient = basic_test_setup(os.path.join(test_workspace_dir, "feathr_config.yaml"))
267+
online_test_table = get_online_test_table_name('nycTaxiCITableDeletion')
268+
redisSink = RedisSink(table_name=online_test_table)
269+
settings = MaterializationSettings(name="py_udf",
270+
sinks=[redisSink],
271+
feature_names=[
272+
"f_location_avg_fare",
273+
"f_day_of_week"
274+
])
275+
client.materialize_features(settings, allow_materialize_non_agg_feature=True)
276+
assert e_info is not None
277+
assert e_info.value.args[0] == "Materializing features that are defined on INPUT_CONTEXT is not supported. f_day_of_week is defined on INPUT_CONTEXT so you should remove it from the feature list in MaterializationSettings."

feathr_project/test/test_user_workspace/feathr_config.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ project_config:
2525
# the environment variables are optional, however you will need them if you want to use some of the services:
2626
- ADLS_ACCOUNT
2727
- ADLS_KEY
28-
- WASB_ACCOUNT
29-
- WASB_KEY
28+
- BLOB_ACCOUNT
29+
- BLOB_KEY
3030
- S3_ACCESS_KEY
3131
- S3_SECRET_KEY
3232
- JDBC_TABLE
@@ -41,7 +41,7 @@ offline_store:
4141
adls_enabled: true
4242

4343
# paths start with wasb:// or wasbs://
44-
# WASB_ACCOUNT and WASB_KEY should be set in environment variable
44+
# BLOB_ACCOUNT and BLOB_KEY should be set in environment variable
4545
wasb:
4646
wasb_enabled: true
4747

feathr_project/test/test_user_workspace/feathr_config_local.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,8 +17,8 @@ project_config:
1717
# the environment variables are optional, however you will need them if you want to use some of the services:
1818
- ADLS_ACCOUNT
1919
- ADLS_KEY
20-
- WASB_ACCOUNT
21-
- WASB_KEY
20+
- BLOB_ACCOUNT
21+
- BLOB_KEY
2222
- S3_ACCESS_KEY
2323
- S3_SECRET_KEY
2424
- JDBC_TABLE

feathr_project/test/test_user_workspace/feathr_config_maven.yaml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,8 @@ project_config:
2525
# the environment variables are optional, however you will need them if you want to use some of the services:
2626
- ADLS_ACCOUNT
2727
- ADLS_KEY
28-
- WASB_ACCOUNT
29-
- WASB_KEY
28+
- BLOB_ACCOUNT
29+
- BLOB_KEY
3030
- S3_ACCESS_KEY
3131
- S3_SECRET_KEY
3232
- JDBC_TABLE
@@ -41,7 +41,7 @@ offline_store:
4141
adls_enabled: true
4242

4343
# paths start with wasb:// or wasbs://
44-
# WASB_ACCOUNT and WASB_KEY should be set in environment variable
44+
# BLOB_ACCOUNT and BLOB_KEY should be set in environment variable
4545
wasb:
4646
wasb_enabled: true
4747

registry/data-models/common/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)