apache
diff --git a/‎sdks/python/apache_beam/yaml/main.py
+130-7 b/‎sdks/python/apache_beam/yaml/main.py
+130-7
diff --git a/‎sdks/python/apache_beam/yaml/main_test.py
+114 b/‎sdks/python/apache_beam/yaml/main_test.py
+114
diff --git a/‎sdks/python/apache_beam/yaml/readme_test.py
+25-1 b/‎sdks/python/apache_beam/yaml/readme_test.py
+25-1
@@ -18,6 +18,9 @@
 import argparse
 import contextlib
 import json
+import os
+import sys
+import unittest
 
 import yaml
 
@@ -26,7 +29,9 @@
 from apache_beam.transforms import resources
 from apache_beam.typehints.schemas import LogicalType
 from apache_beam.typehints.schemas import MillisInstant
+from apache_beam.yaml import yaml_testing
 from apache_beam.yaml import yaml_transform
+from apache_beam.yaml import yaml_utils
 
 
 def _preparse_jinja_flags(argv):
@@ -90,6 +95,28 @@ def _parse_arguments(argv):
       type=json.loads,
       help='A json dict of variables used when invoking the jinja preprocessor '
       'on the provided yaml pipeline.')
+  parser.add_argument(
+      '--test',
+      action=argparse.BooleanOptionalAction,
+      help='Run the tests associated with the given pipeline, rather than the '
+      'pipeline itself.')
+  parser.add_argument(
+      '--fix_tests',
+      action=argparse.BooleanOptionalAction,
+      help='Update failing test expectations to match the actual ouput. '
+      'Requires --test_suite if the pipeline uses jinja formatting.')
+  parser.add_argument(
+      '--create_test',
+      action=argparse.BooleanOptionalAction,
+      help='Automatically creates a regression test for the given pipeline, '
+      'adding it to the pipeline spec or test suite dependon on whether '
+      '--test_suite is given. '
+      'Requires --test_suite if the pipeline uses jinja formatting.')
+  parser.add_argument(
+      '--test_suite',
+      help='Run the given tests against the given pipeline, rather than the '
+      'pipeline itself. '
+      'Should be a file containing a list of yaml test specifications.')
   return parser.parse_known_args(argv)
 
 
@@ -130,12 +157,109 @@ def run(argv=None):
       print('Running pipeline...')
 
 
-def build_pipeline_components_from_argv(argv):
+def run_tests(argv=None, exit=True):
+  """Runs (or creates/fixes) the tests associated with a yaml pipeline.
+
+  The pipeline is resolved from the command line flags.  Unless --create_test
+  is given, test specifications are read from the `tests` attribute of the
+  --test_suite file when that flag is set, otherwise from the pipeline spec
+  itself, and are executed with a unittest text runner.
+
+  Args:
+    argv: command line arguments, forwarded to _build_pipeline_yaml_from_argv.
+    exit: if true, terminates the process via sys.exit() with status 0 on
+      success and 1 on failure (as unittest.main() does).  Otherwise returns
+      normally on success and raises RuntimeError on failure.
+
+  Raises:
+    ValueError: if both --create_test and --fix_tests are given.
+    TypeError: if the `tests` attribute is not a list.
+    RuntimeError: if no tests are found, or if the tests were unsuccessful
+      and exit is false.
+  """
+  known_args, pipeline_args, _, pipeline_yaml = (
+      _build_pipeline_yaml_from_argv(argv))
+  pipeline_spec = yaml.load(pipeline_yaml, Loader=yaml_transform.SafeLineLoader)
+  options = _build_pipeline_options(pipeline_spec, pipeline_args)
+
+  if known_args.create_test and known_args.fix_tests:
+    raise ValueError(
+        'At most one of --create_test and --fix_tests may be specified.')
+
+  if known_args.create_test:
+    # Nothing is executed here; an empty result reports success below.
+    result = unittest.TestResult()
+    tests = []
+  else:
+    if known_args.test_suite:
+      with open(known_args.test_suite) as suite_file:
+        loaded_suite = yaml.load(
+            suite_file, Loader=yaml_transform.SafeLineLoader)
+      test_suite_holder = loaded_suite or {}
+    else:
+      test_suite_holder = pipeline_spec
+
+    test_specs = test_suite_holder.get('tests', [])
+    if not isinstance(test_specs, list):
+      raise TypeError('tests attribute must be a list of test specifications.')
+    if not test_specs:
+      raise RuntimeError(
+          'No tests found. '
+          "If you haven't added a set of tests yet, you can get started by "
+          'running your pipeline with the --create_test flag enabled.')
+
+    with _fix_xlang_instant_coding():
+      tests = [
+          yaml_testing.YamlTestCase(
+              pipeline_spec, spec, options, known_args.fix_tests)
+          for spec in test_specs
+      ]
+      runner = unittest.TextTestRunner()
+      result = runner.run(unittest.TestSuite(tests))
+
+  # The file is rewritten only after the tests above have been run.
+  if known_args.fix_tests or known_args.create_test:
+    update_tests(known_args, pipeline_yaml, pipeline_spec, options, tests)
+
+  succeeded = result.wasSuccessful()
+  if exit:
+    # emulates unittest.main()
+    sys.exit(0 if succeeded else 1)
+  if not succeeded:
+    raise RuntimeError(result)
+
+
+def update_tests(known_args, pipeline_yaml, pipeline_spec, options, tests):
+  """Rewrites the file holding the test specifications.
+
+  The target file is --test_suite when given, otherwise the pipeline file
+  named by --yaml_pipeline_file.  With --fix_tests the `tests` attribute is
+  replaced by the fixed version of each test in `tests`; with --create_test a
+  newly created test is appended to it.  The file is then patched in place
+  via yaml_utils.patch_yaml.
+
+  Args:
+    known_args: parsed flags; reads test_suite, yaml_pipeline_file, fix_tests
+      and create_test.
+    pipeline_yaml: the pipeline yaml text that was run; compared with the
+      on-disk pipeline file to detect templated pipelines.
+    pipeline_spec: the parsed pipeline, passed to yaml_testing.create_test.
+    options: pipeline options, passed to yaml_testing.create_test.
+    tests: test case objects providing fixed_test(), used for --fix_tests.
+
+  Raises:
+    RuntimeError: if there is no file to update, or if the pipeline file
+      differs from pipeline_yaml (in-file updates of templated pipelines).
+    TypeError: if the target file does not contain a yaml mapping.
+  """
+  if known_args.test_suite:
+    path = known_args.test_suite
+    if not os.path.exists(path) and known_args.create_test:
+      # Seed a missing suite file so it can be read and patched below.
+      with open(path, 'w') as fout:
+        fout.write('tests: []')
+  elif known_args.yaml_pipeline_file:
+    path = known_args.yaml_pipeline_file
+  else:
+    raise RuntimeError(
+        'Test fixing only supported for file-backed tests. '
+        'Please use the --test_suite flag.')
+  with open(path) as fin:
+    original_yaml = fin.read()
+  if path == known_args.yaml_pipeline_file and pipeline_yaml.strip(
+  ) != original_yaml.strip():
+    raise RuntimeError(
+        'In-file test fixing not yet supported for templated pipelines. '
+        'Please use the --test_suite flag.')
+  updated_spec = yaml.load(original_yaml, Loader=yaml.SafeLoader) or {}
+  if not isinstance(updated_spec, dict):
+    # E.g. a bare top-level list; fail clearly instead of on the item
+    # assignment below.
+    raise TypeError(
+        f'{path} must contain a yaml mapping with a tests attribute.')
+
+  if known_args.fix_tests:
+    updated_spec['tests'] = [test.fixed_test() for test in tests]
+
+  if known_args.create_test:
+    # An empty `tests:` key loads as None, so checking for the key alone
+    # would leave None in place and make the append below fail.
+    if updated_spec.get('tests') is None:
+      updated_spec['tests'] = []
+    updated_spec['tests'].append(
+        yaml_testing.create_test(pipeline_spec, options))
+
+  updated_yaml = yaml_utils.patch_yaml(original_yaml, updated_spec)
+  with open(path, 'w') as fout:
+    fout.write(updated_yaml)
+
+
+def _build_pipeline_yaml_from_argv(argv):
+  """Resolves the pipeline yaml text from command line arguments.
+
+  Pre-parses the jinja flags, parses the remaining arguments, obtains the
+  pipeline template via _pipeline_spec_from_args and expands it with
+  known_args.jinja_variables (or no variables when unset).
+
+  Returns:
+    A tuple (known_args, pipeline_args, pipeline_template, pipeline_yaml),
+    where pipeline_yaml is the jinja-expanded form of pipeline_template.
+  """
   argv = _preparse_jinja_flags(argv)
   known_args, pipeline_args = _parse_arguments(argv)
   pipeline_template = _pipeline_spec_from_args(known_args)
   pipeline_yaml = yaml_transform.expand_jinja(
       pipeline_template, known_args.jinja_variables or {})
+  return known_args, pipeline_args, pipeline_template, pipeline_yaml
+
+
+def _build_pipeline_options(pipeline_spec, pipeline_args):
+  """Builds PipelineOptions from command line args and the spec's options.
+
+  Args:
+    pipeline_spec: the parsed pipeline; its optional `options` mapping is
+      passed as keyword options after stripping loader metadata.
+    pipeline_args: command line arguments passed to PipelineOptions.
+
+  Returns:
+    A PipelineOptions instance with pickle_library set to 'cloudpickle'.
+  """
+  raw_options = pipeline_spec.get('options', {})
+  spec_options = yaml_transform.SafeLineLoader.strip_metadata(raw_options)
+  return beam.options.pipeline_options.PipelineOptions(
+      pipeline_args, pickle_library='cloudpickle', **spec_options)
+
+
+def build_pipeline_components_from_argv(argv):
+  (known_args, pipeline_args, pipeline_template,
+   pipeline_yaml) = _build_pipeline_yaml_from_argv(argv)
   display_data = {
       'yaml': pipeline_yaml,
       'yaml_jinja_template': pipeline_template,
@@ -154,11 +278,7 @@ def build_pipeline_components_from_yaml(
     pipeline_yaml, pipeline_args, validate_schema='generic', pipeline_path=''):
   pipeline_spec = yaml.load(pipeline_yaml, Loader=yaml_transform.SafeLineLoader)
 
-  options = beam.options.pipeline_options.PipelineOptions(
-      pipeline_args,
-      pickle_library='cloudpickle',
-      **yaml_transform.SafeLineLoader.strip_metadata(
-          pipeline_spec.get('options', {})))
+  options = _build_pipeline_options(pipeline_spec, pipeline_args)
 
   def constructor(root):
     if 'resource_hints' in pipeline_spec.get('pipeline', {}):
@@ -180,4 +300,7 @@ def constructor(root):
 if __name__ == '__main__':
   import logging
   logging.getLogger().setLevel(logging.INFO)
-  run()
+  # --test_suite, --fix_tests and --create_test are read by run_tests() and
+  # update_tests(), and --test_suite is documented as running tests "rather
+  # than the pipeline itself", so any of them (not only --test) selects the
+  # test runner.  Both `--flag value` and `--flag=value` spellings match.
+  test_flags = ('--test', '--test_suite', '--fix_tests', '--create_test')
+  if any(arg.split('=', 1)[0] in test_flags for arg in sys.argv[1:]):
+    run_tests()
+  else:
+    run()
@@ -38,6 +38,44 @@
     - type: WriteToText
       config:
         path: PATH
+
+tests:
+  - name: InlineTest
+    mock_outputs:
+      - name: Create
+        elements: ['a', 'b', 'c']
+    expected_inputs:
+      - name: WriteToText
+        elements:
+          - {element: a}
+          - {element: b}
+          - {element: c}
+'''
+
+# External test suite whose expected WriteToText inputs correspond to the
+# elements mocked for Create.  test_external_test_specs expects it to pass
+# against TEST_PIPELINE, and test_fix_suite expects --fix_tests to rewrite
+# FAILING_TEST_SUITE into exactly this text (comment included).
+PASSING_TEST_SUITE = '''
+tests:
+  - name: ExternalTest  # comment
+    mock_outputs:
+      - name: Create
+        elements: ['a', 'b', 'c']
+    expected_inputs:
+      - name: WriteToText
+        elements:
+          - element: a
+          - element: b
+          - element: c
+'''
+
+# Same suite as PASSING_TEST_SUITE except that it expects a single element
+# `x` at WriteToText.  test_external_test_specs expects running it to raise,
+# and test_fix_suite uses it as the input for --fix_tests.
+FAILING_TEST_SUITE = '''
+tests:
+  - name: ExternalTest  # comment
+    mock_outputs:
+      - name: Create
+        elements: ['a', 'b', 'c']
+    expected_inputs:
+      - name: WriteToText
+        elements:
+          - element: x
 '''
 
 
@@ -113,6 +151,82 @@ def test_jinja_datetime(self):
         self.assertEqual(
             fin.read().strip(), datetime.datetime.now().strftime("%Y-%m-%d"))
 
+  def test_inline_test_specs(self):
+    """Runs the tests declared inline in TEST_PIPELINE via --test."""
+    argv = ['--yaml_pipeline', TEST_PIPELINE, '--test']
+    main.run_tests(argv, exit=False)
+
+  def test_external_test_specs(self):
+    """Runs TEST_PIPELINE against a passing and a failing --test_suite file."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+      good_suite = os.path.join(tmpdir, 'good.yaml')
+      bad_suite = os.path.join(tmpdir, 'bad.yaml')
+      suites = (
+          (good_suite, PASSING_TEST_SUITE),
+          (bad_suite, FAILING_TEST_SUITE),
+      )
+      for suite_path, suite_yaml in suites:
+        with open(suite_path, 'w') as fout:
+          fout.write(suite_yaml)
+
+      common_argv = ['--yaml_pipeline', TEST_PIPELINE, '--test_suite']
+
+      # Must pass.
+      main.run_tests(common_argv + [good_suite], exit=False)
+
+      # Must fail. (Ensures testing is not a no-op.)
+      with self.assertRaisesRegex(Exception, 'errors=1 failures=0'):
+        main.run_tests(common_argv + [bad_suite], exit=False)
+
+  def test_fix_suite(self):
+    """Checks that --fix_tests rewrites a failing suite into the passing one."""
+    with tempfile.TemporaryDirectory() as tmpdir:
+      test_suite = os.path.join(tmpdir, 'tests.yaml')
+      with open(test_suite, 'w') as fout:
+        fout.write(FAILING_TEST_SUITE)
+
+      argv = [
+          '--yaml_pipeline',
+          TEST_PIPELINE,
+          '--test_suite',
+          test_suite,
+          '--fix_tests',
+      ]
+      main.run_tests(argv, exit=False)
+
+      with open(test_suite) as fin:
+        fixed_yaml = fin.read()
+      self.assertEqual(fixed_yaml, PASSING_TEST_SUITE)
+
+  def test_create_test(self):
+    """Checks that --create_test writes a new test into an empty suite file."""
+    expected_yaml = '''
+tests:
+- mock_outputs: []
+  expected_inputs:
+  - name: WriteToText
+    elements:
+    - element: x
+'''.lstrip()
+    with tempfile.TemporaryDirectory() as tmpdir:
+      test_suite = os.path.join(tmpdir, 'tests.yaml')
+      # Start from an existing but empty suite file.
+      with open(test_suite, 'w') as fout:
+        fout.write('')
+
+      argv = [
+          '--yaml_pipeline',
+          TEST_PIPELINE.replace('ELEMENT', 'x'),
+          '--test_suite',
+          test_suite,
+          '--create_test',
+      ]
+      main.run_tests(argv, exit=False)
+
+      with open(test_suite) as fin:
+        created_yaml = fin.read()
+      self.assertEqual(created_yaml, expected_yaml)
+
 
 if __name__ == '__main__':
   logging.getLogger().setLevel(logging.INFO)
 
@@ -34,7 +34,9 @@
 from apache_beam.options.pipeline_options import PipelineOptions
 from apache_beam.typehints import trivial_inference
 from apache_beam.yaml import yaml_provider
+from apache_beam.yaml import yaml_testing
 from apache_beam.yaml import yaml_transform
+from apache_beam.yaml import yaml_utils
 
 
 class FakeSql(beam.PTransform):
@@ -288,6 +290,7 @@ def extract_name(input_spec):
     return input_spec.get('name', input_spec.get('type'))
 
   code_lines = None
+  last_pipeline = None
   for ix, line in enumerate(markdown_lines):
     line = line.rstrip()
     if line == '```':
@@ -320,12 +323,30 @@ def extract_name(input_spec):
             ] + ['    ' + line for line in code_lines]
           if code_lines[0] == 'pipeline:':
             yaml_pipeline = '\n'.join(code_lines)
-            if 'providers:' in yaml_pipeline:
+            last_pipeline = yaml_pipeline
+            if 'providers:' in yaml_pipeline or 'tests:' in yaml_pipeline:
               test_type = 'PARSE'
             yield test_name, create_test_method(
                 test_type,
                 test_name,
                 yaml_pipeline)
+          if 'tests:' in code_lines:
+            test_spec = '\n'.join(code_lines)
+            if code_lines[0] == 'pipeline:':
+              yaml_pipeline = '\n'.join(code_lines)
+            else:
+              yaml_pipeline = last_pipeline
+            for sub_ix, test_spec in enumerate(yaml.load(
+                '\n'.join(code_lines),
+                Loader=yaml_utils.SafeLineLoader)['tests']):
+              suffix = test_spec.get('name', str(sub_ix))
+              yield (
+                  test_name + '_' + suffix,
+                  # The yp=... ts=... is to capture the looped closure values.
+                  lambda _,
+                  yp=yaml_pipeline,
+                  ts=test_spec: yaml_testing.run_test(yp, ts))
+
         code_lines = None
     elif code_lines is not None:
       code_lines.append(line)
@@ -358,6 +379,9 @@ def createTestSuite(name, path):
 JoinTest = createTestSuite(
     'JoinTest', os.path.join(YAML_DOCS_DIR, 'yaml-join.md'))
 
+# Suite built by createTestSuite from the yaml-testing.md page under
+# YAML_DOCS_DIR.
+TestingTest = createTestSuite(
+    'TestingTest', os.path.join(YAML_DOCS_DIR, 'yaml-testing.md'))
+
 if __name__ == '__main__':
   parser = argparse.ArgumentParser()
   parser.add_argument('--render_dir', default=None)