From d85f85ea6c1bb7a8b02c6446a547ca287466de28 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Mon, 2 Aug 2021 21:11:35 -0400 Subject: [PATCH 01/28] chore: skip samples checks for python 3.6/8/9 (#582) --- samples/snippets/noxfile_config.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/noxfile_config.py b/samples/snippets/noxfile_config.py index 82a6331eee..a64317c506 100644 --- a/samples/snippets/noxfile_config.py +++ b/samples/snippets/noxfile_config.py @@ -19,7 +19,7 @@ TEST_CONFIG_OVERRIDE = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + "ignored_versions": ["2.7", "3.6", "3.8", "3.9"], # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string From ef8f649c84bad27a0da092951bde071e3186b3bd Mon Sep 17 00:00:00 2001 From: WhiteSource Renovate Date: Tue, 3 Aug 2021 03:12:05 +0200 Subject: [PATCH 02/28] chore(deps): update dependency google-cloud-aiplatform to v1.3.0 (#585) --- samples/snippets/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index e7ba2097ae..76902b2424 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,3 +1,3 @@ pytest==6.2.4 google-cloud-storage>=1.26.0, <2.0.0dev -google-cloud-aiplatform==1.2.0 +google-cloud-aiplatform==1.3.0 From 193ef7d28ca9c302f86c4c295f4f468e8f02d529 Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Mon, 2 Aug 2021 19:20:34 -0600 Subject: [PATCH 03/28] chore: require CODEOWNER review and up to date branches (#587) These two lines bring the rules on this repo in line with the defaults: 
https://github.com/googleapis/repo-automation-bots/blob/63c858e539e1f4d9bb8ea66e12f9c0a0de5fef55/packages/sync-repo-settings/src/required-checks.json#L40-L50 --- .github/sync-repo-settings.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index 1e00173609..dc9c647dbb 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -5,6 +5,8 @@ branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. # Defaults to `master` - pattern: master + requiresCodeOwnerReviews: true + requiresStrictStatusChecks: true requiredStatusCheckContexts: - 'Kokoro' - 'cla/google' From 2a6b0a369296698f79d75e93007e4c7319f3523c Mon Sep 17 00:00:00 2001 From: Mark Date: Tue, 3 Aug 2021 11:56:09 -0700 Subject: [PATCH 04/28] =?UTF-8?q?feat:=20add=20support=20for=20export=5Fev?= =?UTF-8?q?aluated=5Fdata=5Fitems=5Fconfig=20in=20AutoMLTab=E2=80=A6=20(#5?= =?UTF-8?q?83)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit add support for export_evaluated_data_items_config in AutoMLTabularTrainingJob --- google/cloud/aiplatform/training_jobs.py | 63 ++++++++++ .../test_automl_tabular_training_jobs.py | 108 ++++++++++++++++++ 2 files changed, 171 insertions(+) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 05a9a3aeb3..8e89509246 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -2705,6 +2705,9 @@ def run( budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, disable_early_stopping: bool = False, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. 
@@ -2777,6 +2780,27 @@ def run( that training might stop before the entire training budget has been used, if further training does no longer brings significant improvement to the model. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. + + Expected format: + ``bq://::`` + + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` + + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. + + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. sync (bool): Whether to execute this method synchronously. 
If False, this method will be executed in concurrent Future and any downstream object will @@ -2806,6 +2830,9 @@ def run( budget_milli_node_hours=budget_milli_node_hours, model_display_name=model_display_name, disable_early_stopping=disable_early_stopping, + export_evaluated_data_items=export_evaluated_data_items, + export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, + export_evaluated_data_items_override_destination=export_evaluated_data_items_override_destination, sync=sync, ) @@ -2822,6 +2849,9 @@ def _run( budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, disable_early_stopping: bool = False, + export_evaluated_data_items: bool = False, + export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, + export_evaluated_data_items_override_destination: bool = False, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -2894,6 +2924,27 @@ def _run( that training might stop before the entire training budget has been used, if further training does no longer brings significant improvement to the model. + export_evaluated_data_items (bool): + Whether to export the test set predictions to a BigQuery table. + If False, then the export is not performed. + export_evaluated_data_items_bigquery_destination_uri (string): + Optional. URI of desired destination BigQuery table for exported test set predictions. + + Expected format: + ``bq://::
`` + + If not specified, then results are exported to the following auto-created BigQuery + table: + ``:export_evaluated_examples__.evaluated_examples`` + + Applies only if [export_evaluated_data_items] is True. + export_evaluated_data_items_override_destination (bool): + Whether to override the contents of [export_evaluated_data_items_bigquery_destination_uri], + if the table exists, for exported test set predictions. If False, and the + table exists, then the training job will fail. + + Applies only if [export_evaluated_data_items] is True and + [export_evaluated_data_items_bigquery_destination_uri] is specified. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -2940,6 +2991,18 @@ def _run( "optimizationObjectivePrecisionValue": self._optimization_objective_precision_value, } + final_export_eval_bq_uri = export_evaluated_data_items_bigquery_destination_uri + if final_export_eval_bq_uri and not final_export_eval_bq_uri.startswith( + "bq://" + ): + final_export_eval_bq_uri = f"bq://{final_export_eval_bq_uri}" + + if export_evaluated_data_items: + training_task_inputs_dict["exportEvaluatedDataItemsConfig"] = { + "destinationBigqueryUri": final_export_eval_bq_uri, + "overrideExistingTable": export_evaluated_data_items_override_destination, + } + if self._additional_experiments: training_task_inputs_dict[ "additionalExperiments" diff --git a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py index 78a99ee6e3..02ddad688b 100644 --- a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py @@ -79,6 +79,11 @@ _TEST_TRAINING_DISABLE_EARLY_STOPPING = True _TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME = "minimize-log-loss" _TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE = "classification" +_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS = True 
+_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI = ( + "bq://path.to.table" +) +_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION = False _TEST_ADDITIONAL_EXPERIMENTS = ["exp1", "exp2"] _TEST_TRAINING_TASK_INPUTS_DICT = { # required inputs @@ -117,6 +122,16 @@ }, struct_pb2.Value(), ) +_TEST_TRAINING_TASK_INPUTS_WITH_EXPORT_EVAL_DATA_ITEMS = json_format.ParseDict( + { + **_TEST_TRAINING_TASK_INPUTS_DICT, + "exportEvaluatedDataItemsConfig": { + "destinationBigqueryUri": _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI, + "overrideExistingTable": _TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, + }, + }, + struct_pb2.Value(), +) _TEST_DATASET_NAME = "test-dataset-name" @@ -366,6 +381,99 @@ def test_run_call_pipeline_service_create( assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + @pytest.mark.parametrize("sync", [True, False]) + def test_run_call_pipeline_service_create_with_export_eval_data_items( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_tabular, + mock_model_service_get, + sync, + ): + aiplatform.init( + project=_TEST_PROJECT, + staging_bucket=_TEST_BUCKET_NAME, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTabularTrainingJob( + display_name=_TEST_DISPLAY_NAME, + optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, + optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, + column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + optimization_objective_recall_value=None, + optimization_objective_precision_value=None, + ) + + model_from_job = job.run( + dataset=mock_dataset_tabular, + target_column=_TEST_TRAINING_TARGET_COLUMN, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, + validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, + test_fraction_split=_TEST_TEST_FRACTION_SPLIT, 
+ predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, + weight_column=_TEST_TRAINING_WEIGHT_COLUMN, + budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + export_evaluated_data_items=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS, + export_evaluated_data_items_bigquery_destination_uri=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_BIGQUERY_DESTINATION_URI, + export_evaluated_data_items_override_destination=_TEST_TRAINING_EXPORT_EVALUATED_DATA_ITEMS_OVERRIDE_DESTINATION, + sync=sync, + ) + + job.wait_for_resource_creation() + + assert job.resource_name == _TEST_PIPELINE_RESOURCE_NAME + + if not sync: + model_from_job.wait() + + true_fraction_split = gca_training_pipeline.FractionSplit( + training_fraction=_TEST_TRAINING_FRACTION_SPLIT, + validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, + test_fraction=_TEST_TEST_FRACTION_SPLIT, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + fraction_split=true_fraction_split, + predefined_split=gca_training_pipeline.PredefinedSplit( + key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME + ), + dataset_id=mock_dataset_tabular.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_tabular, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_EXPORT_EVAL_DATA_ITEMS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + assert job._gca_resource is mock_pipeline_service_get.return_value + + 
mock_model_service_get.assert_called_once_with(name=_TEST_MODEL_NAME) + + assert model_from_job._gca_resource is mock_model_service_get.return_value + + assert job.get_model()._gca_resource is mock_model_service_get.return_value + + assert not job.has_failed + + assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) def test_run_call_pipeline_if_no_model_display_name( From 2f138d1dfe4959d1b5f53a9dfef90a18de9908ec Mon Sep 17 00:00:00 2001 From: Morgan Du Date: Tue, 3 Aug 2021 16:21:30 -0700 Subject: [PATCH 05/28] feat: expose base_output_dir for custom job (#586) --- google/cloud/aiplatform/jobs.py | 17 +++++++- tests/unit/aiplatform/test_custom_job.py | 41 ++++++++++++++++--- .../test_hyperparameter_tuning_job.py | 6 +++ 3 files changed, 57 insertions(+), 7 deletions(-) diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 6cc549027b..66b0479ced 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -59,6 +59,7 @@ study as gca_study_compat, ) + _LOGGER = base.Logger(__name__) _JOB_COMPLETE_STATES = ( @@ -930,6 +931,7 @@ def __init__( self, display_name: str, worker_pool_specs: Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]], + base_output_dir: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, @@ -977,6 +979,9 @@ def __init__( worker_pool_specs (Union[List[Dict], List[aiplatform.gapic.WorkerPoolSpec]]): Required. The spec of the worker pools including machine type and Docker image. Can provided as a list of dictionaries or list of WorkerPoolSpec proto messages. + base_output_dir (str): + Optional. GCS output directory of job. If not provided a + timestamped directory in the staging directory will be used. project (str): Optional.Project to run the custom job in. 
Overrides project set in aiplatform.init. location (str): @@ -1008,12 +1013,17 @@ def __init__( "should be set using aiplatform.init(staging_bucket='gs://my-bucket')" ) + # default directory if not given + base_output_dir = base_output_dir or utils._timestamped_gcs_dir( + staging_bucket, "aiplatform-custom-job" + ) + self._gca_resource = gca_custom_job_compat.CustomJob( display_name=display_name, job_spec=gca_custom_job_compat.CustomJobSpec( worker_pool_specs=worker_pool_specs, base_output_directory=gca_io_compat.GcsDestination( - output_uri_prefix=staging_bucket + output_uri_prefix=base_output_dir ), ), encryption_spec=initializer.global_config.get_encryption_spec( @@ -1049,6 +1059,7 @@ def from_local_script( machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", accelerator_count: int = 0, + base_output_dir: Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, @@ -1105,6 +1116,9 @@ def from_local_script( NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. + base_output_dir (str): + Optional. GCS output directory of job. If not provided a + timestamped directory in the staging directory will be used. project (str): Optional. Project to run the custom job in. Overrides project set in aiplatform.init. 
location (str): @@ -1170,6 +1184,7 @@ def from_local_script( return cls( display_name=display_name, worker_pool_specs=worker_pool_specs, + base_output_dir=base_output_dir, project=project, location=location, credentials=credentials, diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py index de144d5241..363ad18048 100644 --- a/tests/unit/aiplatform/test_custom_job.py +++ b/tests/unit/aiplatform/test_custom_job.py @@ -71,6 +71,7 @@ ] _TEST_STAGING_BUCKET = "gs://test-staging-bucket" +_TEST_BASE_OUTPUT_DIR = f"{_TEST_STAGING_BUCKET}/{_TEST_DISPLAY_NAME}" # CMEK encryption _TEST_DEFAULT_ENCRYPTION_KEY_NAME = "key_default" @@ -91,7 +92,7 @@ job_spec=gca_custom_job_compat.CustomJobSpec( worker_pool_specs=_TEST_WORKER_POOL_SPEC, base_output_directory=gca_io_compat.GcsDestination( - output_uri_prefix=_TEST_STAGING_BUCKET + output_uri_prefix=_TEST_BASE_OUTPUT_DIR ), scheduling=gca_custom_job_compat.Scheduling( timeout=duration_pb2.Duration(seconds=_TEST_TIMEOUT), @@ -224,7 +225,9 @@ def test_create_custom_job(self, create_custom_job_mock, get_custom_job_mock, sy ) job = aiplatform.CustomJob( - display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + display_name=_TEST_DISPLAY_NAME, + worker_pool_specs=_TEST_WORKER_POOL_SPEC, + base_output_dir=_TEST_BASE_OUTPUT_DIR, ) job.run( @@ -265,7 +268,9 @@ def test_run_custom_job_with_fail_raises( ) job = aiplatform.CustomJob( - display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + display_name=_TEST_DISPLAY_NAME, + worker_pool_specs=_TEST_WORKER_POOL_SPEC, + base_output_dir=_TEST_BASE_OUTPUT_DIR, ) with pytest.raises(RuntimeError) as e: @@ -306,7 +311,9 @@ def test_run_custom_job_with_fail_at_creation(self): ) job = aiplatform.CustomJob( - display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + display_name=_TEST_DISPLAY_NAME, + worker_pool_specs=_TEST_WORKER_POOL_SPEC, + base_output_dir=_TEST_BASE_OUTPUT_DIR, ) job.run( @@ -342,7 
+349,9 @@ def test_custom_job_get_state_raises_without_run(self): ) job = aiplatform.CustomJob( - display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + display_name=_TEST_DISPLAY_NAME, + worker_pool_specs=_TEST_WORKER_POOL_SPEC, + base_output_dir=_TEST_BASE_OUTPUT_DIR, ) with pytest.raises(RuntimeError): @@ -385,6 +394,7 @@ def test_create_from_local_script( display_name=_TEST_DISPLAY_NAME, script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, + base_output_dir=_TEST_BASE_OUTPUT_DIR, ) job.run(sync=sync) @@ -428,7 +438,9 @@ def test_create_custom_job_with_tensorboard( ) job = aiplatform.CustomJob( - display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC + display_name=_TEST_DISPLAY_NAME, + worker_pool_specs=_TEST_WORKER_POOL_SPEC, + base_output_dir=_TEST_BASE_OUTPUT_DIR, ) job.run( @@ -454,3 +466,20 @@ def test_create_custom_job_with_tensorboard( assert ( job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED ) + + def test_create_custom_job_without_base_output_dir(self,): + + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = aiplatform.CustomJob( + display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, + ) + + assert job.job_spec.base_output_directory.output_uri_prefix.startswith( + f"{_TEST_STAGING_BUCKET}/aiplatform-custom-job" + ) diff --git a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py index e2d716e729..752d39a93c 100644 --- a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py +++ b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py @@ -49,6 +49,7 @@ _TEST_PARENT = f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}" _TEST_STAGING_BUCKET = test_custom_job._TEST_STAGING_BUCKET +_TEST_BASE_OUTPUT_DIR = 
test_custom_job._TEST_BASE_OUTPUT_DIR _TEST_HYPERPARAMETERTUNING_JOB_NAME = ( f"{_TEST_PARENT}/hyperparameterTuningJobs/{_TEST_ID}" @@ -260,6 +261,7 @@ def test_create_hyperparameter_tuning_job( custom_job = aiplatform.CustomJob( display_name=test_custom_job._TEST_DISPLAY_NAME, worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR, ) job = aiplatform.HyperparameterTuningJob( @@ -321,6 +323,7 @@ def test_run_hyperparameter_tuning_job_with_fail_raises( custom_job = aiplatform.CustomJob( display_name=test_custom_job._TEST_DISPLAY_NAME, worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR, ) job = aiplatform.HyperparameterTuningJob( @@ -376,6 +379,7 @@ def test_run_hyperparameter_tuning_job_with_fail_at_creation(self): custom_job = aiplatform.CustomJob( display_name=test_custom_job._TEST_DISPLAY_NAME, worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR, ) job = aiplatform.HyperparameterTuningJob( @@ -440,6 +444,7 @@ def test_hyperparameter_tuning_job_get_state_raises_without_run(self): custom_job = aiplatform.CustomJob( display_name=test_custom_job._TEST_DISPLAY_NAME, worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR, ) job = aiplatform.HyperparameterTuningJob( @@ -497,6 +502,7 @@ def test_create_hyperparameter_tuning_job_with_tensorboard( custom_job = aiplatform.CustomJob( display_name=test_custom_job._TEST_DISPLAY_NAME, worker_pool_specs=test_custom_job._TEST_WORKER_POOL_SPEC, + base_output_dir=test_custom_job._TEST_BASE_OUTPUT_DIR, ) job = aiplatform.HyperparameterTuningJob( From 0fbcd592cd7e9c4b0a131d777fa84e592a43a21c Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Wed, 4 Aug 2021 10:51:07 -0700 Subject: [PATCH 06/28] fix: Fixed bug in TabularDataset.column_names (#590) Fixes 
https://github.com/googleapis/python-aiplatform/issues/589 The `end` parameter of the `blob.download_as_bytes` function is inclusive, not exclusive. > There are 2 hard problems in computer science: cache invalidation, naming things, and off-by-1 errors. Co-authored-by: gcf-merge-on-green[bot] <60162190+gcf-merge-on-green[bot]@users.noreply.github.com> --- google/cloud/aiplatform/datasets/tabular_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/datasets/tabular_dataset.py b/google/cloud/aiplatform/datasets/tabular_dataset.py index 1fe23f5ee2..f9a9658d7e 100644 --- a/google/cloud/aiplatform/datasets/tabular_dataset.py +++ b/google/cloud/aiplatform/datasets/tabular_dataset.py @@ -150,7 +150,7 @@ def _retrieve_gcs_source_columns( while first_new_line_index == -1: line += blob.download_as_bytes( - start=start_index, end=start_index + increment + start=start_index, end=start_index + increment - 1 ).decode("utf-8") first_new_line_index = line.find("\n") From 433b94a78004de6d3a4726317d8bac32c358ace8 Mon Sep 17 00:00:00 2001 From: Yicheng Fang <58752348+yfang1@users.noreply.github.com> Date: Thu, 5 Aug 2021 09:14:33 -0700 Subject: [PATCH 07/28] fix: re-remove extra TB dependencies introduced due to merge conflict (#593) Reapplying #499 to remove the extra dependencies introduced by merge conflict in https://github.com/googleapis/python-aiplatform/commit/f6f9a97bb178d9859b8d43166a43792d88e57710#diff-60f61ab7a8d1910d86d9fda2261620314edcae5894d5aaa236b821c7256badd7L32-R36 --- setup.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 2101c9033b..02d1bf881b 100644 --- a/setup.py +++ b/setup.py @@ -29,11 +29,7 @@ with io.open(readme_filename, encoding="utf-8") as readme_file: readme = readme_file.read() -tensorboard_extra_require = [ - "tensorflow >=2.3.0, <=2.5.0", - "grpcio~=1.39.0", - "six~=1.15.0", -] +tensorboard_extra_require = ["tensorflow >=2.3.0, <=2.5.0"] 
metadata_extra_require = ["pandas >= 1.0.0"] xai_extra_require = ["tensorflow >=2.3.0, <=2.5.0"] full_extra_require = list( From 726dea73e3a638d8c568596939f34da15fecb930 Mon Sep 17 00:00:00 2001 From: Yicheng Fang <58752348+yfang1@users.noreply.github.com> Date: Fri, 6 Aug 2021 12:18:19 -0700 Subject: [PATCH 08/28] refactor: Extract out common logic from Tensorboard BatchedRequestSender into a base class. (#592) Exacted out common logic from the 3 BatchedRequestSenders into a base class. This will facilitate optimizations down the road. As nice side effects this brings up the uploader's test coverage due to fewer lines of code, and allows blob references to be batched instead of sent one at a time. unit test passed. Tried uploading all 3 types of data. Working well. --- .../cloud/aiplatform/tensorboard/uploader.py | 426 ++++++++---------- tests/unit/aiplatform/test_uploader.py | 10 +- 2 files changed, 181 insertions(+), 255 deletions(-) diff --git a/google/cloud/aiplatform/tensorboard/uploader.py b/google/cloud/aiplatform/tensorboard/uploader.py index 95e25d8cd8..e74d119f4a 100644 --- a/google/cloud/aiplatform/tensorboard/uploader.py +++ b/google/cloud/aiplatform/tensorboard/uploader.py @@ -15,13 +15,24 @@ # limitations under the License. # """Uploads a TensorBoard logdir to TensorBoard.gcp.""" +import abc import contextlib import functools import json +import logging import os import time import re -from typing import Callable, Dict, FrozenSet, Generator, Iterable, Optional, Tuple +from typing import ( + Callable, + Dict, + FrozenSet, + Generator, + Iterable, + Optional, + Tuple, + ContextManager, +) import uuid import grpc @@ -96,7 +107,7 @@ # Default maximum WriteTensorbordRunData request size in bytes. _DEFAULT_MAX_TENSOR_REQUEST_SIZE = 512 * (2 ** 10) # 512KiB -_DEFAULT_MAX_BLOB_REQUEST_SIZE = 4 * (2 ** 20) - 256 * (2 ** 10) # 4MiB-256KiB +_DEFAULT_MAX_BLOB_REQUEST_SIZE = 24 * (2 ** 10) # 24KiB # Default maximum tensor point size in bytes. 
_DEFAULT_MAX_TENSOR_POINT_SIZE = 16 * (2 ** 10) # 16KiB @@ -104,6 +115,7 @@ _DEFAULT_MAX_BLOB_SIZE = 10 * (2 ** 30) # 10GiB logger = tb_logging.get_logger() +logger.setLevel(logging.WARNING) class TensorBoardUploader(object): @@ -671,7 +683,7 @@ def get_or_create( return time_series -class _ScalarBatchedRequestSender(object): +class _BaseBatchedRequestSender(object): """Helper class for building requests that fit under a size limit. This class accumulates a current request. `add_event(...)` may or may not @@ -690,7 +702,7 @@ def __init__( max_request_size: int, tracker: upload_tracker.UploadTracker, ): - """Constructor for _ScalarBatchedRequestSender. + """Constructor for _BaseBatchedRequestSender. Args: run_resource_id: The resource id for the run with the following format @@ -761,7 +773,7 @@ def _add_event_internal( time_series_data_proto = self._tag_to_time_series_data.get(value.tag) if time_series_data_proto is None: time_series_data_proto = self._create_time_series_data(value.tag, metadata) - self._create_point(time_series_data_proto, event, value) + self._create_point(time_series_data_proto, event, value, metadata) def flush(self): """Sends the active request after removing empty runs and tags. 
@@ -777,7 +789,7 @@ def flush(self): self._rpc_rate_limiter.tick() with _request_logger(request): - with self._tracker.scalars_tracker(self._num_values): + with self._get_tracker(): try: self._api.write_tensorboard_run_data( tensorboard_run=self._run_resource_id, @@ -813,15 +825,14 @@ def _create_time_series_data( tag_name, lambda: tensorboard_time_series.TensorboardTimeSeries( display_name=tag_name, - value_type=tensorboard_time_series.TensorboardTimeSeries.ValueType.SCALAR, + value_type=self._value_type, plugin_name=metadata.plugin_data.plugin_name, plugin_data=metadata.plugin_data.content, ), ).name.split("/")[-1], - value_type=tensorboard_time_series.TensorboardTimeSeries.ValueType.SCALAR, + value_type=self._value_type, ) - self._request.time_series_data.extend([time_series_data_proto]) self._byte_budget_manager.add_time_series(time_series_data_proto) self._tag_to_time_series_data[tag_name] = time_series_data_proto return time_series_data_proto @@ -831,29 +842,25 @@ def _create_point( time_series_proto: tensorboard_data.TimeSeriesData, event: tf.compat.v1.Event, value: tf.compat.v1.Summary.Value, + metadata: tf.compat.v1.SummaryMetadata, ): """Adds a scalar point to the given tag, if there's space. Args: time_series_proto: TimeSeriesData proto to which to add a point. event: Enclosing `Event` proto with the step and wall time data. - value: Scalar `Summary.Value` proto with the actual scalar data. + value: `Summary.Value` proto. + metadata: SummaryMetadata of the event. Raises: _OutOfSpaceError: If adding the point would exceed the remaining request budget. 
""" - scalar_proto = tensorboard_data.Scalar( - value=tensor_util.make_ndarray(value.tensor).item() - ) - point = tensorboard_data.TimeSeriesDataPoint( - step=event.step, - scalar=scalar_proto, - wall_time=timestamp.Timestamp( - seconds=int(event.wall_time), - nanos=int(round((event.wall_time % 1) * 10 ** 9)), - ), - ) + point = self._create_data_point(event, value, metadata) + + if not self._validate(point, event, value): + return + time_series_proto.values.extend([point]) try: self._byte_budget_manager.add_point(point) @@ -861,27 +868,69 @@ def _create_point( time_series_proto.values.pop() raise + @abc.abstractmethod + def _get_tracker(self) -> ContextManager: + """ + :return: tracker function from upload_tracker.UploadTracker + """ + pass -class _TensorBatchedRequestSender(object): - """Helper class for building WriteTensor() requests that fit under a size limit. + @property + @classmethod + @abc.abstractmethod + def _value_type(cls,) -> tensorboard_time_series.TensorboardTimeSeries.ValueType: + """ + :return: Value type of the time series. + """ + pass + + @abc.abstractmethod + def _create_data_point( + self, + event: tf.compat.v1.Event, + value: tf.compat.v1.Summary.Value, + metadata: tf.compat.v1.SummaryMetadata, + ) -> tensorboard_data.TimeSeriesDataPoint: + """ + Creates data point protos for sending to the OnePlatform API. + """ + pass + + def _validate( + self, + point: tensorboard_data.TimeSeriesDataPoint, + event: tf.compat.v1.Event, + value: tf.compat.v1.Summary.Value, + ): + """ + Validations performed before including the data point to be sent to the + OnePlatform API. + """ + return True + + +class _ScalarBatchedRequestSender(_BaseBatchedRequestSender): + """Helper class for building requests that fit under a size limit. This class accumulates a current request. `add_event(...)` may or may not send the request (and start a new one). After all `add_event(...)` calls are complete, a final call to `flush()` is needed to send the final request. 
+ This class is not threadsafe. Use external synchronization if calling its methods concurrently. """ + _value_type = tensorboard_time_series.TensorboardTimeSeries.ValueType.SCALAR + def __init__( self, run_resource_id: str, api: TensorboardServiceClient, rpc_rate_limiter: util.RateLimiter, max_request_size: int, - max_tensor_point_size: int, tracker: upload_tracker.UploadTracker, ): - """Constructor for _TensorBatchedRequestSender. + """Constructor for _ScalarBatchedRequestSender. Args: run_resource_id: The resource id for the run with the following format @@ -891,153 +940,91 @@ def __init__( max_request_size: max number of bytes to send tracker: """ - self._run_resource_id = run_resource_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._byte_budget_manager = _ByteBudgetManager(max_request_size) - self._max_tensor_point_size = max_tensor_point_size - self._tracker = tracker - - # cache: map from Tensorboard tag to TimeSeriesData - # cleared whenever a new request is created - self._tag_to_time_series_data: Dict[str, tensorboard_data.TimeSeriesData] = {} - - self._time_series_resource_manager = _TimeSeriesResourceManager( - run_resource_id, api + super().__init__( + run_resource_id, api, rpc_rate_limiter, max_request_size, tracker ) - self._new_request() - def _new_request(self): - """Allocates a new request and refreshes the budget.""" - self._request = tensorboard_service.WriteTensorboardRunDataRequest() - self._tag_to_time_series_data.clear() - self._num_values = 0 - self._request.tensorboard_run = self._run_resource_id - self._byte_budget_manager.reset(self._request) - self._num_values = 0 - self._num_values_skipped = 0 - self._tensor_bytes = 0 - self._tensor_bytes_skipped = 0 - - def add_event( - self, - event: tf.compat.v1.Event, - value: tf.compat.v1.Summary.Value, - metadata: tf.compat.v1.SummaryMetadata, - ): - """Attempts to add the given event to the current request. 
- - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - try: - self._add_event_internal(event, value, metadata) - except _OutOfSpaceError: - self.flush() - # Try again. This attempt should never produce OutOfSpaceError - # because we just flushed. - try: - self._add_event_internal(event, value, metadata) - except _OutOfSpaceError: - raise RuntimeError("add_event failed despite flush") + def _get_tracker(self) -> ContextManager: + return self._tracker.scalars_tracker(self._num_values) - def _add_event_internal( + def _create_data_point( self, event: tf.compat.v1.Event, value: tf.compat.v1.Summary.Value, metadata: tf.compat.v1.SummaryMetadata, - ): - self._num_values += 1 - time_series_data_proto = self._tag_to_time_series_data.get(value.tag) - if time_series_data_proto is None: - time_series_data_proto = self._create_time_series_data(value.tag, metadata) - self._create_point(time_series_data_proto, event, value) - - def flush(self): - """Sends the active request after removing empty runs and tags. + ) -> tensorboard_data.TimeSeriesDataPoint: + scalar_proto = tensorboard_data.Scalar( + value=tensor_util.make_ndarray(value.tensor).item() + ) + return tensorboard_data.TimeSeriesDataPoint( + step=event.step, + scalar=scalar_proto, + wall_time=timestamp.Timestamp( + seconds=int(event.wall_time), + nanos=int(round((event.wall_time % 1) * 10 ** 9)), + ), + ) - Starts a new, empty active request. - """ - request = self._request - request.time_series_data = list(self._tag_to_time_series_data.values()) - _prune_empty_time_series(request) - if not request.time_series_data: - return - self._rpc_rate_limiter.tick() +class _TensorBatchedRequestSender(_BaseBatchedRequestSender): + """Helper class for building WriteTensor() requests that fit under a size limit. 
- with _request_logger(request): - with self._tracker.tensors_tracker( - self._num_values, - self._num_values_skipped, - self._tensor_bytes, - self._tensor_bytes_skipped, - ): - try: - self._api.write_tensorboard_run_data( - tensorboard_run=self._run_resource_id, - time_series_data=request.time_series_data, - ) - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) + This class accumulates a current request. `add_event(...)` may or may not + send the request (and start a new one). After all `add_event(...)` calls + are complete, a final call to `flush()` is needed to send the final request. + This class is not threadsafe. Use external synchronization if calling its + methods concurrently. + """ - self._new_request() + _value_type = tensorboard_time_series.TensorboardTimeSeries.ValueType.TENSOR - def _create_time_series_data( - self, tag_name: str, metadata: tf.compat.v1.SummaryMetadata - ) -> tensorboard_data.TimeSeriesData: - """Adds a time_series for the tag_name, if there's space. + def __init__( + self, + run_resource_id: str, + api: TensorboardServiceClient, + rpc_rate_limiter: util.RateLimiter, + max_request_size: int, + max_tensor_point_size: int, + tracker: upload_tracker.UploadTracker, + ): + """Constructor for _TensorBatchedRequestSender. Args: - tag_name: String name of the tag to add (as `value.tag`). - metadata: SummaryMetadata of the event. - - Returns: - The TimeSeriesData in _request proto with the given tag name. - - Raises: - _OutOfSpaceError: If adding the tag would exceed the remaining - request budget. 
+ run_resource_id: The resource id for the run with the following format + projects/{project}/locations/{location}/tensorboards/{tensorboard}/experiments/{experiment}/runs/{run} + api: TensorboardServiceStub + rpc_rate_limiter: until.RateLimiter to limit rate of this request sender + max_request_size: max number of bytes to send + tracker: """ - time_series_data_proto = tensorboard_data.TimeSeriesData( - tensorboard_time_series_id=self._time_series_resource_manager.get_or_create( - tag_name, - lambda: tensorboard_time_series.TensorboardTimeSeries( - display_name=tag_name, - value_type=tensorboard_time_series.TensorboardTimeSeries.ValueType.TENSOR, - plugin_name=metadata.plugin_data.plugin_name, - plugin_data=metadata.plugin_data.content, - ), - ).name.split("/")[-1], - value_type=tensorboard_time_series.TensorboardTimeSeries.ValueType.TENSOR, + super().__init__( + run_resource_id, api, rpc_rate_limiter, max_request_size, tracker ) + self._max_tensor_point_size = max_tensor_point_size - self._request.time_series_data.extend([time_series_data_proto]) - self._byte_budget_manager.add_time_series(time_series_data_proto) - self._tag_to_time_series_data[tag_name] = time_series_data_proto - return time_series_data_proto + def _new_request(self): + """Allocates a new request and refreshes the budget.""" + super()._new_request() + self._num_values = 0 + self._num_values_skipped = 0 + self._tensor_bytes = 0 + self._tensor_bytes_skipped = 0 - def _create_point( + def _get_tracker(self) -> ContextManager: + return self._tracker.tensors_tracker( + self._num_values, + self._num_values_skipped, + self._tensor_bytes, + self._tensor_bytes_skipped, + ) + + def _create_data_point( self, - time_series_proto: tensorboard_data.TimeSeriesData, event: tf.compat.v1.Event, value: tf.compat.v1.Summary.Value, - ): - """Adds a tensor point to the given tag, if there's space. - - Args: - tag_proto: `WriteTensorRequest.Tag` proto to which to add a point. 
- event: Enclosing `Event` proto with the step and wall time data. - value: Tensor `Summary.Value` proto with the actual tensor data. - - Raises: - _OutOfSpaceError: If adding the point would exceed the remaining - request budget. - """ - point = tensorboard_data.TimeSeriesDataPoint( + metadata: tf.compat.v1.SummaryMetadata, + ) -> tensorboard_data.TimeSeriesDataPoint: + return tensorboard_data.TimeSeriesDataPoint( step=event.step, tensor=tensorboard_data.TensorboardTensor( value=value.tensor.SerializeToString() @@ -1048,6 +1035,12 @@ def _create_point( ), ) + def _validate( + self, + point: tensorboard_data.TimeSeriesDataPoint, + event: tf.compat.v1.Event, + value: tf.compat.v1.Summary.Value, + ): self._num_values += 1 tensor_size = len(point.tensor.value) self._tensor_bytes += tensor_size @@ -1059,32 +1052,19 @@ def _create_point( ) self._num_values_skipped += 1 self._tensor_bytes_skipped += tensor_size - return - - self._validate_tensor_value( - value.tensor, value.tag, event.step, event.wall_time - ) - - time_series_proto.values.extend([point]) - - try: - self._byte_budget_manager.add_point(point) - except _OutOfSpaceError: - time_series_proto.values.pop() - raise + return False - def _validate_tensor_value(self, tensor_proto, tag, step, wall_time): - """Validate a TensorProto by attempting to parse it.""" try: - tensor_util.make_ndarray(tensor_proto) + tensor_util.make_ndarray(value.tensor) except ValueError as error: raise ValueError( "The uploader failed to upload a tensor. 
This seems to be " "due to a malformation in the tensor, which may be caused by " "a bug in the process that wrote the tensor.\n\n" "The tensor has tag '%s' and is at step %d and wall_time %.6f.\n\n" - "Original error:\n%s" % (tag, step, wall_time, error) + "Original error:\n%s" % (value.tag, event.step, event.wall_time, error) ) + return True class _ByteBudgetManager(object): @@ -1176,7 +1156,7 @@ def add_point(self, point_proto: tensorboard_data.TimeSeriesDataPoint): self._byte_budget -= cost -class _BlobRequestSender(object): +class _BlobRequestSender(_BaseBatchedRequestSender): """Uploader for blob-type event data. Unlike the other types, this class does not accumulate events in batches; @@ -1187,6 +1167,8 @@ class _BlobRequestSender(object): methods concurrently. """ + _value_type = tensorboard_time_series.TensorboardTimeSeries.ValueType.BLOB_SEQUENCE + def __init__( self, run_resource_id: str, @@ -1198,73 +1180,46 @@ def __init__( blob_storage_folder: str, tracker: upload_tracker.UploadTracker, ): - self._run_resource_id = run_resource_id - self._api = api - self._rpc_rate_limiter = rpc_rate_limiter - self._max_blob_request_size = max_blob_request_size - self._max_blob_size = max_blob_size - self._tracker = tracker - self._time_series_resource_manager = _TimeSeriesResourceManager( - run_resource_id, api + super().__init__( + run_resource_id, api, rpc_rate_limiter, max_blob_request_size, tracker ) - + self._max_blob_size = max_blob_size self._bucket = blob_storage_bucket self._folder = blob_storage_folder - self._new_request() - def _new_request(self): - """Declares the previous event complete.""" - self._event = None - self._value = None - self._metadata = None + super()._new_request() + self._blob_sizes = 0 - def add_event( + def _get_tracker(self) -> ContextManager: + return self._tracker.blob_tracker(0) + + def _create_data_point( self, event: tf.compat.v1.Event, value: tf.compat.v1.Summary.Value, metadata: tf.compat.v1.SummaryMetadata, - ): - """Attempts 
to add the given event to the current request. - - If the event cannot be added to the current request because the byte - budget is exhausted, the request is flushed, and the event is added - to the next request. - """ - if self._value: - raise RuntimeError("Tried to send blob while another is pending") - self._event = event # provides step and possibly plugin_name - self._value = value - self._blobs = tensor_util.make_ndarray(self._value.tensor) - if self._blobs.ndim == 1: - self._metadata = metadata - self.flush() - else: + ) -> tensorboard_data.TimeSeriesDataPoint: + blobs = tensor_util.make_ndarray(value.tensor) + if blobs.ndim != 1: logger.warning( "A blob sequence must be represented as a rank-1 Tensor. " "Provided data has rank %d, for run %s, tag %s, step %s ('%s' plugin) .", - self._blobs.ndim, + blobs.ndim, self._run_resource_id, - self._value.tag, - self._event.step, + value.tag, + event.step, metadata.plugin_data.plugin_name, ) - # Skip this upload. - self._new_request() - - def flush(self): - """Sends the current blob sequence fully, and clears it to make way for the next.""" - if not self._value: - self._new_request() - return + return None time_series_proto = self._time_series_resource_manager.get_or_create( - self._value.tag, + value.tag, lambda: tensorboard_time_series.TensorboardTimeSeries( - display_name=self._value.tag, + display_name=value.tag, value_type=tensorboard_time_series.TensorboardTimeSeries.ValueType.BLOB_SEQUENCE, - plugin_name=self._metadata.plugin_data.plugin_name, - plugin_data=self._metadata.plugin_data.content, + plugin_name=metadata.plugin_data.plugin_name, + plugin_data=metadata.plugin_data.content, ), ) m = re.match( @@ -1278,16 +1233,15 @@ def flush(self): else blob_path_prefix ) sent_blob_ids = [] - for blob in self._blobs: - self._rpc_rate_limiter.tick() + for blob in blobs: with self._tracker.blob_tracker(len(blob)) as blob_tracker: blob_id = self._send_blob(blob, blob_path_prefix) if blob_id is not None: 
sent_blob_ids.append(str(blob_id)) - blob_tracker.mark_uploaded(blob_id is not None) + blob_tracker.mark_uploaded(blob_id is not None) - data_point = tensorboard_data.TimeSeriesDataPoint( - step=self._event.step, + return tensorboard_data.TimeSeriesDataPoint( + step=event.step, blobs=tensorboard_data.TensorboardBlobSequence( values=[ tensorboard_data.TensorboardBlob(id=blob_id) @@ -1295,37 +1249,11 @@ def flush(self): ] ), wall_time=timestamp.Timestamp( - seconds=int(self._event.wall_time), - nanos=int(round((self._event.wall_time % 1) * 10 ** 9)), + seconds=int(event.wall_time), + nanos=int(round((event.wall_time % 1) * 10 ** 9)), ), ) - time_series_data_proto = tensorboard_data.TimeSeriesData( - tensorboard_time_series_id=time_series_proto.name.split("/")[-1], - value_type=tensorboard_time_series.TensorboardTimeSeries.ValueType.BLOB_SEQUENCE, - values=[data_point], - ) - request = tensorboard_service.WriteTensorboardRunDataRequest( - time_series_data=[time_series_data_proto] - ) - - _prune_empty_time_series(request) - if not request.time_series_data: - return - - with _request_logger(request): - try: - self._api.write_tensorboard_run_data( - tensorboard_run=self._run_resource_id, - time_series_data=request.time_series_data, - ) - except grpc.RpcError as e: - if e.code() == grpc.StatusCode.NOT_FOUND: - raise ExperimentNotFoundError() - logger.error("Upload call failed with error %s", e) - - self._new_request() - def _send_blob(self, blob, blob_path_prefix): """Sends a single blob to a GCS bucket in the consumer project. 
diff --git a/tests/unit/aiplatform/test_uploader.py b/tests/unit/aiplatform/test_uploader.py index f4029fb063..56385274c3 100644 --- a/tests/unit/aiplatform/test_uploader.py +++ b/tests/unit/aiplatform/test_uploader.py @@ -225,6 +225,7 @@ def _create_request_sender( max_scalar_request_size=128000, max_tensor_request_size=128000, max_tensor_point_size=52000, + max_blob_request_size=128000, ) rpc_rate_limiter = util.RateLimiter(0) @@ -771,8 +772,7 @@ def create_time_series(tensorboard_time_series, parent=None): uploader = _create_uploader( writer_client=mock_client, logdir=_TEST_LOG_DIR_NAME, - # Verify behavior with lots of small chunks - max_blob_request_size=100, + max_blob_request_size=1000, rpc_rate_limiter=mock_rate_limiter, blob_storage_bucket=mock_bucket, verbosity=1, # In order to test tracker. @@ -829,7 +829,7 @@ def create_time_series(tensorboard_time_series, parent=None): self.assertEqual( time_series_data[0].tensorboard_time_series_id, _TEST_TIME_SERIES_NAME ) - self.assertEqual(len(time_series_data[0].values), 1) + self.assertEqual(len(time_series_data[0].values), 2) blobs = time_series_data[0].values[0].blobs.values self.assertEqual(len(blobs), 1) self.assertIn(blobs[0].id, blob_ids) @@ -838,9 +838,7 @@ def create_time_series(tensorboard_time_series, parent=None): self.assertEqual(mock_tracker.send_tracker.call_count, 2) self.assertEqual(mock_tracker.scalars_tracker.call_count, 0) self.assertEqual(mock_tracker.tensors_tracker.call_count, 0) - self.assertEqual(mock_tracker.blob_tracker.call_count, 10) - self.assertLen(mock_tracker.blob_tracker.call_args[0], 1) - self.assertGreater(mock_tracker.blob_tracker.call_args[0][0], 0) + self.assertEqual(mock_tracker.blob_tracker.call_count, 15) def test_filter_graphs(self): # Three graphs: one short, one long, one corrupt. 
From b478075efb05553760514256fee9a63126a9916f Mon Sep 17 00:00:00 2001 From: Alexey Volkov Date: Mon, 9 Aug 2021 08:52:56 -0700 Subject: [PATCH 09/28] feat: Added the VertexAiResourceNoun.to_dict() method (#588) --- google/cloud/aiplatform/base.py | 5 +++++ google/cloud/aiplatform/metadata/resource.py | 3 +-- tests/system/aiplatform/test_dataset.py | 6 ++---- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/google/cloud/aiplatform/base.py b/google/cloud/aiplatform/base.py index 1a3eed8add..20f9aa07ad 100644 --- a/google/cloud/aiplatform/base.py +++ b/google/cloud/aiplatform/base.py @@ -44,6 +44,7 @@ from google.cloud.aiplatform import initializer from google.cloud.aiplatform import utils from google.cloud.aiplatform.compat.types import encryption_spec as gca_encryption_spec +from google.protobuf import json_format logging.basicConfig(level=logging.INFO, stream=sys.stdout) @@ -607,6 +608,10 @@ def _assert_gca_resource_is_available(self) -> None: def __repr__(self) -> str: return f"{object.__repr__(self)} \nresource name: {self.resource_name}" + def to_dict(self) -> Dict[str, Any]: + """Returns the resource proto as a dictionary.""" + return json_format.MessageToDict(self.gca_resource._pb) + def optional_sync( construct_object_on_arg: Optional[str] = None, diff --git a/google/cloud/aiplatform/metadata/resource.py b/google/cloud/aiplatform/metadata/resource.py index 85ac419d40..37bb7327cd 100644 --- a/google/cloud/aiplatform/metadata/resource.py +++ b/google/cloud/aiplatform/metadata/resource.py @@ -24,7 +24,6 @@ import proto from google.api_core import exceptions from google.auth import credentials as auth_credentials -from google.protobuf import json_format from google.cloud.aiplatform import base, initializer from google.cloud.aiplatform import utils @@ -98,7 +97,7 @@ def __init__( @property def metadata(self) -> Dict: - return json_format.MessageToDict(self._gca_resource._pb)["metadata"] + return self.to_dict()["metadata"] @property def 
schema_title(self) -> str: diff --git a/tests/system/aiplatform/test_dataset.py b/tests/system/aiplatform/test_dataset.py index e852933dc3..0167cb8f20 100644 --- a/tests/system/aiplatform/test_dataset.py +++ b/tests/system/aiplatform/test_dataset.py @@ -21,7 +21,6 @@ import importlib from google import auth as google_auth -from google.protobuf import json_format from google.api_core import exceptions from google.api_core import client_options @@ -239,16 +238,15 @@ def test_create_tabular_dataset(self, dataset_gapic_client, shared_state): gcs_source=[_TEST_TABULAR_CLASSIFICATION_GCS_SOURCE], ) - gapic_dataset = tabular_dataset._gca_resource shared_state["dataset_name"] = tabular_dataset.resource_name - gapic_metadata = json_format.MessageToDict(gapic_dataset._pb.metadata) + gapic_metadata = tabular_dataset.to_dict()["metadata"] gcs_source_uris = gapic_metadata["inputConfig"]["gcsSource"]["uri"] assert len(gcs_source_uris) == 1 assert _TEST_TABULAR_CLASSIFICATION_GCS_SOURCE == gcs_source_uris[0] assert ( - gapic_dataset.metadata_schema_uri + tabular_dataset.metadata_schema_uri == aiplatform.schema.dataset.metadata.tabular ) From 4e7666a30b4472698ed980d9d746ba85ad4142d8 Mon Sep 17 00:00:00 2001 From: Morgan Du Date: Wed, 11 Aug 2021 08:32:39 -0700 Subject: [PATCH 10/28] Feat: add labels to all resource creation apis (#601) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! 
Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes #595, 🦕 - Add support for labels in resources creation: - datasets (`create`) - ImageDataset / TabularDataset / TextDataset / TimeSeriesDataset / VideoDataset - tensorboard - `Tensorboard.create` - jobs - `BatchPredictionJob.create` - `CustomJob.__init__` - `HyperparameterTuningJob.__init__` - models - `Endpoint.create` - `Model.upload` / `Model.batch_predict` - training_jobs (`__init__` for training labels, `run` for model labels) - CustomTrainingJob / CustomContainerTrainingJob / CustomPythonPackageTrainingJob - AutoMLTabularTrainingJob / AutoMLForecastingTrainingJob / AutoMLImageTrainingJob / AutoMLVideoTrainingJob / AutoMLTextTrainingJob - Modify `pipeline_jobs.py` and `utils.py` for `validate_labels` reusability - Add / modify unit tests to verify: - standard resource creation with / without labels - training pipeline resource creation - when model labels is not provided - when base model labels is provided --- google/cloud/aiplatform/datasets/dataset.py | 40 +- .../aiplatform/datasets/image_dataset.py | 16 +- .../aiplatform/datasets/tabular_dataset.py | 16 +- .../cloud/aiplatform/datasets/text_dataset.py | 16 +- .../datasets/time_series_dataset.py | 16 +- .../aiplatform/datasets/video_dataset.py | 16 +- google/cloud/aiplatform/jobs.py | 58 ++- google/cloud/aiplatform/models.py | 28 +- google/cloud/aiplatform/pipeline_jobs.py | 7 +- .../aiplatform/tensorboard/tensorboard.py | 3 + google/cloud/aiplatform/training_jobs.py | 368 +++++++++++++++++- google/cloud/aiplatform/utils/__init__.py | 18 +- .../test_automl_forecasting_training_jobs.py | 17 +- .../test_automl_image_training_jobs.py | 18 +- .../test_automl_tabular_training_jobs.py | 16 +- .../test_automl_text_training_jobs.py | 30 +- 
.../test_automl_video_training_jobs.py | 16 +- tests/unit/aiplatform/test_custom_job.py | 7 + tests/unit/aiplatform/test_datasets.py | 115 ++++++ tests/unit/aiplatform/test_endpoints.py | 18 + .../test_hyperparameter_tuning_job.py | 5 + tests/unit/aiplatform/test_models.py | 38 ++ tests/unit/aiplatform/test_training_jobs.py | 40 +- tests/unit/aiplatform/test_utils.py | 14 + 24 files changed, 872 insertions(+), 64 deletions(-) diff --git a/google/cloud/aiplatform/datasets/dataset.py b/google/cloud/aiplatform/datasets/dataset.py index c41b252869..5e5de0058b 100644 --- a/google/cloud/aiplatform/datasets/dataset.py +++ b/google/cloud/aiplatform/datasets/dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union, List +from typing import Dict, List, Optional, Sequence, Tuple, Union from google.api_core import operation from google.auth import credentials as auth_credentials @@ -115,6 +115,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "_Dataset": @@ -176,6 +177,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. 
+ System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the @@ -198,6 +209,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -221,6 +234,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), @@ -240,6 +254,7 @@ def _create_and_import( location: str, credentials: Optional[auth_credentials.Credentials], request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None, sync: bool = True, ) -> "_Dataset": @@ -277,6 +292,16 @@ def _create_and_import( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec (Optional[gca_encryption_spec.EncryptionSpec]): Optional. 
The Cloud KMS customer managed encryption key used to protect the dataset. The key needs to be in the same region as where the compute @@ -300,6 +325,7 @@ def _create_and_import( metadata_schema_uri=metadata_schema_uri, datasource=datasource, request_metadata=request_metadata, + labels=labels, encryption_spec=encryption_spec, ) @@ -346,6 +372,7 @@ def _create( metadata_schema_uri: str, datasource: _datasources.Datasource, request_metadata: Sequence[Tuple[str, str]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None, ) -> operation.Operation: """Creates a new managed dataset by directly calling API client. @@ -373,6 +400,16 @@ def _create( request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the create_dataset request as metadata. Usually to specify special dataset config. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec (Optional[gca_encryption_spec.EncryptionSpec]): Optional. The Cloud KMS customer managed encryption key used to protect the dataset. 
The key needs to be in the same region as where the compute @@ -388,6 +425,7 @@ def _create( display_name=display_name, metadata_schema_uri=metadata_schema_uri, metadata=datasource.dataset_metadata, + labels=labels, encryption_spec=encryption_spec, ) diff --git a/google/cloud/aiplatform/datasets/image_dataset.py b/google/cloud/aiplatform/datasets/image_dataset.py index 506338c915..bebc75beab 100644 --- a/google/cloud/aiplatform/datasets/image_dataset.py +++ b/google/cloud/aiplatform/datasets/image_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -44,6 +44,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "ImageDataset": @@ -95,6 +96,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. 
Has the @@ -117,6 +128,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -141,6 +154,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/tabular_dataset.py b/google/cloud/aiplatform/datasets/tabular_dataset.py index f9a9658d7e..741a2cc643 100644 --- a/google/cloud/aiplatform/datasets/tabular_dataset.py +++ b/google/cloud/aiplatform/datasets/tabular_dataset.py @@ -18,7 +18,7 @@ import csv import logging -from typing import List, Optional, Sequence, Set, Tuple, Union +from typing import Dict, List, Optional, Sequence, Set, Tuple, Union from google.auth import credentials as auth_credentials @@ -269,6 +269,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "TabularDataset": @@ -302,6 +303,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). 
+ See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the @@ -324,6 +335,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -347,6 +360,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/text_dataset.py b/google/cloud/aiplatform/datasets/text_dataset.py index 85676ed2ed..140fd17335 100644 --- a/google/cloud/aiplatform/datasets/text_dataset.py +++ b/google/cloud/aiplatform/datasets/text_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -44,6 +44,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "TextDataset": @@ -102,6 +103,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. 
+ Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the @@ -124,6 +135,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -148,6 +161,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/time_series_dataset.py b/google/cloud/aiplatform/datasets/time_series_dataset.py index 1a5d62bb39..5bad36b896 100644 --- a/google/cloud/aiplatform/datasets/time_series_dataset.py +++ b/google/cloud/aiplatform/datasets/time_series_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. 
# -from typing import Optional, Sequence, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -43,6 +43,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "TimeSeriesDataset": @@ -76,6 +77,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. + System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. 
Has the @@ -99,6 +110,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -122,6 +135,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/datasets/video_dataset.py b/google/cloud/aiplatform/datasets/video_dataset.py index 594a4ac407..2964b77f19 100644 --- a/google/cloud/aiplatform/datasets/video_dataset.py +++ b/google/cloud/aiplatform/datasets/video_dataset.py @@ -15,7 +15,7 @@ # limitations under the License. # -from typing import Optional, Sequence, Dict, Tuple, Union +from typing import Dict, Optional, Sequence, Tuple, Union from google.auth import credentials as auth_credentials @@ -44,6 +44,7 @@ def create( location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, request_metadata: Optional[Sequence[Tuple[str, str]]] = (), + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, ) -> "VideoDataset": @@ -95,6 +96,16 @@ def create( credentials set in aiplatform.init. request_metadata (Sequence[Tuple[str, str]]): Strings which should be sent along with the request as metadata. + labels (Dict[str, str]): + Optional. Labels with user-defined metadata to organize your Tensorboards. + Label keys and values can be no longer than 64 characters + (Unicode codepoints), can only contain lowercase letters, numeric + characters, underscores and dashes. International characters are allowed. + No more than 64 user labels can be associated with one Tensorboard + (System labels are excluded). + See https://goo.gl/xmQnxf for more information and examples of labels. 
+ System reserved label keys are prefixed with "aiplatform.googleapis.com/" + and are immutable. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the dataset. Has the @@ -117,6 +128,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -141,6 +154,7 @@ def create( location=location or initializer.global_config.location, credentials=credentials or initializer.global_config.credentials, request_metadata=request_metadata, + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 66b0479ced..720aa46b21 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -370,7 +370,7 @@ def create( explanation_parameters: Optional[ "aiplatform.explain.ExplanationParameters" ] = None, - labels: Optional[dict] = None, + labels: Optional[Dict[str, str]] = None, project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, @@ -499,8 +499,8 @@ def create( a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see `Ref docs ` - labels (Optional[dict]): - The labels with user-defined metadata to organize your + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to organize your BatchPredictionJobs. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase letters, numeric characters, underscores and dashes. 
@@ -533,6 +533,8 @@ def create( """ utils.validate_display_name(job_display_name) + if labels: + utils.validate_labels(labels) model_name = utils.full_resource_name( resource_name=model_name, @@ -935,6 +937,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, ): @@ -960,7 +963,8 @@ def __init__( my_job = aiplatform.CustomJob( display_name='my_job', - worker_pool_specs=worker_pool_specs + worker_pool_specs=worker_pool_specs, + labels={'my_key': 'my_value'}, ) my_job.run() @@ -989,6 +993,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional.Custom credentials to use to run call custom job service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize CustomJobs. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (str): Optional.Customer-managed encryption key name for a CustomJob. 
If this is set, then all resources @@ -1013,6 +1027,9 @@ def __init__( "should be set using aiplatform.init(staging_bucket='gs://my-bucket')" ) + if labels: + utils.validate_labels(labels) + # default directory if not given base_output_dir = base_output_dir or utils._timestamped_gcs_dir( staging_bucket, "aiplatform-custom-job" @@ -1026,6 +1043,7 @@ def __init__( output_uri_prefix=base_output_dir ), ), + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), @@ -1063,6 +1081,7 @@ def from_local_script( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, ) -> "CustomJob": @@ -1078,6 +1097,7 @@ def from_local_script( replica_count=1, args=['--dataset', 'gs://my-bucket/my-dataset', '--model_output_uri', 'gs://my-bucket/model'] + labels={'my_key': 'my_value'}, ) job.run() @@ -1126,6 +1146,16 @@ def from_local_script( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call custom job service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize CustomJobs. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (str): Optional. Customer-managed encryption key name for a CustomJob. 
If this is set, then all resources @@ -1150,6 +1180,9 @@ def from_local_script( "should be set using aiplatform.init(staging_bucket='gs://my-bucket')" ) + if labels: + utils.validate_labels(labels) + worker_pool_specs = worker_spec_utils._DistributedTrainingSpec.chief_worker_pool( replica_count=replica_count, machine_type=machine_type, @@ -1188,6 +1221,7 @@ def from_local_script( project=project, location=location, credentials=credentials, + labels=labels, encryption_spec_key_name=encryption_spec_key_name, staging_bucket=staging_bucket, ) @@ -1325,6 +1359,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, ): """ @@ -1353,7 +1388,8 @@ def __init__( custom_job = aiplatform.CustomJob( display_name='my_job', - worker_pool_specs=worker_pool_specs + worker_pool_specs=worker_pool_specs, + labels={'my_key': 'my_value'}, ) @@ -1371,6 +1407,7 @@ def __init__( }, max_trial_count=128, parallel_trial_count=8, + labels={'my_key': 'my_value'}, ) hp_job.run() @@ -1466,6 +1503,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call HyperparameterTuning service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize HyperparameterTuningJobs. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (str): Optional. Customer-managed encryption key options for a HyperparameterTuningJob. 
If this is set, then @@ -1503,6 +1550,7 @@ def __init__( parallel_trial_count=parallel_trial_count, max_failed_trial_count=max_failed_trial_count, trial_job_spec=copy.deepcopy(custom_job.job_spec), + labels=labels, encryption_spec=initializer.global_config.get_encryption_spec( encryption_spec_key_name=encryption_spec_key_name ), diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 9c53ff5a2d..c1518ce89d 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -154,7 +154,7 @@ def create( cls, display_name: str, description: Optional[str] = None, - labels: Optional[Dict] = None, + labels: Optional[Dict[str, str]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), project: Optional[str] = None, location: Optional[str] = None, @@ -177,7 +177,7 @@ def create( set in aiplatform.init will be used. description (str): Optional. The description of the Endpoint. - labels (Dict): + labels (Dict[str, str]): Optional. The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 @@ -216,6 +216,8 @@ def create( api_client = cls._instantiate_client(location=location, credentials=credentials) utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) project = project or initializer.global_config.project location = location or initializer.global_config.location @@ -244,7 +246,7 @@ def _create( project: str, location: str, description: Optional[str] = None, - labels: Optional[Dict] = None, + labels: Optional[Dict[str, str]] = None, metadata: Optional[Sequence[Tuple[str, str]]] = (), credentials: Optional[auth_credentials.Credentials] = None, encryption_spec: Optional[gca_encryption_spec.EncryptionSpec] = None, @@ -268,7 +270,7 @@ def _create( set in aiplatform.init will be used. description (str): Optional. The description of the Endpoint. - labels (Dict): + labels (Dict[str, str]): Optional. 
The labels with user-defined metadata to organize your Endpoints. Label keys and values can be no longer than 64 @@ -1470,6 +1472,7 @@ def upload( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, encryption_spec_key_name: Optional[str] = None, sync=True, ) -> "Model": @@ -1593,6 +1596,16 @@ def upload( credentials: Optional[auth_credentials.Credentials]=None, Custom credentials to use to upload this model. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the model. Has the @@ -1611,6 +1624,8 @@ def upload( is specified. 
""" utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) if bool(explanation_metadata) != bool(explanation_parameters): raise ValueError( @@ -1667,6 +1682,7 @@ def upload( description=description, container_spec=container_spec, predict_schemata=model_predict_schemata, + labels=labels, encryption_spec=encryption_spec, ) @@ -1991,7 +2007,7 @@ def batch_predict( generate_explanation: Optional[bool] = False, explanation_metadata: Optional[explain.ExplanationMetadata] = None, explanation_parameters: Optional[explain.ExplanationParameters] = None, - labels: Optional[dict] = None, + labels: Optional[Dict[str, str]] = None, credentials: Optional[auth_credentials.Credentials] = None, encryption_spec_key_name: Optional[str] = None, sync: bool = True, @@ -2126,7 +2142,7 @@ def batch_predict( a field of the `explanation_parameters` object is not populated, the corresponding field of the `Model.explanation_parameters` object is inherited. For more details, see `Ref docs ` - labels: Optional[dict] = None + labels: Optional[Dict[str, str]] = None Optional. The labels with user-defined metadata to organize your BatchPredictionJobs. Label keys and values can be no longer than 64 characters (Unicode codepoints), can only contain lowercase diff --git a/google/cloud/aiplatform/pipeline_jobs.py b/google/cloud/aiplatform/pipeline_jobs.py index 29a31a3ced..393f61c965 100644 --- a/google/cloud/aiplatform/pipeline_jobs.py +++ b/google/cloud/aiplatform/pipeline_jobs.py @@ -161,12 +161,7 @@ def __init__( utils.validate_display_name(display_name) if labels: - for k, v in labels.items(): - if not isinstance(k, str) or not isinstance(v, str): - raise ValueError( - "Expect labels to be a mapping of string key value pairs. 
" - 'Got "{}".'.format(labels) - ) + utils.validate_labels(labels) super().__init__(project=project, location=location, credentials=credentials) diff --git a/google/cloud/aiplatform/tensorboard/tensorboard.py b/google/cloud/aiplatform/tensorboard/tensorboard.py index 1b561a6557..3fe6507968 100644 --- a/google/cloud/aiplatform/tensorboard/tensorboard.py +++ b/google/cloud/aiplatform/tensorboard/tensorboard.py @@ -146,6 +146,8 @@ def create( """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) api_client = cls._instantiate_client(location=location, credentials=credentials) @@ -245,6 +247,7 @@ def update( update_mask.append("description") if labels: + utils.validate_labels(labels) update_mask.append("labels") encryption_spec = None diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 8e89509246..db7db10f2f 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -80,6 +80,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -96,6 +97,16 @@ def __init__( aiplatform.init will be used. credentials (auth_credentials.Credentials): Optional credentials to use to retrieve the model. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. 
The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -123,9 +134,12 @@ def __init__( Overrides encryption_spec_key_name set in aiplatform.init. """ utils.validate_display_name(display_name) + if labels: + utils.validate_labels(labels) super().__init__(project=project, location=location, credentials=credentials) self._display_name = display_name + self._labels = labels self._training_encryption_spec = initializer.global_config.get_encryption_spec( encryption_spec_key_name=training_encryption_spec_key_name ) @@ -581,6 +595,7 @@ def _run_job( training_task_inputs=training_task_inputs, model_to_upload=model, input_data_config=input_data_config, + labels=self._labels, encryption_spec=self._training_encryption_spec, ) @@ -881,6 +896,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -985,6 +1001,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -1019,6 +1045,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -1107,6 +1134,7 @@ def network(self) -> Optional[str]: def _prepare_and_validate_run( self, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, replica_count: int = 1, machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", @@ -1122,6 +1150,16 @@ def _prepare_and_validate_run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. replica_count (int): The number of worker replicas. If replica count = 1 then one chief replica will be provisioned. 
If replica_count > 1 the remainder will be @@ -1172,6 +1210,11 @@ def _prepare_and_validate_run( if model_display_name: utils.validate_display_name(model_display_name) managed_model.display_name = model_display_name + if model_labels: + utils.validate_labels(model_labels) + managed_model.labels = model_labels + else: + managed_model.labels = self._labels else: managed_model = None @@ -1313,6 +1356,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -1326,14 +1370,21 @@ def __init__( container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', model_serving_container_image_uri='gcr.io/my-trainer/serving:1', model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata) + model_serving_container_health_route='metadata, + labels={'key': 'value'}, + ) Usage with Dataset: ds = aiplatform.TabularDataset( 'projects/my-project/locations/us-central1/datasets/12345') - job.run(ds, replica_count=1, model_display_name='my-trained-model') + job.run( + ds, + replica_count=1, + model_display_name='my-trained-model', + model_labels={'key': 'value'}, + ) Usage without Dataset: @@ -1447,6 +1498,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. 
training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -1481,6 +1542,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, container_uri=container_uri, @@ -1515,6 +1577,7 @@ def run( ] = None, annotation_schema_uri: Optional[str] = None, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, @@ -1594,6 +1657,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. base_output_dir (str): GCS output directory of job. If not provided a timestamped directory in the staging directory will be used. 
@@ -1696,6 +1769,7 @@ def run( """ worker_pool_specs, managed_model = self._prepare_and_validate_run( model_display_name=model_display_name, + model_labels=model_labels, replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -1937,6 +2011,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -1949,14 +2024,21 @@ def __init__( command=['python3', 'run_script.py'] model_serving_container_image_uri='gcr.io/my-trainer/serving:1', model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata) + model_serving_container_health_route='metadata, + labels={'key': 'value'}, + ) Usage with Dataset: ds = aiplatform.TabularDataset( 'projects/my-project/locations/us-central1/datasets/12345') - job.run(ds, replica_count=1, model_display_name='my-trained-model') + job.run( + ds, + replica_count=1, + model_display_name='my-trained-model', + model_labels={'key': 'value'}, + ) Usage without Dataset: @@ -2070,6 +2152,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. 
The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -2104,6 +2196,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, container_uri=container_uri, @@ -2137,6 +2230,7 @@ def run( ] = None, annotation_schema_uri: Optional[str] = None, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, @@ -2209,6 +2303,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. base_output_dir (str): GCS output directory of job. If not provided a timestamped directory in the staging directory will be used. 
@@ -2316,6 +2420,7 @@ def run( """ worker_pool_specs, managed_model = self._prepare_and_validate_run( model_display_name=model_display_name, + model_labels=model_labels, replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -2532,6 +2637,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -2544,6 +2650,7 @@ def __init__( optimization_prediction_type="classification", optimization_objective="minimize-log-loss", column_specs={"column_1": "auto", "column_2": "numeric"}, + labels={'key': 'value'}, ) Args: @@ -2627,6 +2734,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -2661,6 +2778,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -2704,6 +2822,7 @@ def run( weight_column: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, export_evaluated_data_items: bool = False, export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, @@ -2774,6 +2893,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping (bool): Required. If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means @@ -2812,6 +2941,10 @@ def run( Raises: RuntimeError: If Training job has already been run or is waiting to run. 
""" + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) if self._is_waiting_to_run(): raise RuntimeError("AutoML Tabular Training is already scheduled to run.") @@ -2829,6 +2962,7 @@ def run( weight_column=weight_column, budget_milli_node_hours=budget_milli_node_hours, model_display_name=model_display_name, + model_labels=model_labels, disable_early_stopping=disable_early_stopping, export_evaluated_data_items=export_evaluated_data_items, export_evaluated_data_items_bigquery_destination_uri=export_evaluated_data_items_bigquery_destination_uri, @@ -2848,6 +2982,7 @@ def _run( weight_column: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, export_evaluated_data_items: bool = False, export_evaluated_data_items_bigquery_destination_uri: Optional[str] = None, @@ -2918,6 +3053,16 @@ def _run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping (bool): Required. If true, the entire budget is used. This disables the early stopping feature. 
By default, the early stopping feature is enabled, which means @@ -3008,11 +3153,9 @@ def _run( "additionalExperiments" ] = self._additional_experiments - if model_display_name is None: - model_display_name = self._display_name - model = gca_model.Model( - display_name=model_display_name, + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, encryption_spec=self._model_encryption_spec, ) @@ -3088,6 +3231,7 @@ class AutoMLForecastingTrainingJob(_TrainingJob): def __init__( self, display_name: str, + labels: Optional[Dict[str, str]] = None, optimization_objective: Optional[str] = None, column_transformations: Optional[Union[Dict, List[Dict]]] = None, project: Optional[str] = None, @@ -3099,6 +3243,16 @@ def __init__( Args: display_name (str): Required. The user-defined name of this TrainingPipeline. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. optimization_objective (str): Optional. Objective function the model is to be optimized towards. The training process creates a Model that optimizes the value of the objective @@ -3130,6 +3284,7 @@ def __init__( """ super().__init__( display_name=display_name, + labels=labels, project=project, location=location, credentials=credentials, @@ -3160,6 +3315,7 @@ def run( validation_options: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -3279,6 +3435,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. 
+ model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -3291,6 +3457,11 @@ def run( RuntimeError if Training job has already been run or is waiting to run. """ + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError( "AutoML Forecasting Training is already scheduled to run." @@ -3320,6 +3491,7 @@ def run( quantiles=quantiles, validation_options=validation_options, model_display_name=model_display_name, + model_labels=model_labels, sync=sync, ) @@ -3346,6 +3518,7 @@ def _run( validation_options: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -3464,6 +3637,16 @@ def _run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. 
If False, this method will be executed in concurrent Future and any downstream object will @@ -3515,10 +3698,10 @@ def _run( "additionalExperiments" ] = self._additional_experiments - if model_display_name is None: - model_display_name = self._display_name - - model = gca_model.Model(display_name=model_display_name) + model = gca_model.Model( + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, + ) return self._run_job( training_task_definition=training_task_definition, @@ -3564,6 +3747,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -3629,6 +3813,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -3689,6 +3883,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -3706,6 +3901,7 @@ def run( test_fraction_split: float = 0.1, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, sync: bool = True, ) -> models.Model: @@ -3752,6 +3948,16 @@ def run( Optional. The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping: bool = False Required. If true, the entire budget is used. This disables the early stopping feature. By default, the early stopping feature is enabled, which means @@ -3770,6 +3976,11 @@ def run( RuntimeError: If Training job has already been run or is waiting to run. 
""" + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError("AutoML Image Training is already scheduled to run.") @@ -3784,6 +3995,7 @@ def run( test_fraction_split=test_fraction_split, budget_milli_node_hours=budget_milli_node_hours, model_display_name=model_display_name, + model_labels=model_labels, disable_early_stopping=disable_early_stopping, sync=sync, ) @@ -3798,6 +4010,7 @@ def _run( test_fraction_split: float = 0.1, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, disable_early_stopping: bool = False, sync: bool = True, ) -> models.Model: @@ -3852,6 +4065,16 @@ def _run( characters. If a `base_model` was provided, the display_name in the base_model will be overritten with this value. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. disable_early_stopping (bool): Required. If true, the entire budget is used. This disables the early stopping feature. 
By default, the early stopping feature is enabled, which means @@ -3888,6 +4111,7 @@ def _run( model_tbt = gca_model.Model(encryption_spec=self._model_encryption_spec) model_tbt.display_name = model_display_name or self._display_name + model_tbt.labels = model_labels or self._labels if base_model: # Use provided base_model to pass to model_to_upload causing the @@ -3945,6 +4169,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, staging_bucket: Optional[str] = None, @@ -3958,7 +4183,8 @@ def __init__( container_uri='gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest', model_serving_container_image_uri='gcr.io/my-trainer/serving:1', model_serving_container_predict_route='predict', - model_serving_container_health_route='metadata + model_serving_container_health_route='metadata, + labels={'key': 'value'}, ) Usage with Dataset: @@ -3970,14 +4196,16 @@ def __init__( job.run( ds, replica_count=1, - model_display_name='my-trained-model' + model_display_name='my-trained-model', + model_labels={'key': 'value'}, ) Usage without Dataset: job.run( replica_count=1, - model_display_name='my-trained-model' + model_display_name='my-trained-model', + model_labels={'key': 'value'}, ) To ensure your model gets saved in Vertex AI, write your saved model to @@ -4086,6 +4314,16 @@ def __init__( credentials (auth_credentials.Credentials): Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. 
International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -4120,6 +4358,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, container_uri=container_uri, @@ -4152,6 +4391,7 @@ def run( ] = None, annotation_schema_uri: Optional[str] = None, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, @@ -4224,6 +4464,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. base_output_dir (str): GCS output directory of job. If not provided a timestamped directory in the staging directory will be used. 
@@ -4326,6 +4576,7 @@ def run( """ worker_pool_specs, managed_model = self._prepare_and_validate_run( model_display_name=model_display_name, + model_labels=model_labels, replica_count=replica_count, machine_type=machine_type, accelerator_count=accelerator_count, @@ -4530,6 +4781,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -4579,6 +4831,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. Has the @@ -4628,6 +4890,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -4641,6 +4904,7 @@ def run( training_fraction_split: float = 0.8, test_fraction_split: float = 0.2, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the AutoML Image training job and returns a model. @@ -4669,6 +4933,16 @@ def run( Optional. The display name of the managed Vertex AI Model. 
The name can be up to 128 characters long and can be consist of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync: bool = True Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -4681,6 +4955,11 @@ def run( RuntimeError: If Training job has already been run or is waiting to run. """ + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError("AutoML Video Training is already scheduled to run.") @@ -4692,6 +4971,7 @@ def run( training_fraction_split=training_fraction_split, test_fraction_split=test_fraction_split, model_display_name=model_display_name, + model_labels=model_labels, sync=sync, ) @@ -4702,6 +4982,7 @@ def _run( training_fraction_split: float = 0.8, test_fraction_split: float = 0.2, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -4732,6 +5013,16 @@ def _run( characters. If a `base_model` was provided, the display_name in the base_model will be overritten with this value. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. 
+ Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -4754,6 +5045,7 @@ def _run( # gca Model to be trained model_tbt = gca_model.Model(encryption_spec=self._model_encryption_spec) model_tbt.display_name = model_display_name or self._display_name + model_tbt.labels = model_labels or self._labels return self._run_job( training_task_definition=training_task_definition, @@ -4790,6 +5082,7 @@ def __init__( project: Optional[str] = None, location: Optional[str] = None, credentials: Optional[auth_credentials.Credentials] = None, + labels: Optional[Dict[str, str]] = None, training_encryption_spec_key_name: Optional[str] = None, model_encryption_spec_key_name: Optional[str] = None, ): @@ -4833,6 +5126,16 @@ def __init__( credentials (auth_credentials.Credentials): Optional. Custom credentials to use to run call training service. Overrides credentials set in aiplatform.init. + labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize TrainingPipelines. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. training_encryption_spec_key_name (Optional[str]): Optional. The Cloud KMS resource identifier of the customer managed encryption key used to protect the training pipeline. 
Has the @@ -4864,6 +5167,7 @@ def __init__( project=project, location=location, credentials=credentials, + labels=labels, training_encryption_spec_key_name=training_encryption_spec_key_name, model_encryption_spec_key_name=model_encryption_spec_key_name, ) @@ -4908,6 +5212,7 @@ def run( validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -4941,6 +5246,16 @@ def run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -4952,6 +5267,11 @@ def run( RuntimeError: If Training job has already been run or is waiting to run. 
""" + if model_display_name: + utils.validate_display_name(model_display_name) + if model_labels: + utils.validate_labels(model_labels) + if self._is_waiting_to_run(): raise RuntimeError("AutoML Text Training is already scheduled to run.") @@ -4964,6 +5284,7 @@ def run( validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, model_display_name=model_display_name, + model_labels=model_labels, sync=sync, ) @@ -4975,6 +5296,7 @@ def _run( validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, model_display_name: Optional[str] = None, + model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. @@ -5010,6 +5332,16 @@ def _run( of any UTF-8 characters. If not provided upon creation, the job's display_name is used. + model_labels (Dict[str, str]): + Optional. The labels with user-defined metadata to + organize your Models. + Label keys and values can be no longer than 64 + characters (Unicode codepoints), can only + contain lowercase letters, numeric characters, + underscores and dashes. International characters + are allowed. + See https://goo.gl/xmQnxf for more information + and examples of labels. sync (bool): Whether to execute this method synchronously. If False, this method will be executed in concurrent Future and any downstream object will @@ -5020,11 +5352,9 @@ def _run( produce a Vertex AI Model. 
""" - if model_display_name is None: - model_display_name = self._display_name - model = gca_model.Model( - display_name=model_display_name, + display_name=model_display_name or self._display_name, + labels=model_labels or self._labels, encryption_spec=self._model_encryption_spec, ) diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py index bf57c86908..d239b9b441 100644 --- a/google/cloud/aiplatform/utils/__init__.py +++ b/google/cloud/aiplatform/utils/__init__.py @@ -22,7 +22,7 @@ from collections import namedtuple import logging import re -from typing import Any, Match, Optional, Type, TypeVar, Tuple +from typing import Any, Dict, Match, Optional, Type, TypeVar, Tuple from google.api_core import client_options from google.api_core import gapic_v1 @@ -239,6 +239,22 @@ def validate_display_name(display_name: str): raise ValueError("Display name needs to be less than 128 characters.") +def validate_labels(labels: Dict[str, str]): + """Validate labels. + + Args: + labels: labels to verify + Raises: + ValueError: if labels is not a mapping of string key value pairs. + """ + for k, v in labels.items(): + if not isinstance(k, str) or not isinstance(v, str): + raise ValueError( + "Expect labels to be a mapping of string key value pairs. " + 'Got "{}".'.format(labels) + ) + + def validate_region(region: str) -> bool: """Validates region against supported regions. 
diff --git a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py index d7b2e85001..d699563327 100644 --- a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py @@ -103,6 +103,8 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} _TEST_TRAINING_FRACTION_SPLIT = 0.8 _TEST_VALIDATION_FRACTION_SPLIT = 0.1 _TEST_TEST_FRACTION_SPLIT = 0.1 @@ -228,6 +230,7 @@ def test_run_call_pipeline_service_create( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + labels=_TEST_LABELS, ) model_from_job = job.run( @@ -241,6 +244,7 @@ def test_run_call_pipeline_service_create( data_granularity_unit=_TEST_TRAINING_DATA_GRANULARITY_UNIT, data_granularity_count=_TEST_TRAINING_DATA_GRANULARITY_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, weight_column=_TEST_TRAINING_WEIGHT_COLUMN, time_series_attribute_columns=_TEST_TRAINING_TIME_SERIES_ATTRIBUTE_COLUMNS, @@ -263,7 +267,9 @@ def test_run_call_pipeline_service_create( test_fraction=_TEST_TEST_FRACTION_SPLIT, ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, @@ -275,6 +281,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_forecasting, 
training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, @@ -300,7 +307,7 @@ def test_run_call_pipeline_service_create( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_time_series, @@ -313,6 +320,7 @@ def test_run_call_pipeline_if_no_model_display_name( display_name=_TEST_DISPLAY_NAME, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + labels=_TEST_LABELS, ) model_from_job = job.run( @@ -347,7 +355,9 @@ def test_run_call_pipeline_if_no_model_display_name( ) # Test that if defaults to the job display name - true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, @@ -356,6 +366,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_forecasting, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_image_training_jobs.py b/tests/unit/aiplatform/test_automl_image_training_jobs.py index 29ce61a8a1..a46f960b1c 100644 --- a/tests/unit/aiplatform/test_automl_image_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_image_training_jobs.py @@ -46,6 +46,9 @@ _TEST_MODEL_DISPLAY_NAME = "model-display-name" _TEST_MODEL_ID = "98777645321" +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_TRAINING_TASK_INPUTS = 
json_format.ParseDict( { "modelType": "CLOUD", @@ -251,12 +254,15 @@ def test_run_call_pipeline_service_create( ) job = training_jobs.AutoMLImageTrainingJob( - display_name=_TEST_DISPLAY_NAME, base_model=mock_model_image + display_name=_TEST_DISPLAY_NAME, + base_model=mock_model_image, + labels=_TEST_LABELS, ) model_from_job = job.run( dataset=mock_dataset_image, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -276,6 +282,7 @@ def test_run_call_pipeline_service_create( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=mock_model_image._gca_resource.labels, description=mock_model_image._gca_resource.description, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -286,6 +293,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_image_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL, model_to_upload=true_managed_model, @@ -307,7 +315,7 @@ def test_run_call_pipeline_service_create( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_image, @@ -318,6 +326,7 @@ def test_run_call_pipeline_if_no_model_display_name( job = training_jobs.AutoMLImageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME, model_encryption_spec_key_name=_TEST_MODEL_ENCRYPTION_KEY_NAME, ) @@ -342,7 +351,9 @@ def test_run_call_pipeline_if_no_model_display_name( 
# Test that if defaults to the job display name true_managed_model = gca_model.Model( - display_name=_TEST_DISPLAY_NAME, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC + display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, + encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) true_input_data_config = gca_training_pipeline.InputDataConfig( @@ -351,6 +362,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_image_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py index 02ddad688b..2c380206e4 100644 --- a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py @@ -136,6 +136,10 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" + +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_TRAINING_FRACTION_SPLIT = 0.6 _TEST_VALIDATION_FRACTION_SPLIT = 0.2 _TEST_TEST_FRACTION_SPLIT = 0.2 @@ -308,6 +312,7 @@ def test_run_call_pipeline_service_create( job = training_jobs.AutoMLTabularTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -319,6 +324,7 @@ def test_run_call_pipeline_service_create( dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, 
validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -344,6 +350,7 @@ def test_run_call_pipeline_service_create( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -357,6 +364,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_tabular, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, @@ -476,7 +484,7 @@ def test_run_call_pipeline_service_create_with_export_eval_data_items( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_tabular, @@ -487,6 +495,7 @@ def test_run_call_pipeline_if_no_model_display_name( job = training_jobs.AutoMLTabularTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, @@ -522,7 +531,9 @@ def test_run_call_pipeline_if_no_model_display_name( # Test that if defaults to the job display name true_managed_model = gca_model.Model( - display_name=_TEST_DISPLAY_NAME, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC + display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, + encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) true_input_data_config = gca_training_pipeline.InputDataConfig( @@ -531,6 +542,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( 
display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_tabular, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_text_training_jobs.py b/tests/unit/aiplatform/test_automl_text_training_jobs.py index 4d7cd60527..583789c00e 100644 --- a/tests/unit/aiplatform/test_automl_text_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_text_training_jobs.py @@ -42,6 +42,10 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" + +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_MODEL_ID = "98777645321" _TEST_TRAINING_TASK_INPUTS_CLASSIFICATION = training_job_inputs.AutoMlTextClassificationInputs( @@ -319,6 +323,7 @@ def test_run_call_pipeline_service_create_classification( job = training_jobs.AutoMLTextTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION, multi_label=_TEST_CLASSIFICATION_MULTILABEL, training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME, @@ -328,6 +333,7 @@ def test_run_call_pipeline_service_create_classification( model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -345,6 +351,7 @@ def test_run_call_pipeline_service_create_classification( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) @@ -354,6 +361,7 @@ def test_run_call_pipeline_service_create_classification( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, 
training_task_definition=schema.training_job.definition.automl_text_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION, model_to_upload=true_managed_model, @@ -388,12 +396,14 @@ def test_run_call_pipeline_service_create_extraction( job = training_jobs.AutoMLTextTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_EXTRACTION, ) model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -409,7 +419,9 @@ def test_run_call_pipeline_service_create_extraction( test_fraction=_TEST_FRACTION_SPLIT_TEST, ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, @@ -417,6 +429,7 @@ def test_run_call_pipeline_service_create_extraction( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_text_extraction, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_EXTRACTION, model_to_upload=true_managed_model, @@ -450,6 +463,7 @@ def test_run_call_pipeline_service_create_sentiment( job = training_jobs.AutoMLTextTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_SENTIMENT, sentiment_max=10, ) @@ -457,6 +471,7 @@ def test_run_call_pipeline_service_create_sentiment( model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, 
training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, @@ -472,7 +487,9 @@ def test_run_call_pipeline_service_create_sentiment( test_fraction=_TEST_FRACTION_SPLIT_TEST, ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, @@ -480,6 +497,7 @@ def test_run_call_pipeline_service_create_sentiment( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_text_sentiment, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_SENTIMENT, model_to_upload=true_managed_model, @@ -500,7 +518,7 @@ def test_run_call_pipeline_service_create_sentiment( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_text, @@ -514,6 +532,7 @@ def test_run_call_pipeline_if_no_model_display_name( display_name=_TEST_DISPLAY_NAME, prediction_type="classification", multi_label=True, + labels=_TEST_LABELS, ) model_from_job = job.run( @@ -535,7 +554,9 @@ def test_run_call_pipeline_if_no_model_display_name( ) # Test that if defaults to the job display name - true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, @@ -543,6 +564,7 @@ def 
test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_text_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_automl_video_training_jobs.py b/tests/unit/aiplatform/test_automl_video_training_jobs.py index b3087d0eed..fc7d6f38e3 100644 --- a/tests/unit/aiplatform/test_automl_video_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_video_training_jobs.py @@ -43,6 +43,10 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" + +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_MODEL_ID = "98777645321" # TODO _TEST_TRAINING_TASK_INPUTS = json_format.ParseDict( @@ -290,6 +294,7 @@ def test_run_call_pipeline_service_create( job = training_jobs.AutoMLVideoTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_VCN, model_type=_TEST_MODEL_TYPE_CLOUD, training_encryption_spec_key_name=_TEST_PIPELINE_ENCRYPTION_KEY_NAME, @@ -299,6 +304,7 @@ def test_run_call_pipeline_service_create( model_from_job = job.run( dataset=mock_dataset_video, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, sync=sync, @@ -314,6 +320,7 @@ def test_run_call_pipeline_service_create( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=mock_model._gca_resource.description, encryption_spec=_TEST_MODEL_ENCRYPTION_SPEC, ) @@ -324,6 +331,7 @@ def test_run_call_pipeline_service_create( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + 
labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_video_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, @@ -345,7 +353,7 @@ def test_run_call_pipeline_service_create( @pytest.mark.usefixtures("mock_pipeline_service_get") @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_if_no_model_display_name( + def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_dataset_video, @@ -356,6 +364,7 @@ def test_run_call_pipeline_if_no_model_display_name( job = training_jobs.AutoMLVideoTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, prediction_type=_TEST_PREDICTION_TYPE_VCN, model_type=_TEST_MODEL_TYPE_CLOUD, ) @@ -375,7 +384,9 @@ def test_run_call_pipeline_if_no_model_display_name( ) # Test that if defaults to the job display name - true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME) + true_managed_model = gca_model.Model( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, dataset_id=mock_dataset_video.name, @@ -383,6 +394,7 @@ def test_run_call_pipeline_if_no_model_display_name( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.automl_video_classification, training_task_inputs=_TEST_TRAINING_TASK_INPUTS, model_to_upload=true_managed_model, diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py index 363ad18048..da4fc1fbe7 100644 --- a/tests/unit/aiplatform/test_custom_job.py +++ b/tests/unit/aiplatform/test_custom_job.py @@ -87,6 +87,8 @@ _TEST_TIMEOUT = 8000 _TEST_RESTART_JOB_ON_WORKER_RESTART = True +_TEST_LABELS = {"my_key": "my_value"} + _TEST_BASE_CUSTOM_JOB_PROTO = gca_custom_job_compat.CustomJob( 
display_name=_TEST_DISPLAY_NAME, job_spec=gca_custom_job_compat.CustomJobSpec( @@ -101,6 +103,7 @@ service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK, ), + labels=_TEST_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -228,6 +231,7 @@ def test_create_custom_job(self, create_custom_job_mock, get_custom_job_mock, sy display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) job.run( @@ -271,6 +275,7 @@ def test_run_custom_job_with_fail_raises( display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) with pytest.raises(RuntimeError) as e: @@ -395,6 +400,7 @@ def test_create_from_local_script( script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) job.run(sync=sync) @@ -441,6 +447,7 @@ def test_create_custom_job_with_tensorboard( display_name=_TEST_DISPLAY_NAME, worker_pool_specs=_TEST_WORKER_POOL_SPEC, base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, ) job.run( diff --git a/tests/unit/aiplatform/test_datasets.py b/tests/unit/aiplatform/test_datasets.py index 3457ccc7bd..7864ca0d35 100644 --- a/tests/unit/aiplatform/test_datasets.py +++ b/tests/unit/aiplatform/test_datasets.py @@ -144,6 +144,8 @@ _TEST_LIST_FILTER = 'display_name="abc"' _TEST_LIST_ORDER_BY = "create_time desc" +_TEST_LABELS = {"my_key": "my_value"} + @pytest.fixture def get_dataset_mock(): @@ -946,6 +948,34 @@ def test_create_then_import( expected_dataset.name = _TEST_NAME assert my_dataset._gca_resource == expected_dataset + @pytest.mark.usefixtures("get_dataset_image_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + aiplatform.init( + project=_TEST_PROJECT, encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, + ) + 
+ my_dataset = datasets.ImageDataset.create( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_IMAGE, + metadata=_TEST_NONTABULAR_DATASET_METADATA, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) + class TestTabularDataset: def setup_method(self): @@ -1165,6 +1195,35 @@ def test_tabular_dataset_column_name_bigquery(self): ] ) + @pytest.mark.usefixtures("get_dataset_tabular_bq_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + + my_dataset = datasets.TabularDataset.create( + display_name=_TEST_DISPLAY_NAME, + bq_source=_TEST_SOURCE_URI_BQ, + labels=_TEST_LABELS, + encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, + sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_TABULAR, + metadata=_TEST_METADATA_TABULAR_BQ, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) + class TestTextDataset: def setup_method(self): @@ -1364,6 +1423,34 @@ def test_create_then_import( expected_dataset.name = _TEST_NAME assert my_dataset._gca_resource == expected_dataset + @pytest.mark.usefixtures("get_dataset_text_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + aiplatform.init( + project=_TEST_PROJECT, encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME, + ) + + my_dataset = datasets.TextDataset.create( + 
display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_TEXT, + metadata=_TEST_NONTABULAR_DATASET_METADATA, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) + class TestVideoDataset: def setup_method(self): @@ -1525,3 +1612,31 @@ def test_create_then_import( expected_dataset.name = _TEST_NAME assert my_dataset._gca_resource == expected_dataset + + @pytest.mark.usefixtures("get_dataset_video_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_dataset_with_labels(self, create_dataset_mock, sync): + aiplatform.init( + project=_TEST_PROJECT, encryption_spec_key_name=_TEST_ENCRYPTION_KEY_NAME + ) + + my_dataset = datasets.VideoDataset.create( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync, + ) + + if not sync: + my_dataset.wait() + + expected_dataset = gca_dataset.Dataset( + display_name=_TEST_DISPLAY_NAME, + metadata_schema_uri=_TEST_METADATA_SCHEMA_URI_VIDEO, + metadata=_TEST_NONTABULAR_DATASET_METADATA, + labels=_TEST_LABELS, + encryption_spec=_TEST_ENCRYPTION_SPEC, + ) + + create_dataset_mock.assert_called_once_with( + parent=_TEST_PARENT, + dataset=expected_dataset, + metadata=_TEST_REQUEST_METADATA, + ) diff --git a/tests/unit/aiplatform/test_endpoints.py b/tests/unit/aiplatform/test_endpoints.py index 0ae76ea988..9dfc2db481 100644 --- a/tests/unit/aiplatform/test_endpoints.py +++ b/tests/unit/aiplatform/test_endpoints.py @@ -167,6 +167,8 @@ _TEST_LIST_ORDER_BY_CREATE_TIME = "create_time desc" _TEST_LIST_ORDER_BY_DISPLAY_NAME = "display_name" +_TEST_LABELS = {"my_key": "my_value"} + @pytest.fixture def get_endpoint_mock(): @@ -527,6 +529,22 @@ def test_create_with_description(self, 
create_endpoint_mock, sync): parent=_TEST_PARENT, endpoint=expected_endpoint, metadata=(), ) + @pytest.mark.usefixtures("get_endpoint_mock") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_with_labels(self, create_endpoint_mock, sync): + my_endpoint = models.Endpoint.create( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, sync=sync + ) + if not sync: + my_endpoint.wait() + + expected_endpoint = gca_endpoint.Endpoint( + display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, + ) + create_endpoint_mock.assert_called_once_with( + parent=_TEST_PARENT, endpoint=expected_endpoint, metadata=(), + ) + @pytest.mark.usefixtures("get_endpoint_mock", "get_model_mock") @pytest.mark.parametrize("sync", [True, False]) def test_deploy(self, deploy_model_mock, sync): diff --git a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py index 752d39a93c..d82071db4f 100644 --- a/tests/unit/aiplatform/test_hyperparameter_tuning_job.py +++ b/tests/unit/aiplatform/test_hyperparameter_tuning_job.py @@ -78,6 +78,7 @@ _TEST_SEARCH_ALGORITHM = "random" _TEST_MEASUREMENT_SELECTION = "best" +_TEST_LABELS = {"my_hp_key": "my_hp_value"} _TEST_BASE_HYPERPARAMETER_TUNING_JOB_PROTO = gca_hyperparameter_tuning_job_compat.HyperparameterTuningJob( display_name=_TEST_DISPLAY_NAME, @@ -123,6 +124,7 @@ max_trial_count=_TEST_MAX_TRIAL_COUNT, max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, trial_job_spec=test_custom_job._TEST_BASE_CUSTOM_JOB_PROTO.job_spec, + labels=_TEST_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -283,6 +285,7 @@ def test_create_hyperparameter_tuning_job( max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, search_algorithm=_TEST_SEARCH_ALGORITHM, measurement_selection=_TEST_MEASUREMENT_SELECTION, + labels=_TEST_LABELS, ) job.run( @@ -345,6 +348,7 @@ def test_run_hyperparameter_tuning_job_with_fail_raises( max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, 
search_algorithm=_TEST_SEARCH_ALGORITHM, measurement_selection=_TEST_MEASUREMENT_SELECTION, + labels=_TEST_LABELS, ) with pytest.raises(RuntimeError): @@ -524,6 +528,7 @@ def test_create_hyperparameter_tuning_job_with_tensorboard( max_failed_trial_count=_TEST_MAX_FAILED_TRIAL_COUNT, search_algorithm=_TEST_SEARCH_ALGORITHM, measurement_selection=_TEST_MEASUREMENT_SELECTION, + labels=_TEST_LABELS, ) job.run( diff --git a/tests/unit/aiplatform/test_models.py b/tests/unit/aiplatform/test_models.py index 600b880d14..89a6dc9ca7 100644 --- a/tests/unit/aiplatform/test_models.py +++ b/tests/unit/aiplatform/test_models.py @@ -599,6 +599,42 @@ def test_upload_uploads_and_gets_model( get_model_mock.assert_called_once_with(name=_TEST_MODEL_RESOURCE_NAME) + @pytest.mark.parametrize("sync", [True, False]) + def test_upload_uploads_and_gets_model_with_labels( + self, upload_model_mock, get_model_mock, sync + ): + + my_model = models.Model.upload( + display_name=_TEST_MODEL_NAME, + serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, + serving_container_predict_route=_TEST_SERVING_CONTAINER_PREDICTION_ROUTE, + serving_container_health_route=_TEST_SERVING_CONTAINER_HEALTH_ROUTE, + labels=_TEST_LABEL, + sync=sync, + ) + + if not sync: + my_model.wait() + + container_spec = gca_model.ModelContainerSpec( + image_uri=_TEST_SERVING_CONTAINER_IMAGE, + predict_route=_TEST_SERVING_CONTAINER_PREDICTION_ROUTE, + health_route=_TEST_SERVING_CONTAINER_HEALTH_ROUTE, + ) + + managed_model = gca_model.Model( + display_name=_TEST_MODEL_NAME, + container_spec=container_spec, + labels=_TEST_LABEL, + ) + + upload_model_mock.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + model=managed_model, + ) + + get_model_mock.assert_called_once_with(name=_TEST_MODEL_RESOURCE_NAME) + def test_upload_raises_with_impartial_explanation_spec(self): with pytest.raises(ValueError) as e: @@ -633,6 +669,7 @@ def test_upload_uploads_and_gets_model_with_all_args( 
serving_container_ports=_TEST_SERVING_CONTAINER_PORTS, explanation_metadata=_TEST_EXPLANATION_METADATA, explanation_parameters=_TEST_EXPLANATION_PARAMETERS, + labels=_TEST_LABEL, sync=sync, ) @@ -673,6 +710,7 @@ def test_upload_uploads_and_gets_model_with_all_args( metadata=_TEST_EXPLANATION_METADATA, parameters=_TEST_EXPLANATION_PARAMETERS, ), + labels=_TEST_LABEL, ) upload_model_with_explanations_mock.assert_called_once_with( diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index c639c462cb..15824d3172 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -96,6 +96,9 @@ _TEST_INVALID_ACCELERATOR_TYPE = "NVIDIA_DOES_NOT_EXIST" _TEST_ACCELERATOR_COUNT = 1 _TEST_MODEL_DISPLAY_NAME = "model-display-name" +_TEST_LABELS = {"key": "value"} +_TEST_MODEL_LABELS = {"model_key": "model_value"} + _TEST_DEFAULT_TRAINING_FRACTION_SPLIT = 0.8 _TEST_DEFAULT_VALIDATION_FRACTION_SPLIT = 0.1 _TEST_DEFAULT_TEST_FRACTION_SPLIT = 0.1 @@ -630,6 +633,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( job = training_jobs.CustomTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, script_path=_TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -656,6 +660,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -723,6 +728,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, 
description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -761,6 +767,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( ), model_to_upload=true_managed_model, input_data_config=true_input_data_config, + labels=_TEST_LABELS, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) @@ -1588,7 +1595,7 @@ def test_get_and_return_subclass_custom(self): assert isinstance(subcls, aiplatform.training_jobs.CustomTrainingJob) @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_service_create_with_nontabular_dataset( + def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_pipeline_service_get, @@ -1605,6 +1612,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( job = training_jobs.CustomTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, script_path=_TEST_LOCAL_SCRIPT_FILE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -1628,7 +1636,6 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, - model_display_name=_TEST_MODEL_DISPLAY_NAME, sync=sync, ) @@ -1685,7 +1692,8 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( ) true_managed_model = gca_model.Model( - display_name=_TEST_MODEL_DISPLAY_NAME, + display_name=_TEST_DISPLAY_NAME + "-model", + labels=_TEST_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -1706,6 +1714,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, 
training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ -1846,6 +1855,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( job = training_jobs.CustomContainerTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, command=_TEST_TRAINING_CONTAINER_CMD, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -1870,6 +1880,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -1931,6 +1942,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -1954,6 +1966,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ -2645,6 +2658,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( job = training_jobs.CustomContainerTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, command=_TEST_TRAINING_CONTAINER_CMD, model_serving_container_image_uri=_TEST_SERVING_CONTAINER_IMAGE, @@ -2671,6 +2685,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( accelerator_type=_TEST_ACCELERATOR_TYPE, 
accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, sync=sync, ) @@ -2721,6 +2736,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -2755,6 +2771,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( ), model_to_upload=true_managed_model, input_data_config=true_input_data_config, + labels=_TEST_LABELS, ) mock_pipeline_service_create.assert_called_once_with( @@ -3080,6 +3097,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( job = training_jobs.CustomPythonPackageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, python_package_gcs_uri=_TEST_OUTPUT_PYTHON_PACKAGE_PATH, python_module_name=_TEST_PYTHON_MODULE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, @@ -3099,6 +3117,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( model_from_job = job.run( dataset=mock_tabular_dataset, model_display_name=_TEST_MODEL_DISPLAY_NAME, + model_labels=_TEST_MODEL_LABELS, base_output_dir=_TEST_BASE_OUTPUT_DIR, service_account=_TEST_SERVICE_ACCOUNT, network=_TEST_NETWORK, @@ -3167,6 +3186,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, + labels=_TEST_MODEL_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -3190,6 +3210,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ 
-3227,7 +3248,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_display_name( + def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_pipeline_service_get, @@ -3243,6 +3264,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis job = training_jobs.CustomPythonPackageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, python_package_gcs_uri=_TEST_OUTPUT_PYTHON_PACKAGE_PATH, python_module_name=_TEST_PYTHON_MODULE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, @@ -3322,6 +3344,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis true_managed_model = gca_model.Model( display_name=_TEST_DISPLAY_NAME + "-model", + labels=_TEST_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -3345,6 +3368,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { @@ -4032,7 +4056,7 @@ def test_run_call_pipeline_service_create_distributed_training( assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED @pytest.mark.parametrize("sync", [True, False]) - def test_run_call_pipeline_service_create_with_nontabular_dataset( + def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_display_name_nor_model_labels( self, mock_pipeline_service_create, mock_pipeline_service_get, @@ -4047,6 +4071,7 @@ def 
test_run_call_pipeline_service_create_with_nontabular_dataset( job = training_jobs.CustomPythonPackageTrainingJob( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, python_package_gcs_uri=_TEST_OUTPUT_PYTHON_PACKAGE_PATH, python_module_name=_TEST_PYTHON_MODULE_NAME, container_uri=_TEST_TRAINING_CONTAINER_IMAGE, @@ -4071,7 +4096,6 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, - model_display_name=_TEST_MODEL_DISPLAY_NAME, service_account=_TEST_SERVICE_ACCOUNT, tensorboard=_TEST_TENSORBOARD_RESOURCE_NAME, sync=sync, @@ -4124,7 +4148,8 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( ) true_managed_model = gca_model.Model( - display_name=_TEST_MODEL_DISPLAY_NAME, + display_name=_TEST_DISPLAY_NAME + "-model", + labels=_TEST_LABELS, description=_TEST_MODEL_DESCRIPTION, container_spec=true_container_spec, predict_schemata=gca_model.PredictSchemata( @@ -4145,6 +4170,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( true_training_pipeline = gca_training_pipeline.TrainingPipeline( display_name=_TEST_DISPLAY_NAME, + labels=_TEST_LABELS, training_task_definition=schema.training_job.definition.custom_task, training_task_inputs=json_format.ParseDict( { diff --git a/tests/unit/aiplatform/test_utils.py b/tests/unit/aiplatform/test_utils.py index 068575fc51..ed85fb9f0a 100644 --- a/tests/unit/aiplatform/test_utils.py +++ b/tests/unit/aiplatform/test_utils.py @@ -257,6 +257,20 @@ def test_validate_display_name(): aiplatform.utils.validate_display_name("my_model_abc") +def test_validate_labels_raises_value_not_str(): + with pytest.raises(ValueError): + aiplatform.utils.validate_labels({"my_key1": 1, "my_key2": 2}) + + +def test_validate_labels_raises_key_not_str(): + with pytest.raises(ValueError): + aiplatform.utils.validate_labels({1: "my_value1", 2: "my_value2"}) + + +def 
test_validate_labels(): + aiplatform.utils.validate_labels({"my_key1": "my_value1", "my_key2": "my_value2"}) + + @pytest.mark.parametrize( "accelerator_type, expected", [ From 3612b05c62dfb46822cd2c1798fd47349dba33bc Mon Sep 17 00:00:00 2001 From: Vinny Senthil Date: Thu, 12 Aug 2021 07:48:40 -0700 Subject: [PATCH 11/28] chore: Update testing configs and structure for speed (#578) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Initial test config changes * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/master/packages/owl-bot/README.md * Update build configs to match google3 job configs * Rename required presubmit CI check * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Force OwlBot to avoid overwriting Kokoro configs * Undo OwlBot overwrite * Update OwlBot.py * Fix double install of SDK in sample nox sessions * Empty requirements.txt files to trigger Sample CI * Update Nox env var to use spaces, not csv * Update parallel dist in presubmit * Address reviewer comments, final changes * Limit presubmit to unit-3.6, widen release presubmit to all unit tests * Update presubmit to unit-3.7 only * Unit 3.6, 3.7 in presubmit to check coverage * Presubmit to unit-3.8 Co-authored-by: Owl Bot --- .github/sync-repo-settings.yaml | 3 +-- .kokoro/continuous/system.cfg | 10 ++++++++++ .kokoro/continuous/unit.cfg | 11 +++++++++++ .kokoro/presubmit/presubmit.cfg | 14 +++++++++++++- .kokoro/presubmit/release.cfg | 11 +++++++++++ owlbot.py | 1 + samples/model-builder/noxfile_config.py | 4 +++- samples/model-builder/requirements-test.txt | 2 ++ samples/model-builder/requirements-tests.txt | 1 - samples/model-builder/requirements.txt | 3 +-- samples/snippets/noxfile_config.py | 3 ++- samples/snippets/requirements-test.txt | 2 ++ samples/snippets/requirements-tests.txt | 1 - samples/snippets/requirements.txt | 4 +--- setup.py | 2 +- 15 
files changed, 59 insertions(+), 13 deletions(-) create mode 100644 .kokoro/continuous/system.cfg create mode 100644 .kokoro/continuous/unit.cfg create mode 100644 .kokoro/presubmit/release.cfg create mode 100644 samples/model-builder/requirements-test.txt delete mode 100644 samples/model-builder/requirements-tests.txt create mode 100644 samples/snippets/requirements-test.txt delete mode 100644 samples/snippets/requirements-tests.txt diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index dc9c647dbb..ab4509fa20 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -8,6 +8,5 @@ branchProtectionRules: requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: - - 'Kokoro' - 'cla/google' - - 'Samples - Lint' + - 'Presubmit - Unit Tests' diff --git a/.kokoro/continuous/system.cfg b/.kokoro/continuous/system.cfg new file mode 100644 index 0000000000..eaa9edaab5 --- /dev/null +++ b/.kokoro/continuous/system.cfg @@ -0,0 +1,10 @@ +env_vars: { + key: "NOX_SESSION" + value: "system-3.8" +} + +# Run system tests in parallel, splitting up by file +env_vars: { + key: "PYTEST_ADDOPTS" + value: "-n=auto --dist=loadfile" +} diff --git a/.kokoro/continuous/unit.cfg b/.kokoro/continuous/unit.cfg new file mode 100644 index 0000000000..52c7230be9 --- /dev/null +++ b/.kokoro/continuous/unit.cfg @@ -0,0 +1,11 @@ +# Run all unit test sessions, in Python 3.6 to 3.9 +env_vars: { + key: "NOX_SESSION" + value: "unit" +} + +# Run unit tests in parallel, splitting up by test +env_vars: { + key: "PYTEST_ADDOPTS" + value: "-n=auto" +} diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index 8f43917d92..f351292171 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -1 +1,13 @@ -# Format: //devtools/kokoro/config/proto/build.proto \ No newline at end of file +# Format: //devtools/kokoro/config/proto/build.proto + +# Run all sessions except 
system tests and docs builds +env_vars: { + key: "NOX_SESSION" + value: "unit-3.8 lint lint_setup_py blacken cover" +} + +# Run unit tests in parallel, splitting up by file +env_vars: { + key: "PYTEST_ADDOPTS" + value: "-n=auto --dist=loadfile" +} diff --git a/.kokoro/presubmit/release.cfg b/.kokoro/presubmit/release.cfg new file mode 100644 index 0000000000..b9398805e5 --- /dev/null +++ b/.kokoro/presubmit/release.cfg @@ -0,0 +1,11 @@ +# Run system tests in presubmit for library releases +env_vars: { + key: "NOX_SESSION" + value: "system-3.8 unit" +} + +# Run system tests in parallel, splitting up by file +env_vars: { + key: "PYTEST_ADDOPTS" + value: "-n=auto --dist=loadfile" +} diff --git a/owlbot.py b/owlbot.py index e8df4c990b..3c1e32b044 100644 --- a/owlbot.py +++ b/owlbot.py @@ -86,6 +86,7 @@ templated_files, excludes=[ ".coveragerc", + ".kokoro/**/*.cfg" ] ) # the microgenerator has a good coveragerc file diff --git a/samples/model-builder/noxfile_config.py b/samples/model-builder/noxfile_config.py index 95a168ea45..d83f6320cb 100644 --- a/samples/model-builder/noxfile_config.py +++ b/samples/model-builder/noxfile_config.py @@ -38,5 +38,7 @@ "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. 
- "envs": {}, + "envs": { + "PYTEST_ADDOPTS": "-n=auto" # Run tests parallel using all available CPUs + }, } diff --git a/samples/model-builder/requirements-test.txt b/samples/model-builder/requirements-test.txt new file mode 100644 index 0000000000..aeab33a1d6 --- /dev/null +++ b/samples/model-builder/requirements-test.txt @@ -0,0 +1,2 @@ +pytest==6.2.4 +pytest-xdist \ No newline at end of file diff --git a/samples/model-builder/requirements-tests.txt b/samples/model-builder/requirements-tests.txt deleted file mode 100644 index f53c4c11a6..0000000000 --- a/samples/model-builder/requirements-tests.txt +++ /dev/null @@ -1 +0,0 @@ -pytest >= 6.2 diff --git a/samples/model-builder/requirements.txt b/samples/model-builder/requirements.txt index efe811b2c3..d09caa45e0 100644 --- a/samples/model-builder/requirements.txt +++ b/samples/model-builder/requirements.txt @@ -1,2 +1 @@ -pytest >= 6.2 -git+https://github.com/googleapis/python-aiplatform.git@mb-release#egg=google-cloud-aiplatform \ No newline at end of file +google-cloud-aiplatform diff --git a/samples/snippets/noxfile_config.py b/samples/snippets/noxfile_config.py index a64317c506..d1502d6bf6 100644 --- a/samples/snippets/noxfile_config.py +++ b/samples/snippets/noxfile_config.py @@ -29,6 +29,7 @@ # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. 
"envs": { - "DATA_LABELING_API_ENDPOINT": "us-central1-autopush-aiplatform.sandbox.googleapis.com" + "DATA_LABELING_API_ENDPOINT": "us-central1-autopush-aiplatform.sandbox.googleapis.com", + "PYTEST_ADDOPTS": "-n=auto" # Run tests parallel using all available CPUs }, } diff --git a/samples/snippets/requirements-test.txt b/samples/snippets/requirements-test.txt new file mode 100644 index 0000000000..f8eeaaa725 --- /dev/null +++ b/samples/snippets/requirements-test.txt @@ -0,0 +1,2 @@ +pytest==6.2.4 +pytest-xdist diff --git a/samples/snippets/requirements-tests.txt b/samples/snippets/requirements-tests.txt deleted file mode 100644 index 7e460c8c86..0000000000 --- a/samples/snippets/requirements-tests.txt +++ /dev/null @@ -1 +0,0 @@ -pytest==6.0.1 diff --git a/samples/snippets/requirements.txt b/samples/snippets/requirements.txt index 76902b2424..d09caa45e0 100644 --- a/samples/snippets/requirements.txt +++ b/samples/snippets/requirements.txt @@ -1,3 +1 @@ -pytest==6.2.4 -google-cloud-storage>=1.26.0, <2.0.0dev -google-cloud-aiplatform==1.3.0 +google-cloud-aiplatform diff --git a/setup.py b/setup.py index 02d1bf881b..f6eeaca9fe 100644 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ full_extra_require = list( set(tensorboard_extra_require + metadata_extra_require + xai_extra_require) ) -testing_extra_require = full_extra_require + ["grpcio-testing"] +testing_extra_require = full_extra_require + ["grpcio-testing", "pytest-xdist"] setuptools.setup( From b42bf0daa113e234b7dc0324b3a083a6b3143c94 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Thu, 12 Aug 2021 16:28:23 +0000 Subject: [PATCH 12/28] chore(python): avoid `.nox` directories when building docs (#604) Source-Link: https://github.com/googleapis/synthtool/commit/7e1f6da50524b5d98eb67adbf6dd0805df54233d Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:a1a891041baa4ffbe1a809ac1b8b9b4a71887293c9101c88e8e255943c5aec2d --- 
.github/.OwlBot.lock.yaml | 2 +- docs/conf.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index 9ee60f7e48..b771c37cae 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:aea14a583128771ae8aefa364e1652f3c56070168ef31beb203534222d842b8b + digest: sha256:a1a891041baa4ffbe1a809ac1b8b9b4a71887293c9101c88e8e255943c5aec2d diff --git a/docs/conf.py b/docs/conf.py index 8d3ff953e6..5e1669c22d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -110,6 +110,7 @@ # directories to ignore when looking for source files. exclude_patterns = [ "_build", + "**/.nox/**/*", "samples/AUTHORING_GUIDE.md", "samples/CONTRIBUTING.md", "samples/snippets/README.rst", From 355ea24c6dd9b061ae0933df4dd07dd5b8c2232b Mon Sep 17 00:00:00 2001 From: Morgan Du Date: Thu, 12 Aug 2021 11:58:35 -0700 Subject: [PATCH 13/28] feat: expose boot disk type and size for CustomTrainingJob, CustomPythonPackageTrainingJob, and CustomContainerTrainingJob (#602) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Thank you for opening a Pull Request! Before submitting your PR, there are a few things you can do to make sure it goes smoothly: - [x] Make sure to open an issue as a [bug/issue](https://github.com/googleapis/python-aiplatform/issues/new/choose) before writing your code! 
That way we can discuss the change, evaluate designs, and agree on the general idea - [x] Ensure the tests and linter pass - [x] Code coverage does not decrease (if any source code was changed) - [x] Appropriate docs were updated (if necessary) Fixes # 🦕 - Change `_MachineSpec` to `_WorkerPoolSpec` - Add `boot_disk_type` and `boot_disk_size_gb` in `_MachineSpec` and `_DistributedTrainingSpec` - Expose `boot_disk_type` and `boot_disk_size_gb` in `run` with default values of `CustomTrainingJob`, `CustomPythonPackageTrainingJob`, and `CustomContainerTrainingJob` - Add in `boot_disk_type` and `boot_disk_size_gb` in `_CustomTrainingJob._prepare_and_validate_run` - Expose `boot_disk_type` and `boot_disk_size_gb` in `CustomJob.from_local_script` - Update TypeHint for command line arguments `args` to be passed to the Python task - Modify unit tests to for default and overwrite boot disk config --- google/cloud/aiplatform/jobs.py | 15 +- google/cloud/aiplatform/training_jobs.py | 44 ++++ .../aiplatform/utils/worker_spec_utils.py | 59 ++++-- tests/unit/aiplatform/test_custom_job.py | 43 +++- tests/unit/aiplatform/test_end_to_end.py | 8 + tests/unit/aiplatform/test_training_jobs.py | 188 ++++++++++++++++-- 6 files changed, 322 insertions(+), 35 deletions(-) diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 720aa46b21..20d8141a22 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -1070,13 +1070,15 @@ def from_local_script( display_name: str, script_path: str, container_uri: str, - args: Optional[List[Union[str, float, int]]] = None, + args: Optional[Sequence[str]] = None, requirements: Optional[Sequence[str]] = None, environment_variables: Optional[Dict[str, str]] = None, replica_count: int = 1, machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", accelerator_count: int = 0, + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, base_output_dir: 
Optional[str] = None, project: Optional[str] = None, location: Optional[str] = None, @@ -1110,7 +1112,7 @@ def from_local_script( Required. Local path to training script. container_uri (str): Required: Uri of the training container image to use for custom job. - args (Optional[List[Union[str, float, int]]]): + args (Optional[Sequence[str]]): Optional. Command line arguments to be passed to the Python task. requirements (Sequence[str]): Optional. List of python packages dependencies of script. @@ -1136,6 +1138,13 @@ def from_local_script( NVIDIA_TESLA_T4 accelerator_count (int): Optional. The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Optional. Type of the boot disk, default is `pd-ssd`. + Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Optional. Size in GB of the boot disk, default is 100GB. + boot disk size must be within the range of [100, 64000]. base_output_dir (str): Optional. GCS output directory of job. If not provided a timestamped directory in the staging directory will be used. 
@@ -1188,6 +1197,8 @@ def from_local_script( machine_type=machine_type, accelerator_count=accelerator_count, accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, ).pool_specs python_packager = source_utils._TrainingScriptPythonPackager( diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index db7db10f2f..52418096be 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -1139,6 +1139,8 @@ def _prepare_and_validate_run( machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", accelerator_count: int = 0, + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, ) -> Tuple[worker_spec_utils._DistributedTrainingSpec, Optional[gca_model.Model]]: """Create worker pool specs and managed model as well validating the run. @@ -1172,6 +1174,13 @@ def _prepare_and_validate_run( NVIDIA_TESLA_T4 accelerator_count (int): The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Type of the boot disk, default is `pd-ssd`. + Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Size in GB of the boot disk, default is 100GB. + boot disk size must be within the range of [100, 64000]. Returns: Worker pools specs and managed model for run. 
@@ -1204,6 +1213,8 @@ def _prepare_and_validate_run( machine_type=machine_type, accelerator_count=accelerator_count, accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, ).pool_specs managed_model = self._managed_model @@ -1588,6 +1599,8 @@ def run( machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", accelerator_count: int = 0, + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, training_fraction_split: float = 0.8, validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, @@ -1724,6 +1737,13 @@ def run( NVIDIA_TESLA_T4 accelerator_count (int): The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Type of the boot disk, default is `pd-ssd`. + Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Size in GB of the boot disk, default is 100GB. + boot disk size must be within the range of [100, 64000]. training_fraction_split (float): The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. @@ -1774,6 +1794,8 @@ def run( machine_type=machine_type, accelerator_count=accelerator_count, accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, ) # make and copy package @@ -2241,6 +2263,8 @@ def run( machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", accelerator_count: int = 0, + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, training_fraction_split: float = 0.8, validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, @@ -2370,6 +2394,13 @@ def run( NVIDIA_TESLA_T4 accelerator_count (int): The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Type of the boot disk, default is `pd-ssd`. 
+ Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Size in GB of the boot disk, default is 100GB. + boot disk size must be within the range of [100, 64000]. training_fraction_split (float): The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. @@ -2425,6 +2456,8 @@ def run( machine_type=machine_type, accelerator_count=accelerator_count, accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, ) return self._run( @@ -4402,6 +4435,8 @@ def run( machine_type: str = "n1-standard-4", accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", accelerator_count: int = 0, + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, training_fraction_split: float = 0.8, validation_fraction_split: float = 0.1, test_fraction_split: float = 0.1, @@ -4531,6 +4566,13 @@ def run( NVIDIA_TESLA_T4 accelerator_count (int): The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Type of the boot disk, default is `pd-ssd`. + Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Size in GB of the boot disk, default is 100GB. + boot disk size must be within the range of [100, 64000]. training_fraction_split (float): The fraction of the input data that is to be used to train the Model. This is ignored if Dataset is not provided. 
@@ -4581,6 +4623,8 @@ def run( machine_type=machine_type, accelerator_count=accelerator_count, accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, ) return self._run( diff --git a/google/cloud/aiplatform/utils/worker_spec_utils.py b/google/cloud/aiplatform/utils/worker_spec_utils.py index 385ac83979..1c0b60540f 100644 --- a/google/cloud/aiplatform/utils/worker_spec_utils.py +++ b/google/cloud/aiplatform/utils/worker_spec_utils.py @@ -22,16 +22,19 @@ ) -class _MachineSpec(NamedTuple): - """Specification container for Machine specs used for distributed training. +class _WorkerPoolSpec(NamedTuple): + """Specification container for Worker Pool specs used for distributed training. Usage: - spec = _MachineSpec( + spec = _WorkerPoolSpec( replica_count=10, machine_type='n1-standard-4', accelerator_count=2, - accelerator_type='NVIDIA_TESLA_K80') + accelerator_type='NVIDIA_TESLA_K80', + boot_disk_type='pd-ssd', + boot_disk_size_gb=100, + ) Note that container and python package specs are not stored with this spec. """ @@ -40,6 +43,8 @@ class _MachineSpec(NamedTuple): machine_type: str = "n1-standard-4" accelerator_count: int = 0 accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED" + boot_disk_type: str = "pd-ssd" + boot_disk_size_gb: int = 100 def _get_accelerator_type(self) -> Optional[str]: """Validates accelerator_type and returns the name of the accelerator. 
@@ -70,7 +75,12 @@ def spec_dict(self) -> Dict[str, Union[int, str, Dict[str, Union[int, str]]]]: spec = { "machine_spec": {"machine_type": self.machine_type}, "replica_count": self.replica_count, + "disk_spec": { + "boot_disk_type": self.boot_disk_type, + "boot_disk_size_gb": self.boot_disk_size_gb, + }, } + accelerator_type = self._get_accelerator_type() if accelerator_type and self.accelerator_count: spec["machine_spec"]["accelerator_type"] = accelerator_type @@ -98,25 +108,29 @@ class _DistributedTrainingSpec(NamedTuple): Usage: dist_training_spec = _DistributedTrainingSpec( - chief_spec = _MachineSpec( + chief_spec = _WorkerPoolSpec( replica_count=1, machine_type='n1-standard-4', accelerator_count=2, - accelerator_type='NVIDIA_TESLA_K80' - ), - worker_spec = _MachineSpec( + accelerator_type='NVIDIA_TESLA_K80', + boot_disk_type='pd-ssd', + boot_disk_size_gb=100, + ), + worker_spec = _WorkerPoolSpec( replica_count=10, machine_type='n1-standard-4', accelerator_count=2, - accelerator_type='NVIDIA_TESLA_K80' - ) + accelerator_type='NVIDIA_TESLA_K80', + boot_disk_type='pd-ssd', + boot_disk_size_gb=100, + ), ) """ - chief_spec: _MachineSpec = _MachineSpec() - worker_spec: _MachineSpec = _MachineSpec() - parameter_server_spec: _MachineSpec = _MachineSpec() - evaluator_spec: _MachineSpec = _MachineSpec() + chief_spec: _WorkerPoolSpec = _WorkerPoolSpec() + worker_spec: _WorkerPoolSpec = _WorkerPoolSpec() + parameter_server_spec: _WorkerPoolSpec = _WorkerPoolSpec() + evaluator_spec: _WorkerPoolSpec = _WorkerPoolSpec() @property def pool_specs( @@ -156,6 +170,8 @@ def chief_worker_pool( machine_type: str = "n1-standard-4", accelerator_count: int = 0, accelerator_type: str = "ACCELERATOR_TYPE_UNSPECIFIED", + boot_disk_type: str = "pd-ssd", + boot_disk_size_gb: int = 100, ) -> "_DistributedTrainingSpec": """Parameterizes Config to support only chief with worker replicas. 
@@ -174,6 +190,13 @@ def chief_worker_pool( NVIDIA_TESLA_T4 accelerator_count (int): The number of accelerators to attach to a worker replica. + boot_disk_type (str): + Type of the boot disk (default is `pd-ssd`). + Valid values: `pd-ssd` (Persistent Disk Solid State Drive) or + `pd-standard` (Persistent Disk Hard Disk Drive). + boot_disk_size_gb (int): + Size in GB of the boot disk (default is 100GB). + boot disk size must be within the range of [100, 64000]. Returns: _DistributedTrainingSpec representing one chief and n workers all of same @@ -182,18 +205,22 @@ def chief_worker_pool( if replica_count <= 0: return cls() - chief_spec = _MachineSpec( + chief_spec = _WorkerPoolSpec( replica_count=1, machine_type=machine_type, accelerator_count=accelerator_count, accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, ) - worker_spec = _MachineSpec( + worker_spec = _WorkerPoolSpec( replica_count=replica_count - 1, machine_type=machine_type, accelerator_count=accelerator_count, accelerator_type=accelerator_type, + boot_disk_type=boot_disk_type, + boot_disk_size_gb=boot_disk_size_gb, ) return cls(chief_spec=chief_spec, worker_spec=worker_spec) diff --git a/tests/unit/aiplatform/test_custom_job.py b/tests/unit/aiplatform/test_custom_job.py index da4fc1fbe7..f44a1471cc 100644 --- a/tests/unit/aiplatform/test_custom_job.py +++ b/tests/unit/aiplatform/test_custom_job.py @@ -54,6 +54,8 @@ _TEST_TRAINING_CONTAINER_IMAGE = "gcr.io/test-training/container:image" +_TEST_RUN_ARGS = ["-v", "0.1", "--test=arg"] + _TEST_WORKER_POOL_SPEC = [ { "machine_spec": { @@ -62,10 +64,11 @@ "accelerator_count": 1, }, "replica_count": 1, + "disk_spec": {"boot_disk_type": "pd-ssd", "boot_disk_size_gb": 100}, "container_spec": { "image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "command": [], - "args": [], + "args": _TEST_RUN_ARGS, }, } ] @@ -490,3 +493,41 @@ def test_create_custom_job_without_base_output_dir(self,): assert 
job.job_spec.base_output_directory.output_uri_prefix.startswith( f"{_TEST_STAGING_BUCKET}/aiplatform-custom-job" ) + + @pytest.mark.usefixtures("mock_python_package_to_gcs") + @pytest.mark.parametrize("sync", [True, False]) + def test_create_from_local_script_with_all_args( + self, get_custom_job_mock, create_custom_job_mock, sync + ): + aiplatform.init( + project=_TEST_PROJECT, + location=_TEST_LOCATION, + staging_bucket=_TEST_STAGING_BUCKET, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + # configuration on this is tested in test_training_jobs.py + job = aiplatform.CustomJob.from_local_script( + display_name=_TEST_DISPLAY_NAME, + script_path=test_training_jobs._TEST_LOCAL_SCRIPT_FILE_NAME, + container_uri=_TEST_TRAINING_CONTAINER_IMAGE, + args=_TEST_RUN_ARGS, + requirements=test_training_jobs._TEST_REQUIREMENTS, + environment_variables=test_training_jobs._TEST_ENVIRONMENT_VARIABLES, + replica_count=test_training_jobs._TEST_REPLICA_COUNT, + machine_type=test_training_jobs._TEST_MACHINE_TYPE, + accelerator_type=test_training_jobs._TEST_ACCELERATOR_TYPE, + accelerator_count=test_training_jobs._TEST_ACCELERATOR_COUNT, + boot_disk_type=test_training_jobs._TEST_BOOT_DISK_TYPE, + boot_disk_size_gb=test_training_jobs._TEST_BOOT_DISK_SIZE_GB, + base_output_dir=_TEST_BASE_OUTPUT_DIR, + labels=_TEST_LABELS, + ) + + job.run(sync=sync) + + job.wait() + + assert ( + job._gca_resource.state == gca_job_state_compat.JobState.JOB_STATE_SUCCEEDED + ) diff --git a/tests/unit/aiplatform/test_end_to_end.py b/tests/unit/aiplatform/test_end_to_end.py index 35006a3e95..d9e0788f39 100644 --- a/tests/unit/aiplatform/test_end_to_end.py +++ b/tests/unit/aiplatform/test_end_to_end.py @@ -211,6 +211,10 @@ def test_dataset_create_to_model_predict( "accelerator_type": test_training_jobs._TEST_ACCELERATOR_TYPE, "accelerator_count": test_training_jobs._TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": test_training_jobs._TEST_BOOT_DISK_TYPE_DEFAULT, + 
"boot_disk_size_gb": test_training_jobs._TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, @@ -394,6 +398,10 @@ def test_dataset_create_to_model_predict_with_pipeline_fail( "accelerator_type": test_training_jobs._TEST_ACCELERATOR_TYPE, "accelerator_count": test_training_jobs._TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": test_training_jobs._TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": test_training_jobs._TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": test_training_jobs._TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index 15824d3172..3e694e6a1e 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -95,6 +95,10 @@ _TEST_ACCELERATOR_TYPE = "NVIDIA_TESLA_K80" _TEST_INVALID_ACCELERATOR_TYPE = "NVIDIA_DOES_NOT_EXIST" _TEST_ACCELERATOR_COUNT = 1 +_TEST_BOOT_DISK_TYPE_DEFAULT = "pd-ssd" +_TEST_BOOT_DISK_SIZE_GB_DEFAULT = 100 +_TEST_BOOT_DISK_TYPE = "pd-standard" +_TEST_BOOT_DISK_SIZE_GB = 300 _TEST_MODEL_DISPLAY_NAME = "model-display-name" _TEST_LABELS = {"key": "value"} _TEST_MODEL_LABELS = {"model_key": "model_value"} @@ -691,6 +695,10 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, @@ -858,6 +866,10 @@ def 
test_run_call_pipeline_service_create_with_bigquery_destination( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, @@ -1136,6 +1148,10 @@ def test_run_call_pipeline_service_create_with_no_dataset( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, @@ -1392,6 +1408,10 @@ def test_run_call_pipeline_service_create_distributed_training( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, @@ -1407,6 +1427,10 @@ def test_run_call_pipeline_service_create_distributed_training( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, @@ -1657,6 +1681,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": 
_TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": source_utils._TrainingScriptPythonPackager.module_name, @@ -1906,6 +1934,10 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, "command": _TEST_TRAINING_CONTAINER_CMD, @@ -2063,6 +2095,10 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, "command": _TEST_TRAINING_CONTAINER_CMD, @@ -2323,6 +2359,10 @@ def test_run_call_pipeline_service_create_with_no_dataset( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, "command": _TEST_TRAINING_CONTAINER_CMD, @@ -2556,6 +2596,10 @@ def test_run_call_pipeline_service_create_distributed_training( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, "command": _TEST_TRAINING_CONTAINER_CMD, @@ -2569,6 +2613,10 @@ def 
test_run_call_pipeline_service_create_distributed_training( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, "command": _TEST_TRAINING_CONTAINER_CMD, @@ -2701,6 +2749,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "containerSpec": { "imageUri": _TEST_TRAINING_CONTAINER_IMAGE, "command": _TEST_TRAINING_CONTAINER_CMD, @@ -2830,9 +2882,9 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_raises_if_anno ) -class Test_MachineSpec: +class Test_WorkerPoolSpec: def test_machine_spec_return_spec_dict(self): - test_spec = worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._WorkerPoolSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2846,12 +2898,41 @@ def test_machine_spec_return_spec_dict(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": _TEST_REPLICA_COUNT, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, + } + + assert test_spec.spec_dict == true_spec_dict + + def test_machine_spec_return_spec_with_boot_disk_dict(self): + test_spec = worker_spec_utils._WorkerPoolSpec( + replica_count=_TEST_REPLICA_COUNT, + machine_type=_TEST_MACHINE_TYPE, + accelerator_count=_TEST_ACCELERATOR_COUNT, + accelerator_type=_TEST_ACCELERATOR_TYPE, + boot_disk_type=_TEST_BOOT_DISK_TYPE, + boot_disk_size_gb=_TEST_BOOT_DISK_SIZE_GB, + ) + + true_spec_dict = { + "machine_spec": { + "machine_type": _TEST_MACHINE_TYPE, + 
"accelerator_type": _TEST_ACCELERATOR_TYPE, + "accelerator_count": _TEST_ACCELERATOR_COUNT, + }, + "replica_count": _TEST_REPLICA_COUNT, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB, + }, } assert test_spec.spec_dict == true_spec_dict def test_machine_spec_return_spec_dict_with_no_accelerator(self): - test_spec = worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._WorkerPoolSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=0, @@ -2861,12 +2942,16 @@ def test_machine_spec_return_spec_dict_with_no_accelerator(self): true_spec_dict = { "machine_spec": {"machine_type": _TEST_MACHINE_TYPE}, "replica_count": _TEST_REPLICA_COUNT, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, } assert test_spec.spec_dict == true_spec_dict def test_machine_spec_spec_dict_raises_invalid_accelerator(self): - test_spec = worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._WorkerPoolSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2877,7 +2962,7 @@ def test_machine_spec_spec_dict_raises_invalid_accelerator(self): test_spec.spec_dict def test_machine_spec_spec_dict_is_empty(self): - test_spec = worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._WorkerPoolSpec( replica_count=0, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2887,7 +2972,7 @@ def test_machine_spec_spec_dict_is_empty(self): assert test_spec.is_empty def test_machine_spec_spec_dict_is_not_empty(self): - test_spec = worker_spec_utils._MachineSpec( + test_spec = worker_spec_utils._WorkerPoolSpec( replica_count=_TEST_REPLICA_COUNT, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2901,25 +2986,25 @@ class Test_DistributedTrainingSpec: def test_machine_spec_returns_pool_spec(self): 
spec = worker_spec_utils._DistributedTrainingSpec( - chief_spec=worker_spec_utils._MachineSpec( + chief_spec=worker_spec_utils._WorkerPoolSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - worker_spec=worker_spec_utils._MachineSpec( + worker_spec=worker_spec_utils._WorkerPoolSpec( replica_count=10, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - parameter_server_spec=worker_spec_utils._MachineSpec( + parameter_server_spec=worker_spec_utils._WorkerPoolSpec( replica_count=3, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - evaluator_spec=worker_spec_utils._MachineSpec( + evaluator_spec=worker_spec_utils._WorkerPoolSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -2935,6 +3020,10 @@ def test_machine_spec_returns_pool_spec(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 1, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, { "machine_spec": { @@ -2943,6 +3032,10 @@ def test_machine_spec_returns_pool_spec(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 10, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, { "machine_spec": { @@ -2951,6 +3044,10 @@ def test_machine_spec_returns_pool_spec(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 3, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, { "machine_spec": { @@ -2959,6 +3056,10 @@ def test_machine_spec_returns_pool_spec(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 1, + "disk_spec": { + 
"boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, ] @@ -2981,6 +3082,10 @@ def test_chief_worker_pool_returns_spec(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 1, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, { "machine_spec": { @@ -2989,6 +3094,10 @@ def test_chief_worker_pool_returns_spec(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 9, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, ] @@ -3011,6 +3120,10 @@ def test_chief_worker_pool_returns_just_chief(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 1, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, } ] @@ -3019,7 +3132,7 @@ def test_chief_worker_pool_returns_just_chief(self): def test_machine_spec_raise_with_more_than_one_chief_replica(self): spec = worker_spec_utils._DistributedTrainingSpec( - chief_spec=worker_spec_utils._MachineSpec( + chief_spec=worker_spec_utils._WorkerPoolSpec( replica_count=2, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, @@ -3033,20 +3146,20 @@ def test_machine_spec_raise_with_more_than_one_chief_replica(self): def test_machine_spec_handles_missing_pools(self): spec = worker_spec_utils._DistributedTrainingSpec( - chief_spec=worker_spec_utils._MachineSpec( + chief_spec=worker_spec_utils._WorkerPoolSpec( replica_count=1, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - worker_spec=worker_spec_utils._MachineSpec(replica_count=0), - parameter_server_spec=worker_spec_utils._MachineSpec( + worker_spec=worker_spec_utils._WorkerPoolSpec(replica_count=0), + 
parameter_server_spec=worker_spec_utils._WorkerPoolSpec( replica_count=3, machine_type=_TEST_MACHINE_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, accelerator_type=_TEST_ACCELERATOR_TYPE, ), - evaluator_spec=worker_spec_utils._MachineSpec(replica_count=0), + evaluator_spec=worker_spec_utils._WorkerPoolSpec(replica_count=0), ) true_pool_spec = [ @@ -3057,8 +3170,19 @@ def test_machine_spec_handles_missing_pools(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 1, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, + }, + { + "machine_spec": {"machine_type": "n1-standard-4"}, + "replica_count": 0, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, - {"machine_spec": {"machine_type": "n1-standard-4"}, "replica_count": 0}, { "machine_spec": { "machine_type": _TEST_MACHINE_TYPE, @@ -3066,6 +3190,10 @@ def test_machine_spec_handles_missing_pools(self): "accelerator_count": _TEST_ACCELERATOR_COUNT, }, "replica_count": 3, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, }, ] @@ -3149,6 +3277,10 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": _TEST_PYTHON_MODULE_NAME, @@ -3308,6 +3440,10 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, 
"python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": _TEST_PYTHON_MODULE_NAME, @@ -3462,6 +3598,10 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": _TEST_PYTHON_MODULE_NAME, @@ -3727,6 +3867,10 @@ def test_run_call_pipeline_service_create_with_no_dataset( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": _TEST_PYTHON_MODULE_NAME, @@ -3967,6 +4111,10 @@ def test_run_call_pipeline_service_create_distributed_training( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": _TEST_PYTHON_MODULE_NAME, @@ -3981,6 +4129,10 @@ def test_run_call_pipeline_service_create_distributed_training( "accelerator_type": _TEST_ACCELERATOR_TYPE, "accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": _TEST_PYTHON_MODULE_NAME, @@ -4113,6 +4265,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ "accelerator_type": _TEST_ACCELERATOR_TYPE, 
"accelerator_count": _TEST_ACCELERATOR_COUNT, }, + "disk_spec": { + "boot_disk_type": _TEST_BOOT_DISK_TYPE_DEFAULT, + "boot_disk_size_gb": _TEST_BOOT_DISK_SIZE_GB_DEFAULT, + }, "python_package_spec": { "executor_image_uri": _TEST_TRAINING_CONTAINER_IMAGE, "python_module": _TEST_PYTHON_MODULE_NAME, From 5f15b4f9a4bad2c9447747a8bdebaa99eab00b75 Mon Sep 17 00:00:00 2001 From: Yu-Han Liu Date: Thu, 12 Aug 2021 17:05:42 -0700 Subject: [PATCH 14/28] feat: split GAPIC samples by service (#599) * feat: split GAPIC samples by service --- .../create_dataset_image_sample.py | 0 .../create_dataset_image_sample_test.py | 2 +- .../create_dataset_sample.py | 0 .../create_dataset_sample_test.py | 2 +- .../create_dataset_tabular_bigquery_sample.py | 0 ...te_dataset_tabular_bigquery_sample_test.py | 2 +- .../create_dataset_tabular_gcs_sample.py | 0 .../create_dataset_tabular_gcs_sample_test.py | 2 +- .../create_dataset_text_sample.py | 0 .../create_dataset_video_sample.py | 0 .../create_dataset_video_sample_test.py | 2 +- .../delete_dataset_sample.py | 0 ...mage_classification_single_label_sample.py | 0 ...port_data_image_object_detection_sample.py | 0 .../import_data_sample.py | 0 ...text_classification_single_label_sample.py | 0 ...port_data_text_entity_extraction_sample.py | 0 ...data_text_entity_extraction_sample_test.py | 19 +++++++++++++++--- ...ort_data_text_sentiment_analysis_sample.py | 0 ...ata_text_sentiment_analysis_sample_test.py | 19 +++++++++++++++--- ...rt_data_video_action_recognition_sample.py | 0 ...ta_video_action_recognition_sample_test.py | 3 +-- ...import_data_video_classification_sample.py | 0 ...t_data_video_classification_sample_test.py | 3 +-- ...mport_data_video_object_tracking_sample.py | 0 ..._data_video_object_tracking_sample_test.py | 3 +-- .../create_endpoint_sample.py | 0 .../create_endpoint_sample_test.py | 2 +- .../delete_endpoint_sample.py | 0 ...eploy_model_custom_trained_model_sample.py | 0 ..._model_custom_trained_model_sample_test.py | 2 +- 
.../deploy_model_sample.py | 0 .../deploy_model_sample_test.py | 2 +- .../cancel_batch_prediction_job_sample.py | 0 .../cancel_custom_job_sample.py | 0 .../cancel_data_labeling_job_sample.py | 0 ...cancel_hyperparameter_tuning_job_sample.py | 0 ...te_batch_prediction_job_bigquery_sample.py | 0 ...tch_prediction_job_bigquery_sample_test.py | 2 +- .../create_batch_prediction_job_sample.py | 0 ...create_batch_prediction_job_sample_test.py | 2 +- ...ediction_job_tabular_forecasting_sample.py | 0 ...ion_job_tabular_forecasting_sample_test.py | 2 +- ...ediction_job_text_classification_sample.py | 0 ...ion_job_text_classification_sample_test.py | 2 +- ...ction_job_text_entity_extraction_sample.py | 0 ..._job_text_entity_extraction_sample_test.py | 2 +- ...tion_job_text_sentiment_analysis_sample.py | 0 ...job_text_sentiment_analysis_sample_test.py | 2 +- ...ion_job_video_action_recognition_sample.py | 0 ...ob_video_action_recognition_sample_test.py | 2 +- ...diction_job_video_classification_sample.py | 0 ...on_job_video_classification_sample_test.py | 2 +- ...iction_job_video_object_tracking_sample.py | 0 ...n_job_video_object_tracking_sample_test.py | 2 +- .../create_custom_job_sample.py | 0 .../create_custom_job_sample_test.py | 2 +- ...ata_labeling_job_active_learning_sample.py | 0 ...abeling_job_active_learning_sample_test.py | 2 +- ..._labeling_job_image_segmentation_sample.py | 0 ...ling_job_image_segmentation_sample_test.py | 2 +- .../create_data_labeling_job_images_sample.py | 0 ...te_data_labeling_job_images_sample_test.py | 2 +- .../create_data_labeling_job_sample.py | 0 .../create_data_labeling_job_sample_test.py | 2 +- ...ata_labeling_job_specialist_pool_sample.py | 0 ...abeling_job_specialist_pool_sample_test.py | 2 +- .../create_data_labeling_job_video_sample.py | 0 ...ate_data_labeling_job_video_sample_test.py | 2 +- ...ameter_tuning_job_python_package_sample.py | 0 ...r_tuning_job_python_package_sample_test.py | 2 +- 
...create_hyperparameter_tuning_job_sample.py | 0 ...e_hyperparameter_tuning_job_sample_test.py | 2 +- .../delete_batch_prediction_job_sample.py | 0 .../delete_custom_job_sample.py | 0 .../delete_data_labeling_job_sample.py | 0 ...delete_hyperparameter_tuning_job_sample.py | 0 .../get_batch_prediction_job_sample.py | 0 .../get_custom_job_sample.py | 0 .../get_custom_job_sample_test.py | 0 .../get_hyperparameter_tuning_job_sample.py | 0 ...t_hyperparameter_tuning_job_sample_test.py | 1 - .../delete_model_sample.py | 0 .../export_model_sample.py | 0 .../export_model_sample_test.py | 2 +- ...ort_model_tabular_classification_sample.py | 0 ...odel_tabular_classification_sample_test.py | 2 +- ...t_model_video_action_recognition_sample.py | 0 ...el_video_action_recognition_sample_test.py | 2 +- ..._evaluation_image_classification_sample.py | 0 ...valuation_image_object_detection_sample.py | 0 .../get_model_evaluation_sample.py | 0 .../get_model_evaluation_sample_test.py | 0 .../get_model_evaluation_slice_sample.py | 0 .../get_model_evaluation_slice_sample_test.py | 0 ...valuation_tabular_classification_sample.py | 0 ...tion_tabular_classification_sample_test.py | 0 ...el_evaluation_tabular_regression_sample.py | 0 ...aluation_tabular_regression_sample_test.py | 0 ...l_evaluation_text_classification_sample.py | 0 ...valuation_text_entity_extraction_sample.py | 0 ...aluation_text_sentiment_analysis_sample.py | 0 ...luation_video_action_recognition_sample.py | 0 ...on_video_action_recognition_sample_test.py | 0 ..._evaluation_video_classification_sample.py | 0 ...uation_video_classification_sample_test.py | 0 ...evaluation_video_object_tracking_sample.py | 0 ...ation_video_object_tracking_sample_test.py | 0 .../{ => model_service}/get_model_sample.py | 0 .../get_model_sample_test.py | 0 .../list_model_evaluation_slices_sample.py | 0 ...ist_model_evaluation_slices_sample_test.py | 0 ..._explain_image_managed_container_sample.py | 0 
...ain_image_managed_container_sample_test.py | 2 +- ...xplain_tabular_managed_container_sample.py | 0 ...n_tabular_managed_container_sample_test.py | 2 +- .../upload_model_sample.py | 0 .../upload_model_sample_test.py | 4 +++- .../cancel_training_pipeline_sample.py | 0 .../cancel_training_pipeline_sample_test.py | 2 +- ...ate_training_pipeline_custom_job_sample.py | 0 ...raining_pipeline_custom_job_sample_test.py | 2 +- ..._custom_training_managed_dataset_sample.py | 0 ...om_training_managed_dataset_sample_test.py | 2 +- ...ng_pipeline_image_classification_sample.py | 0 ...peline_image_classification_sample_test.py | 2 +- ..._pipeline_image_object_detection_sample.py | 0 ...line_image_object_detection_sample_test.py | 2 +- .../create_training_pipeline_sample.py | 0 .../create_training_pipeline_sample_test.py | 2 +- ..._pipeline_tabular_classification_sample.py | 0 ...line_tabular_classification_sample_test.py | 2 +- ...ing_pipeline_tabular_forecasting_sample.py | 0 ...ipeline_tabular_forecasting_sample_test.py | 6 +++--- ...ning_pipeline_tabular_regression_sample.py | 0 ...pipeline_tabular_regression_sample_test.py | 2 +- ...ing_pipeline_text_classification_sample.py | 0 ..._pipeline_text_entity_extraction_sample.py | 0 ...line_text_entity_extraction_sample_test.py | 2 +- ...pipeline_text_sentiment_analysis_sample.py | 0 ...ine_text_sentiment_analysis_sample_test.py | 2 +- ...ipeline_video_action_recognition_sample.py | 0 ...ne_video_action_recognition_sample_test.py | 2 +- ...ng_pipeline_video_classification_sample.py | 0 ...peline_video_classification_sample_test.py | 2 +- ...g_pipeline_video_object_tracking_sample.py | 0 ...eline_video_object_tracking_sample_test.py | 2 +- .../delete_training_pipeline_sample.py | 0 .../get_training_pipeline_sample.py | 0 .../get_training_pipeline_sample_test.py | 0 .../explain_tabular_sample.py | 0 .../explain_tabular_sample_test.py | 0 .../predict_custom_trained_model_sample.py | 0 ...redict_custom_trained_model_sample_test.py 
| 0 .../predict_image_classification_sample.py | 0 ...redict_image_classification_sample_test.py | 0 .../predict_image_object_detection_sample.py | 0 ...dict_image_object_detection_sample_test.py | 0 .../predict_sample.py | 0 .../predict_tabular_classification_sample.py | 0 ...dict_tabular_classification_sample_test.py | 0 .../predict_tabular_regression_sample.py | 0 .../predict_tabular_regression_sample_test.py | 0 ...text_classification_single_label_sample.py | 0 ...classification_single_label_sample_test.py | 0 .../predict_text_entity_extraction_sample.py | 0 ...dict_text_entity_extraction_sample_test.py | 0 .../predict_text_sentiment_analysis_sample.py | 0 ...ict_text_sentiment_analysis_sample_test.py | 0 .../resources/caprese_salad.jpg | Bin .../resources/daisy.jpg | Bin 171 files changed, 85 insertions(+), 61 deletions(-) rename samples/snippets/{ => dataset_service}/create_dataset_image_sample.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_image_sample_test.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_sample.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_sample_test.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_tabular_bigquery_sample.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_tabular_bigquery_sample_test.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_tabular_gcs_sample.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_tabular_gcs_sample_test.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_text_sample.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_video_sample.py (100%) rename samples/snippets/{ => dataset_service}/create_dataset_video_sample_test.py (100%) rename samples/snippets/{ => dataset_service}/delete_dataset_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_image_classification_single_label_sample.py (100%) 
rename samples/snippets/{ => dataset_service}/import_data_image_object_detection_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_text_classification_single_label_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_text_entity_extraction_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_text_entity_extraction_sample_test.py (65%) rename samples/snippets/{ => dataset_service}/import_data_text_sentiment_analysis_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_text_sentiment_analysis_sample_test.py (63%) rename samples/snippets/{ => dataset_service}/import_data_video_action_recognition_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_video_action_recognition_sample_test.py (99%) rename samples/snippets/{ => dataset_service}/import_data_video_classification_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_video_classification_sample_test.py (99%) rename samples/snippets/{ => dataset_service}/import_data_video_object_tracking_sample.py (100%) rename samples/snippets/{ => dataset_service}/import_data_video_object_tracking_sample_test.py (99%) rename samples/snippets/{ => endpoint_service}/create_endpoint_sample.py (100%) rename samples/snippets/{ => endpoint_service}/create_endpoint_sample_test.py (100%) rename samples/snippets/{ => endpoint_service}/delete_endpoint_sample.py (100%) rename samples/snippets/{ => endpoint_service}/deploy_model_custom_trained_model_sample.py (100%) rename samples/snippets/{ => endpoint_service}/deploy_model_custom_trained_model_sample_test.py (100%) rename samples/snippets/{ => endpoint_service}/deploy_model_sample.py (100%) rename samples/snippets/{ => endpoint_service}/deploy_model_sample_test.py (100%) rename samples/snippets/{ => job_service}/cancel_batch_prediction_job_sample.py (100%) rename samples/snippets/{ 
=> job_service}/cancel_custom_job_sample.py (100%) rename samples/snippets/{ => job_service}/cancel_data_labeling_job_sample.py (100%) rename samples/snippets/{ => job_service}/cancel_hyperparameter_tuning_job_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_bigquery_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_bigquery_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_tabular_forecasting_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_tabular_forecasting_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_text_classification_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_text_classification_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_text_entity_extraction_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_text_entity_extraction_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_text_sentiment_analysis_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_text_sentiment_analysis_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_video_action_recognition_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_video_action_recognition_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_video_classification_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_video_classification_sample_test.py (100%) rename samples/snippets/{ => 
job_service}/create_batch_prediction_job_video_object_tracking_sample.py (100%) rename samples/snippets/{ => job_service}/create_batch_prediction_job_video_object_tracking_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_custom_job_sample.py (100%) rename samples/snippets/{ => job_service}/create_custom_job_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_active_learning_sample.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_active_learning_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_image_segmentation_sample.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_image_segmentation_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_images_sample.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_images_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_sample.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_specialist_pool_sample.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_specialist_pool_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_video_sample.py (100%) rename samples/snippets/{ => job_service}/create_data_labeling_job_video_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_hyperparameter_tuning_job_python_package_sample.py (100%) rename samples/snippets/{ => job_service}/create_hyperparameter_tuning_job_python_package_sample_test.py (100%) rename samples/snippets/{ => job_service}/create_hyperparameter_tuning_job_sample.py (100%) rename samples/snippets/{ => job_service}/create_hyperparameter_tuning_job_sample_test.py (100%) rename samples/snippets/{ => 
job_service}/delete_batch_prediction_job_sample.py (100%) rename samples/snippets/{ => job_service}/delete_custom_job_sample.py (100%) rename samples/snippets/{ => job_service}/delete_data_labeling_job_sample.py (100%) rename samples/snippets/{ => job_service}/delete_hyperparameter_tuning_job_sample.py (100%) rename samples/snippets/{ => job_service}/get_batch_prediction_job_sample.py (100%) rename samples/snippets/{ => job_service}/get_custom_job_sample.py (100%) rename samples/snippets/{ => job_service}/get_custom_job_sample_test.py (100%) rename samples/snippets/{ => job_service}/get_hyperparameter_tuning_job_sample.py (100%) rename samples/snippets/{ => job_service}/get_hyperparameter_tuning_job_sample_test.py (99%) rename samples/snippets/{ => model_service}/delete_model_sample.py (100%) rename samples/snippets/{ => model_service}/export_model_sample.py (100%) rename samples/snippets/{ => model_service}/export_model_sample_test.py (100%) rename samples/snippets/{ => model_service}/export_model_tabular_classification_sample.py (100%) rename samples/snippets/{ => model_service}/export_model_tabular_classification_sample_test.py (100%) rename samples/snippets/{ => model_service}/export_model_video_action_recognition_sample.py (100%) rename samples/snippets/{ => model_service}/export_model_video_action_recognition_sample_test.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_image_classification_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_image_object_detection_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_sample_test.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_slice_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_slice_sample_test.py (100%) rename samples/snippets/{ => 
model_service}/get_model_evaluation_tabular_classification_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_tabular_classification_sample_test.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_tabular_regression_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_tabular_regression_sample_test.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_text_classification_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_text_entity_extraction_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_text_sentiment_analysis_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_video_action_recognition_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_video_action_recognition_sample_test.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_video_classification_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_video_classification_sample_test.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_video_object_tracking_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_evaluation_video_object_tracking_sample_test.py (100%) rename samples/snippets/{ => model_service}/get_model_sample.py (100%) rename samples/snippets/{ => model_service}/get_model_sample_test.py (100%) rename samples/snippets/{ => model_service}/list_model_evaluation_slices_sample.py (100%) rename samples/snippets/{ => model_service}/list_model_evaluation_slices_sample_test.py (100%) rename samples/snippets/{ => model_service}/upload_model_explain_image_managed_container_sample.py (100%) rename samples/snippets/{ => model_service}/upload_model_explain_image_managed_container_sample_test.py (100%) rename samples/snippets/{ => model_service}/upload_model_explain_tabular_managed_container_sample.py 
(100%) rename samples/snippets/{ => model_service}/upload_model_explain_tabular_managed_container_sample_test.py (100%) rename samples/snippets/{ => model_service}/upload_model_sample.py (100%) rename samples/snippets/{ => model_service}/upload_model_sample_test.py (99%) rename samples/snippets/{ => pipeline_service}/cancel_training_pipeline_sample.py (100%) rename samples/snippets/{ => pipeline_service}/cancel_training_pipeline_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_custom_job_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_custom_job_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_custom_training_managed_dataset_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_custom_training_managed_dataset_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_image_classification_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_image_classification_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_image_object_detection_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_image_object_detection_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_tabular_classification_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_tabular_classification_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_tabular_forecasting_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_tabular_forecasting_sample_test.py (100%) 
rename samples/snippets/{ => pipeline_service}/create_training_pipeline_tabular_regression_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_tabular_regression_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_text_classification_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_text_entity_extraction_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_text_entity_extraction_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_text_sentiment_analysis_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_text_sentiment_analysis_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_video_action_recognition_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_video_action_recognition_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_video_classification_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_video_classification_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_video_object_tracking_sample.py (100%) rename samples/snippets/{ => pipeline_service}/create_training_pipeline_video_object_tracking_sample_test.py (100%) rename samples/snippets/{ => pipeline_service}/delete_training_pipeline_sample.py (100%) rename samples/snippets/{ => pipeline_service}/get_training_pipeline_sample.py (100%) rename samples/snippets/{ => pipeline_service}/get_training_pipeline_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/explain_tabular_sample.py (100%) rename samples/snippets/{ => prediction_service}/explain_tabular_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_custom_trained_model_sample.py 
(100%) rename samples/snippets/{ => prediction_service}/predict_custom_trained_model_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_image_classification_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_image_classification_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_image_object_detection_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_image_object_detection_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_tabular_classification_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_tabular_classification_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_tabular_regression_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_tabular_regression_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_text_classification_single_label_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_text_classification_single_label_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_text_entity_extraction_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_text_entity_extraction_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/predict_text_sentiment_analysis_sample.py (100%) rename samples/snippets/{ => prediction_service}/predict_text_sentiment_analysis_sample_test.py (100%) rename samples/snippets/{ => prediction_service}/resources/caprese_salad.jpg (100%) rename samples/snippets/{ => prediction_service}/resources/daisy.jpg (100%) diff --git a/samples/snippets/create_dataset_image_sample.py b/samples/snippets/dataset_service/create_dataset_image_sample.py similarity index 100% rename from samples/snippets/create_dataset_image_sample.py rename to 
samples/snippets/dataset_service/create_dataset_image_sample.py diff --git a/samples/snippets/create_dataset_image_sample_test.py b/samples/snippets/dataset_service/create_dataset_image_sample_test.py similarity index 100% rename from samples/snippets/create_dataset_image_sample_test.py rename to samples/snippets/dataset_service/create_dataset_image_sample_test.py index c2221d2c84..75305313a3 100644 --- a/samples/snippets/create_dataset_image_sample_test.py +++ b/samples/snippets/dataset_service/create_dataset_image_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_dataset_image_sample import pytest -import create_dataset_image_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_dataset_sample.py b/samples/snippets/dataset_service/create_dataset_sample.py similarity index 100% rename from samples/snippets/create_dataset_sample.py rename to samples/snippets/dataset_service/create_dataset_sample.py diff --git a/samples/snippets/create_dataset_sample_test.py b/samples/snippets/dataset_service/create_dataset_sample_test.py similarity index 100% rename from samples/snippets/create_dataset_sample_test.py rename to samples/snippets/dataset_service/create_dataset_sample_test.py index 5394395dec..c56a8e8a29 100644 --- a/samples/snippets/create_dataset_sample_test.py +++ b/samples/snippets/dataset_service/create_dataset_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_dataset_sample import pytest -import create_dataset_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_dataset_tabular_bigquery_sample.py b/samples/snippets/dataset_service/create_dataset_tabular_bigquery_sample.py similarity index 100% rename from samples/snippets/create_dataset_tabular_bigquery_sample.py rename to samples/snippets/dataset_service/create_dataset_tabular_bigquery_sample.py diff --git 
a/samples/snippets/create_dataset_tabular_bigquery_sample_test.py b/samples/snippets/dataset_service/create_dataset_tabular_bigquery_sample_test.py similarity index 100% rename from samples/snippets/create_dataset_tabular_bigquery_sample_test.py rename to samples/snippets/dataset_service/create_dataset_tabular_bigquery_sample_test.py index d8f8dc1993..0752db30e6 100644 --- a/samples/snippets/create_dataset_tabular_bigquery_sample_test.py +++ b/samples/snippets/dataset_service/create_dataset_tabular_bigquery_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_dataset_tabular_bigquery_sample import pytest -import create_dataset_tabular_bigquery_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_dataset_tabular_gcs_sample.py b/samples/snippets/dataset_service/create_dataset_tabular_gcs_sample.py similarity index 100% rename from samples/snippets/create_dataset_tabular_gcs_sample.py rename to samples/snippets/dataset_service/create_dataset_tabular_gcs_sample.py diff --git a/samples/snippets/create_dataset_tabular_gcs_sample_test.py b/samples/snippets/dataset_service/create_dataset_tabular_gcs_sample_test.py similarity index 100% rename from samples/snippets/create_dataset_tabular_gcs_sample_test.py rename to samples/snippets/dataset_service/create_dataset_tabular_gcs_sample_test.py index ff9da3ea74..03f07c46aa 100644 --- a/samples/snippets/create_dataset_tabular_gcs_sample_test.py +++ b/samples/snippets/dataset_service/create_dataset_tabular_gcs_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_dataset_tabular_gcs_sample import pytest -import create_dataset_tabular_gcs_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_dataset_text_sample.py b/samples/snippets/dataset_service/create_dataset_text_sample.py similarity index 100% rename from samples/snippets/create_dataset_text_sample.py 
rename to samples/snippets/dataset_service/create_dataset_text_sample.py diff --git a/samples/snippets/create_dataset_video_sample.py b/samples/snippets/dataset_service/create_dataset_video_sample.py similarity index 100% rename from samples/snippets/create_dataset_video_sample.py rename to samples/snippets/dataset_service/create_dataset_video_sample.py diff --git a/samples/snippets/create_dataset_video_sample_test.py b/samples/snippets/dataset_service/create_dataset_video_sample_test.py similarity index 100% rename from samples/snippets/create_dataset_video_sample_test.py rename to samples/snippets/dataset_service/create_dataset_video_sample_test.py index 35cf1d08d2..902676b8bb 100644 --- a/samples/snippets/create_dataset_video_sample_test.py +++ b/samples/snippets/dataset_service/create_dataset_video_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_dataset_video_sample import pytest -import create_dataset_video_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/delete_dataset_sample.py b/samples/snippets/dataset_service/delete_dataset_sample.py similarity index 100% rename from samples/snippets/delete_dataset_sample.py rename to samples/snippets/dataset_service/delete_dataset_sample.py diff --git a/samples/snippets/import_data_image_classification_single_label_sample.py b/samples/snippets/dataset_service/import_data_image_classification_single_label_sample.py similarity index 100% rename from samples/snippets/import_data_image_classification_single_label_sample.py rename to samples/snippets/dataset_service/import_data_image_classification_single_label_sample.py diff --git a/samples/snippets/import_data_image_object_detection_sample.py b/samples/snippets/dataset_service/import_data_image_object_detection_sample.py similarity index 100% rename from samples/snippets/import_data_image_object_detection_sample.py rename to 
samples/snippets/dataset_service/import_data_image_object_detection_sample.py diff --git a/samples/snippets/import_data_sample.py b/samples/snippets/dataset_service/import_data_sample.py similarity index 100% rename from samples/snippets/import_data_sample.py rename to samples/snippets/dataset_service/import_data_sample.py diff --git a/samples/snippets/import_data_text_classification_single_label_sample.py b/samples/snippets/dataset_service/import_data_text_classification_single_label_sample.py similarity index 100% rename from samples/snippets/import_data_text_classification_single_label_sample.py rename to samples/snippets/dataset_service/import_data_text_classification_single_label_sample.py diff --git a/samples/snippets/import_data_text_entity_extraction_sample.py b/samples/snippets/dataset_service/import_data_text_entity_extraction_sample.py similarity index 100% rename from samples/snippets/import_data_text_entity_extraction_sample.py rename to samples/snippets/dataset_service/import_data_text_entity_extraction_sample.py diff --git a/samples/snippets/import_data_text_entity_extraction_sample_test.py b/samples/snippets/dataset_service/import_data_text_entity_extraction_sample_test.py similarity index 65% rename from samples/snippets/import_data_text_entity_extraction_sample_test.py rename to samples/snippets/dataset_service/import_data_text_entity_extraction_sample_test.py index 7239407ec8..728fcd86fc 100644 --- a/samples/snippets/import_data_text_entity_extraction_sample_test.py +++ b/samples/snippets/dataset_service/import_data_text_entity_extraction_sample_test.py @@ -1,9 +1,22 @@ -import os +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -import pytest -import import_data_text_entity_extraction_sample +import os +import import_data_text_entity_extraction_sample +import pytest PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") LOCATION = "us-central1" diff --git a/samples/snippets/import_data_text_sentiment_analysis_sample.py b/samples/snippets/dataset_service/import_data_text_sentiment_analysis_sample.py similarity index 100% rename from samples/snippets/import_data_text_sentiment_analysis_sample.py rename to samples/snippets/dataset_service/import_data_text_sentiment_analysis_sample.py diff --git a/samples/snippets/import_data_text_sentiment_analysis_sample_test.py b/samples/snippets/dataset_service/import_data_text_sentiment_analysis_sample_test.py similarity index 63% rename from samples/snippets/import_data_text_sentiment_analysis_sample_test.py rename to samples/snippets/dataset_service/import_data_text_sentiment_analysis_sample_test.py index 2bca6f4779..48265e6093 100644 --- a/samples/snippets/import_data_text_sentiment_analysis_sample_test.py +++ b/samples/snippets/dataset_service/import_data_text_sentiment_analysis_sample_test.py @@ -1,9 +1,22 @@ -import os +# Copyright 2020 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. -import pytest -import import_data_text_sentiment_analysis_sample +import os +import import_data_text_sentiment_analysis_sample +import pytest PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") LOCATION = "us-central1" diff --git a/samples/snippets/import_data_video_action_recognition_sample.py b/samples/snippets/dataset_service/import_data_video_action_recognition_sample.py similarity index 100% rename from samples/snippets/import_data_video_action_recognition_sample.py rename to samples/snippets/dataset_service/import_data_video_action_recognition_sample.py diff --git a/samples/snippets/import_data_video_action_recognition_sample_test.py b/samples/snippets/dataset_service/import_data_video_action_recognition_sample_test.py similarity index 99% rename from samples/snippets/import_data_video_action_recognition_sample_test.py rename to samples/snippets/dataset_service/import_data_video_action_recognition_sample_test.py index fb72b90833..7680e4d004 100644 --- a/samples/snippets/import_data_video_action_recognition_sample_test.py +++ b/samples/snippets/dataset_service/import_data_video_action_recognition_sample_test.py @@ -15,9 +15,8 @@ import os -import pytest - import import_data_video_action_recognition_sample +import pytest PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") LOCATION = "us-central1" diff --git a/samples/snippets/import_data_video_classification_sample.py b/samples/snippets/dataset_service/import_data_video_classification_sample.py similarity index 100% rename from samples/snippets/import_data_video_classification_sample.py rename 
to samples/snippets/dataset_service/import_data_video_classification_sample.py diff --git a/samples/snippets/import_data_video_classification_sample_test.py b/samples/snippets/dataset_service/import_data_video_classification_sample_test.py similarity index 99% rename from samples/snippets/import_data_video_classification_sample_test.py rename to samples/snippets/dataset_service/import_data_video_classification_sample_test.py index f786670844..2b588e3f6b 100644 --- a/samples/snippets/import_data_video_classification_sample_test.py +++ b/samples/snippets/dataset_service/import_data_video_classification_sample_test.py @@ -15,9 +15,8 @@ import os -import pytest - import import_data_video_classification_sample +import pytest PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") LOCATION = "us-central1" diff --git a/samples/snippets/import_data_video_object_tracking_sample.py b/samples/snippets/dataset_service/import_data_video_object_tracking_sample.py similarity index 100% rename from samples/snippets/import_data_video_object_tracking_sample.py rename to samples/snippets/dataset_service/import_data_video_object_tracking_sample.py diff --git a/samples/snippets/import_data_video_object_tracking_sample_test.py b/samples/snippets/dataset_service/import_data_video_object_tracking_sample_test.py similarity index 99% rename from samples/snippets/import_data_video_object_tracking_sample_test.py rename to samples/snippets/dataset_service/import_data_video_object_tracking_sample_test.py index 6ba16a9c43..9813b2753c 100644 --- a/samples/snippets/import_data_video_object_tracking_sample_test.py +++ b/samples/snippets/dataset_service/import_data_video_object_tracking_sample_test.py @@ -15,9 +15,8 @@ import os -import pytest - import import_data_video_object_tracking_sample +import pytest PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") LOCATION = "us-central1" diff --git a/samples/snippets/create_endpoint_sample.py 
b/samples/snippets/endpoint_service/create_endpoint_sample.py similarity index 100% rename from samples/snippets/create_endpoint_sample.py rename to samples/snippets/endpoint_service/create_endpoint_sample.py diff --git a/samples/snippets/create_endpoint_sample_test.py b/samples/snippets/endpoint_service/create_endpoint_sample_test.py similarity index 100% rename from samples/snippets/create_endpoint_sample_test.py rename to samples/snippets/endpoint_service/create_endpoint_sample_test.py index 5d6c66e510..fa2b3f7053 100644 --- a/samples/snippets/create_endpoint_sample_test.py +++ b/samples/snippets/endpoint_service/create_endpoint_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_endpoint_sample import pytest -import create_endpoint_sample import helpers DISPLAY_NAME = f"temp_create_endpoint_test_{uuid4()}" diff --git a/samples/snippets/delete_endpoint_sample.py b/samples/snippets/endpoint_service/delete_endpoint_sample.py similarity index 100% rename from samples/snippets/delete_endpoint_sample.py rename to samples/snippets/endpoint_service/delete_endpoint_sample.py diff --git a/samples/snippets/deploy_model_custom_trained_model_sample.py b/samples/snippets/endpoint_service/deploy_model_custom_trained_model_sample.py similarity index 100% rename from samples/snippets/deploy_model_custom_trained_model_sample.py rename to samples/snippets/endpoint_service/deploy_model_custom_trained_model_sample.py diff --git a/samples/snippets/deploy_model_custom_trained_model_sample_test.py b/samples/snippets/endpoint_service/deploy_model_custom_trained_model_sample_test.py similarity index 100% rename from samples/snippets/deploy_model_custom_trained_model_sample_test.py rename to samples/snippets/endpoint_service/deploy_model_custom_trained_model_sample_test.py index 23399a7a3b..6151889ede 100644 --- a/samples/snippets/deploy_model_custom_trained_model_sample_test.py +++ 
b/samples/snippets/endpoint_service/deploy_model_custom_trained_model_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import deploy_model_custom_trained_model_sample import pytest -import deploy_model_custom_trained_model_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/deploy_model_sample.py b/samples/snippets/endpoint_service/deploy_model_sample.py similarity index 100% rename from samples/snippets/deploy_model_sample.py rename to samples/snippets/endpoint_service/deploy_model_sample.py diff --git a/samples/snippets/deploy_model_sample_test.py b/samples/snippets/endpoint_service/deploy_model_sample_test.py similarity index 100% rename from samples/snippets/deploy_model_sample_test.py rename to samples/snippets/endpoint_service/deploy_model_sample_test.py index e739d75f2f..b12b234b8b 100644 --- a/samples/snippets/deploy_model_sample_test.py +++ b/samples/snippets/endpoint_service/deploy_model_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import deploy_model_sample import pytest -import deploy_model_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/cancel_batch_prediction_job_sample.py b/samples/snippets/job_service/cancel_batch_prediction_job_sample.py similarity index 100% rename from samples/snippets/cancel_batch_prediction_job_sample.py rename to samples/snippets/job_service/cancel_batch_prediction_job_sample.py diff --git a/samples/snippets/cancel_custom_job_sample.py b/samples/snippets/job_service/cancel_custom_job_sample.py similarity index 100% rename from samples/snippets/cancel_custom_job_sample.py rename to samples/snippets/job_service/cancel_custom_job_sample.py diff --git a/samples/snippets/cancel_data_labeling_job_sample.py b/samples/snippets/job_service/cancel_data_labeling_job_sample.py similarity index 100% rename from samples/snippets/cancel_data_labeling_job_sample.py rename to 
samples/snippets/job_service/cancel_data_labeling_job_sample.py diff --git a/samples/snippets/cancel_hyperparameter_tuning_job_sample.py b/samples/snippets/job_service/cancel_hyperparameter_tuning_job_sample.py similarity index 100% rename from samples/snippets/cancel_hyperparameter_tuning_job_sample.py rename to samples/snippets/job_service/cancel_hyperparameter_tuning_job_sample.py diff --git a/samples/snippets/create_batch_prediction_job_bigquery_sample.py b/samples/snippets/job_service/create_batch_prediction_job_bigquery_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_bigquery_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_bigquery_sample.py diff --git a/samples/snippets/create_batch_prediction_job_bigquery_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_bigquery_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_bigquery_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_bigquery_sample_test.py index d82217aea7..66ab41baef 100644 --- a/samples/snippets/create_batch_prediction_job_bigquery_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_bigquery_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_bigquery_sample import pytest -import create_batch_prediction_job_bigquery_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_sample.py b/samples/snippets/job_service/create_batch_prediction_job_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_sample.py diff --git a/samples/snippets/create_batch_prediction_job_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_sample_test.py similarity index 100% 
rename from samples/snippets/create_batch_prediction_job_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_sample_test.py index 6690a11d0b..32148ae691 100644 --- a/samples/snippets/create_batch_prediction_job_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_sample import pytest -import create_batch_prediction_job_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_tabular_forecasting_sample.py b/samples/snippets/job_service/create_batch_prediction_job_tabular_forecasting_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_tabular_forecasting_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_tabular_forecasting_sample.py diff --git a/samples/snippets/create_batch_prediction_job_tabular_forecasting_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_tabular_forecasting_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_tabular_forecasting_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_tabular_forecasting_sample_test.py index 2ec78e5c93..04eb3ede36 100644 --- a/samples/snippets/create_batch_prediction_job_tabular_forecasting_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_tabular_forecasting_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_tabular_forecasting_sample import pytest -import create_batch_prediction_job_tabular_forecasting_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_text_classification_sample.py 
b/samples/snippets/job_service/create_batch_prediction_job_text_classification_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_text_classification_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_text_classification_sample.py diff --git a/samples/snippets/create_batch_prediction_job_text_classification_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_text_classification_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_text_classification_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_text_classification_sample_test.py index e7a1fdd463..c35f8b572c 100644 --- a/samples/snippets/create_batch_prediction_job_text_classification_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_text_classification_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_text_classification_sample import pytest -import create_batch_prediction_job_text_classification_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_text_entity_extraction_sample.py b/samples/snippets/job_service/create_batch_prediction_job_text_entity_extraction_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_text_entity_extraction_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_text_entity_extraction_sample.py diff --git a/samples/snippets/create_batch_prediction_job_text_entity_extraction_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_text_entity_extraction_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_text_entity_extraction_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_text_entity_extraction_sample_test.py index 
7c65f1e604..643f16ec90 100644 --- a/samples/snippets/create_batch_prediction_job_text_entity_extraction_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_text_entity_extraction_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_text_entity_extraction_sample import pytest -import create_batch_prediction_job_text_entity_extraction_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_text_sentiment_analysis_sample.py b/samples/snippets/job_service/create_batch_prediction_job_text_sentiment_analysis_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_text_sentiment_analysis_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_text_sentiment_analysis_sample.py diff --git a/samples/snippets/create_batch_prediction_job_text_sentiment_analysis_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_text_sentiment_analysis_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_text_sentiment_analysis_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_text_sentiment_analysis_sample_test.py index c8eca77a90..d56a58070f 100644 --- a/samples/snippets/create_batch_prediction_job_text_sentiment_analysis_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_text_sentiment_analysis_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_text_sentiment_analysis_sample import pytest -import create_batch_prediction_job_text_sentiment_analysis_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_video_action_recognition_sample.py b/samples/snippets/job_service/create_batch_prediction_job_video_action_recognition_sample.py 
similarity index 100% rename from samples/snippets/create_batch_prediction_job_video_action_recognition_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_video_action_recognition_sample.py diff --git a/samples/snippets/create_batch_prediction_job_video_action_recognition_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_video_action_recognition_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_video_action_recognition_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_video_action_recognition_sample_test.py index 339e565fad..94a75c2906 100644 --- a/samples/snippets/create_batch_prediction_job_video_action_recognition_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_video_action_recognition_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_batch_prediction_job_video_action_recognition_sample import pytest -import create_batch_prediction_job_video_action_recognition_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_video_classification_sample.py b/samples/snippets/job_service/create_batch_prediction_job_video_classification_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_video_classification_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_video_classification_sample.py diff --git a/samples/snippets/create_batch_prediction_job_video_classification_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_video_classification_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_video_classification_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_video_classification_sample_test.py index 732f102a6d..a47af0da52 100644 --- 
a/samples/snippets/create_batch_prediction_job_video_classification_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_video_classification_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_video_classification_sample import pytest -import create_batch_prediction_job_video_classification_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_batch_prediction_job_video_object_tracking_sample.py b/samples/snippets/job_service/create_batch_prediction_job_video_object_tracking_sample.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_video_object_tracking_sample.py rename to samples/snippets/job_service/create_batch_prediction_job_video_object_tracking_sample.py diff --git a/samples/snippets/create_batch_prediction_job_video_object_tracking_sample_test.py b/samples/snippets/job_service/create_batch_prediction_job_video_object_tracking_sample_test.py similarity index 100% rename from samples/snippets/create_batch_prediction_job_video_object_tracking_sample_test.py rename to samples/snippets/job_service/create_batch_prediction_job_video_object_tracking_sample_test.py index c4cbbd597f..42cdb31023 100644 --- a/samples/snippets/create_batch_prediction_job_video_object_tracking_sample_test.py +++ b/samples/snippets/job_service/create_batch_prediction_job_video_object_tracking_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_batch_prediction_job_video_object_tracking_sample import pytest -import create_batch_prediction_job_video_object_tracking_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_custom_job_sample.py b/samples/snippets/job_service/create_custom_job_sample.py similarity index 100% rename from samples/snippets/create_custom_job_sample.py rename to 
samples/snippets/job_service/create_custom_job_sample.py diff --git a/samples/snippets/create_custom_job_sample_test.py b/samples/snippets/job_service/create_custom_job_sample_test.py similarity index 100% rename from samples/snippets/create_custom_job_sample_test.py rename to samples/snippets/job_service/create_custom_job_sample_test.py index 0a29132cdc..c067983913 100644 --- a/samples/snippets/create_custom_job_sample_test.py +++ b/samples/snippets/job_service/create_custom_job_sample_test.py @@ -16,9 +16,9 @@ import os import uuid +import create_custom_job_sample import pytest -import create_custom_job_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_data_labeling_job_active_learning_sample.py b/samples/snippets/job_service/create_data_labeling_job_active_learning_sample.py similarity index 100% rename from samples/snippets/create_data_labeling_job_active_learning_sample.py rename to samples/snippets/job_service/create_data_labeling_job_active_learning_sample.py diff --git a/samples/snippets/create_data_labeling_job_active_learning_sample_test.py b/samples/snippets/job_service/create_data_labeling_job_active_learning_sample_test.py similarity index 100% rename from samples/snippets/create_data_labeling_job_active_learning_sample_test.py rename to samples/snippets/job_service/create_data_labeling_job_active_learning_sample_test.py index 4ec5394535..ee264dbca2 100644 --- a/samples/snippets/create_data_labeling_job_active_learning_sample_test.py +++ b/samples/snippets/job_service/create_data_labeling_job_active_learning_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_data_labeling_job_active_learning_sample import pytest -import create_data_labeling_job_active_learning_sample import helpers API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT") diff --git a/samples/snippets/create_data_labeling_job_image_segmentation_sample.py 
b/samples/snippets/job_service/create_data_labeling_job_image_segmentation_sample.py similarity index 100% rename from samples/snippets/create_data_labeling_job_image_segmentation_sample.py rename to samples/snippets/job_service/create_data_labeling_job_image_segmentation_sample.py diff --git a/samples/snippets/create_data_labeling_job_image_segmentation_sample_test.py b/samples/snippets/job_service/create_data_labeling_job_image_segmentation_sample_test.py similarity index 100% rename from samples/snippets/create_data_labeling_job_image_segmentation_sample_test.py rename to samples/snippets/job_service/create_data_labeling_job_image_segmentation_sample_test.py index e5f365d234..f0500d0bc9 100644 --- a/samples/snippets/create_data_labeling_job_image_segmentation_sample_test.py +++ b/samples/snippets/job_service/create_data_labeling_job_image_segmentation_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_data_labeling_job_image_segmentation_sample import pytest -import create_data_labeling_job_image_segmentation_sample import helpers API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT") diff --git a/samples/snippets/create_data_labeling_job_images_sample.py b/samples/snippets/job_service/create_data_labeling_job_images_sample.py similarity index 100% rename from samples/snippets/create_data_labeling_job_images_sample.py rename to samples/snippets/job_service/create_data_labeling_job_images_sample.py diff --git a/samples/snippets/create_data_labeling_job_images_sample_test.py b/samples/snippets/job_service/create_data_labeling_job_images_sample_test.py similarity index 100% rename from samples/snippets/create_data_labeling_job_images_sample_test.py rename to samples/snippets/job_service/create_data_labeling_job_images_sample_test.py index 026a0fbd58..b56a377ea7 100644 --- a/samples/snippets/create_data_labeling_job_images_sample_test.py +++ b/samples/snippets/job_service/create_data_labeling_job_images_sample_test.py @@ -15,9 +15,9 @@ import os 
from uuid import uuid4 +import create_data_labeling_job_images_sample import pytest -import create_data_labeling_job_images_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_data_labeling_job_sample.py b/samples/snippets/job_service/create_data_labeling_job_sample.py similarity index 100% rename from samples/snippets/create_data_labeling_job_sample.py rename to samples/snippets/job_service/create_data_labeling_job_sample.py diff --git a/samples/snippets/create_data_labeling_job_sample_test.py b/samples/snippets/job_service/create_data_labeling_job_sample_test.py similarity index 100% rename from samples/snippets/create_data_labeling_job_sample_test.py rename to samples/snippets/job_service/create_data_labeling_job_sample_test.py index 847e452c0a..8b77950b84 100644 --- a/samples/snippets/create_data_labeling_job_sample_test.py +++ b/samples/snippets/job_service/create_data_labeling_job_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_data_labeling_job_sample import pytest -import create_data_labeling_job_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_data_labeling_job_specialist_pool_sample.py b/samples/snippets/job_service/create_data_labeling_job_specialist_pool_sample.py similarity index 100% rename from samples/snippets/create_data_labeling_job_specialist_pool_sample.py rename to samples/snippets/job_service/create_data_labeling_job_specialist_pool_sample.py diff --git a/samples/snippets/create_data_labeling_job_specialist_pool_sample_test.py b/samples/snippets/job_service/create_data_labeling_job_specialist_pool_sample_test.py similarity index 100% rename from samples/snippets/create_data_labeling_job_specialist_pool_sample_test.py rename to samples/snippets/job_service/create_data_labeling_job_specialist_pool_sample_test.py index 0f0f882c8c..7ae76a0550 100644 --- 
a/samples/snippets/create_data_labeling_job_specialist_pool_sample_test.py +++ b/samples/snippets/job_service/create_data_labeling_job_specialist_pool_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_data_labeling_job_specialist_pool_sample import pytest -import create_data_labeling_job_specialist_pool_sample import helpers API_ENDPOINT = os.getenv("DATA_LABELING_API_ENDPOINT") diff --git a/samples/snippets/create_data_labeling_job_video_sample.py b/samples/snippets/job_service/create_data_labeling_job_video_sample.py similarity index 100% rename from samples/snippets/create_data_labeling_job_video_sample.py rename to samples/snippets/job_service/create_data_labeling_job_video_sample.py diff --git a/samples/snippets/create_data_labeling_job_video_sample_test.py b/samples/snippets/job_service/create_data_labeling_job_video_sample_test.py similarity index 100% rename from samples/snippets/create_data_labeling_job_video_sample_test.py rename to samples/snippets/job_service/create_data_labeling_job_video_sample_test.py index 5d952ec552..6b38d8d65c 100644 --- a/samples/snippets/create_data_labeling_job_video_sample_test.py +++ b/samples/snippets/job_service/create_data_labeling_job_video_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_data_labeling_job_video_sample import pytest -import create_data_labeling_job_video_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_hyperparameter_tuning_job_python_package_sample.py b/samples/snippets/job_service/create_hyperparameter_tuning_job_python_package_sample.py similarity index 100% rename from samples/snippets/create_hyperparameter_tuning_job_python_package_sample.py rename to samples/snippets/job_service/create_hyperparameter_tuning_job_python_package_sample.py diff --git a/samples/snippets/create_hyperparameter_tuning_job_python_package_sample_test.py 
b/samples/snippets/job_service/create_hyperparameter_tuning_job_python_package_sample_test.py similarity index 100% rename from samples/snippets/create_hyperparameter_tuning_job_python_package_sample_test.py rename to samples/snippets/job_service/create_hyperparameter_tuning_job_python_package_sample_test.py index d8a7ca6fef..9e8538810a 100644 --- a/samples/snippets/create_hyperparameter_tuning_job_python_package_sample_test.py +++ b/samples/snippets/job_service/create_hyperparameter_tuning_job_python_package_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_hyperparameter_tuning_job_python_package_sample import pytest -import create_hyperparameter_tuning_job_python_package_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_hyperparameter_tuning_job_sample.py b/samples/snippets/job_service/create_hyperparameter_tuning_job_sample.py similarity index 100% rename from samples/snippets/create_hyperparameter_tuning_job_sample.py rename to samples/snippets/job_service/create_hyperparameter_tuning_job_sample.py diff --git a/samples/snippets/create_hyperparameter_tuning_job_sample_test.py b/samples/snippets/job_service/create_hyperparameter_tuning_job_sample_test.py similarity index 100% rename from samples/snippets/create_hyperparameter_tuning_job_sample_test.py rename to samples/snippets/job_service/create_hyperparameter_tuning_job_sample_test.py index 9a16bdcb9c..63b1ac9be1 100644 --- a/samples/snippets/create_hyperparameter_tuning_job_sample_test.py +++ b/samples/snippets/job_service/create_hyperparameter_tuning_job_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_hyperparameter_tuning_job_sample import pytest -import create_hyperparameter_tuning_job_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/delete_batch_prediction_job_sample.py b/samples/snippets/job_service/delete_batch_prediction_job_sample.py 
similarity index 100% rename from samples/snippets/delete_batch_prediction_job_sample.py rename to samples/snippets/job_service/delete_batch_prediction_job_sample.py diff --git a/samples/snippets/delete_custom_job_sample.py b/samples/snippets/job_service/delete_custom_job_sample.py similarity index 100% rename from samples/snippets/delete_custom_job_sample.py rename to samples/snippets/job_service/delete_custom_job_sample.py diff --git a/samples/snippets/delete_data_labeling_job_sample.py b/samples/snippets/job_service/delete_data_labeling_job_sample.py similarity index 100% rename from samples/snippets/delete_data_labeling_job_sample.py rename to samples/snippets/job_service/delete_data_labeling_job_sample.py diff --git a/samples/snippets/delete_hyperparameter_tuning_job_sample.py b/samples/snippets/job_service/delete_hyperparameter_tuning_job_sample.py similarity index 100% rename from samples/snippets/delete_hyperparameter_tuning_job_sample.py rename to samples/snippets/job_service/delete_hyperparameter_tuning_job_sample.py diff --git a/samples/snippets/get_batch_prediction_job_sample.py b/samples/snippets/job_service/get_batch_prediction_job_sample.py similarity index 100% rename from samples/snippets/get_batch_prediction_job_sample.py rename to samples/snippets/job_service/get_batch_prediction_job_sample.py diff --git a/samples/snippets/get_custom_job_sample.py b/samples/snippets/job_service/get_custom_job_sample.py similarity index 100% rename from samples/snippets/get_custom_job_sample.py rename to samples/snippets/job_service/get_custom_job_sample.py diff --git a/samples/snippets/get_custom_job_sample_test.py b/samples/snippets/job_service/get_custom_job_sample_test.py similarity index 100% rename from samples/snippets/get_custom_job_sample_test.py rename to samples/snippets/job_service/get_custom_job_sample_test.py diff --git a/samples/snippets/get_hyperparameter_tuning_job_sample.py b/samples/snippets/job_service/get_hyperparameter_tuning_job_sample.py 
similarity index 100% rename from samples/snippets/get_hyperparameter_tuning_job_sample.py rename to samples/snippets/job_service/get_hyperparameter_tuning_job_sample.py diff --git a/samples/snippets/get_hyperparameter_tuning_job_sample_test.py b/samples/snippets/job_service/get_hyperparameter_tuning_job_sample_test.py similarity index 99% rename from samples/snippets/get_hyperparameter_tuning_job_sample_test.py rename to samples/snippets/job_service/get_hyperparameter_tuning_job_sample_test.py index 06ac1627e7..05845e0941 100644 --- a/samples/snippets/get_hyperparameter_tuning_job_sample_test.py +++ b/samples/snippets/job_service/get_hyperparameter_tuning_job_sample_test.py @@ -14,7 +14,6 @@ import os - import get_hyperparameter_tuning_job_sample PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/delete_model_sample.py b/samples/snippets/model_service/delete_model_sample.py similarity index 100% rename from samples/snippets/delete_model_sample.py rename to samples/snippets/model_service/delete_model_sample.py diff --git a/samples/snippets/export_model_sample.py b/samples/snippets/model_service/export_model_sample.py similarity index 100% rename from samples/snippets/export_model_sample.py rename to samples/snippets/model_service/export_model_sample.py diff --git a/samples/snippets/export_model_sample_test.py b/samples/snippets/model_service/export_model_sample_test.py similarity index 100% rename from samples/snippets/export_model_sample_test.py rename to samples/snippets/model_service/export_model_sample_test.py index 67a625b657..6a8c0708f4 100644 --- a/samples/snippets/export_model_sample_test.py +++ b/samples/snippets/model_service/export_model_sample_test.py @@ -14,9 +14,9 @@ import os +import export_model_sample import pytest -import export_model_sample PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") MODEL_ID = ( diff --git a/samples/snippets/export_model_tabular_classification_sample.py 
b/samples/snippets/model_service/export_model_tabular_classification_sample.py similarity index 100% rename from samples/snippets/export_model_tabular_classification_sample.py rename to samples/snippets/model_service/export_model_tabular_classification_sample.py diff --git a/samples/snippets/export_model_tabular_classification_sample_test.py b/samples/snippets/model_service/export_model_tabular_classification_sample_test.py similarity index 100% rename from samples/snippets/export_model_tabular_classification_sample_test.py rename to samples/snippets/model_service/export_model_tabular_classification_sample_test.py index 668ace3c25..52312c31eb 100644 --- a/samples/snippets/export_model_tabular_classification_sample_test.py +++ b/samples/snippets/model_service/export_model_tabular_classification_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import export_model_tabular_classification_sample import pytest -import export_model_tabular_classification_sample PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") MODEL_ID = "6036688272397172736" # iris 1000 diff --git a/samples/snippets/export_model_video_action_recognition_sample.py b/samples/snippets/model_service/export_model_video_action_recognition_sample.py similarity index 100% rename from samples/snippets/export_model_video_action_recognition_sample.py rename to samples/snippets/model_service/export_model_video_action_recognition_sample.py diff --git a/samples/snippets/export_model_video_action_recognition_sample_test.py b/samples/snippets/model_service/export_model_video_action_recognition_sample_test.py similarity index 100% rename from samples/snippets/export_model_video_action_recognition_sample_test.py rename to samples/snippets/model_service/export_model_video_action_recognition_sample_test.py index 2142608ce0..3efa50b363 100644 --- a/samples/snippets/export_model_video_action_recognition_sample_test.py +++ 
b/samples/snippets/model_service/export_model_video_action_recognition_sample_test.py @@ -14,9 +14,9 @@ import os +import export_model_video_action_recognition_sample import pytest -import export_model_video_action_recognition_sample PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") MODEL_ID = ( diff --git a/samples/snippets/get_model_evaluation_image_classification_sample.py b/samples/snippets/model_service/get_model_evaluation_image_classification_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_image_classification_sample.py rename to samples/snippets/model_service/get_model_evaluation_image_classification_sample.py diff --git a/samples/snippets/get_model_evaluation_image_object_detection_sample.py b/samples/snippets/model_service/get_model_evaluation_image_object_detection_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_image_object_detection_sample.py rename to samples/snippets/model_service/get_model_evaluation_image_object_detection_sample.py diff --git a/samples/snippets/get_model_evaluation_sample.py b/samples/snippets/model_service/get_model_evaluation_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_sample.py rename to samples/snippets/model_service/get_model_evaluation_sample.py diff --git a/samples/snippets/get_model_evaluation_sample_test.py b/samples/snippets/model_service/get_model_evaluation_sample_test.py similarity index 100% rename from samples/snippets/get_model_evaluation_sample_test.py rename to samples/snippets/model_service/get_model_evaluation_sample_test.py diff --git a/samples/snippets/get_model_evaluation_slice_sample.py b/samples/snippets/model_service/get_model_evaluation_slice_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_slice_sample.py rename to samples/snippets/model_service/get_model_evaluation_slice_sample.py diff --git a/samples/snippets/get_model_evaluation_slice_sample_test.py 
b/samples/snippets/model_service/get_model_evaluation_slice_sample_test.py similarity index 100% rename from samples/snippets/get_model_evaluation_slice_sample_test.py rename to samples/snippets/model_service/get_model_evaluation_slice_sample_test.py diff --git a/samples/snippets/get_model_evaluation_tabular_classification_sample.py b/samples/snippets/model_service/get_model_evaluation_tabular_classification_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_tabular_classification_sample.py rename to samples/snippets/model_service/get_model_evaluation_tabular_classification_sample.py diff --git a/samples/snippets/get_model_evaluation_tabular_classification_sample_test.py b/samples/snippets/model_service/get_model_evaluation_tabular_classification_sample_test.py similarity index 100% rename from samples/snippets/get_model_evaluation_tabular_classification_sample_test.py rename to samples/snippets/model_service/get_model_evaluation_tabular_classification_sample_test.py diff --git a/samples/snippets/get_model_evaluation_tabular_regression_sample.py b/samples/snippets/model_service/get_model_evaluation_tabular_regression_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_tabular_regression_sample.py rename to samples/snippets/model_service/get_model_evaluation_tabular_regression_sample.py diff --git a/samples/snippets/get_model_evaluation_tabular_regression_sample_test.py b/samples/snippets/model_service/get_model_evaluation_tabular_regression_sample_test.py similarity index 100% rename from samples/snippets/get_model_evaluation_tabular_regression_sample_test.py rename to samples/snippets/model_service/get_model_evaluation_tabular_regression_sample_test.py diff --git a/samples/snippets/get_model_evaluation_text_classification_sample.py b/samples/snippets/model_service/get_model_evaluation_text_classification_sample.py similarity index 100% rename from 
samples/snippets/get_model_evaluation_text_classification_sample.py rename to samples/snippets/model_service/get_model_evaluation_text_classification_sample.py diff --git a/samples/snippets/get_model_evaluation_text_entity_extraction_sample.py b/samples/snippets/model_service/get_model_evaluation_text_entity_extraction_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_text_entity_extraction_sample.py rename to samples/snippets/model_service/get_model_evaluation_text_entity_extraction_sample.py diff --git a/samples/snippets/get_model_evaluation_text_sentiment_analysis_sample.py b/samples/snippets/model_service/get_model_evaluation_text_sentiment_analysis_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_text_sentiment_analysis_sample.py rename to samples/snippets/model_service/get_model_evaluation_text_sentiment_analysis_sample.py diff --git a/samples/snippets/get_model_evaluation_video_action_recognition_sample.py b/samples/snippets/model_service/get_model_evaluation_video_action_recognition_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_video_action_recognition_sample.py rename to samples/snippets/model_service/get_model_evaluation_video_action_recognition_sample.py diff --git a/samples/snippets/get_model_evaluation_video_action_recognition_sample_test.py b/samples/snippets/model_service/get_model_evaluation_video_action_recognition_sample_test.py similarity index 100% rename from samples/snippets/get_model_evaluation_video_action_recognition_sample_test.py rename to samples/snippets/model_service/get_model_evaluation_video_action_recognition_sample_test.py diff --git a/samples/snippets/get_model_evaluation_video_classification_sample.py b/samples/snippets/model_service/get_model_evaluation_video_classification_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_video_classification_sample.py rename to 
samples/snippets/model_service/get_model_evaluation_video_classification_sample.py diff --git a/samples/snippets/get_model_evaluation_video_classification_sample_test.py b/samples/snippets/model_service/get_model_evaluation_video_classification_sample_test.py similarity index 100% rename from samples/snippets/get_model_evaluation_video_classification_sample_test.py rename to samples/snippets/model_service/get_model_evaluation_video_classification_sample_test.py diff --git a/samples/snippets/get_model_evaluation_video_object_tracking_sample.py b/samples/snippets/model_service/get_model_evaluation_video_object_tracking_sample.py similarity index 100% rename from samples/snippets/get_model_evaluation_video_object_tracking_sample.py rename to samples/snippets/model_service/get_model_evaluation_video_object_tracking_sample.py diff --git a/samples/snippets/get_model_evaluation_video_object_tracking_sample_test.py b/samples/snippets/model_service/get_model_evaluation_video_object_tracking_sample_test.py similarity index 100% rename from samples/snippets/get_model_evaluation_video_object_tracking_sample_test.py rename to samples/snippets/model_service/get_model_evaluation_video_object_tracking_sample_test.py diff --git a/samples/snippets/get_model_sample.py b/samples/snippets/model_service/get_model_sample.py similarity index 100% rename from samples/snippets/get_model_sample.py rename to samples/snippets/model_service/get_model_sample.py diff --git a/samples/snippets/get_model_sample_test.py b/samples/snippets/model_service/get_model_sample_test.py similarity index 100% rename from samples/snippets/get_model_sample_test.py rename to samples/snippets/model_service/get_model_sample_test.py diff --git a/samples/snippets/list_model_evaluation_slices_sample.py b/samples/snippets/model_service/list_model_evaluation_slices_sample.py similarity index 100% rename from samples/snippets/list_model_evaluation_slices_sample.py rename to 
samples/snippets/model_service/list_model_evaluation_slices_sample.py diff --git a/samples/snippets/list_model_evaluation_slices_sample_test.py b/samples/snippets/model_service/list_model_evaluation_slices_sample_test.py similarity index 100% rename from samples/snippets/list_model_evaluation_slices_sample_test.py rename to samples/snippets/model_service/list_model_evaluation_slices_sample_test.py diff --git a/samples/snippets/upload_model_explain_image_managed_container_sample.py b/samples/snippets/model_service/upload_model_explain_image_managed_container_sample.py similarity index 100% rename from samples/snippets/upload_model_explain_image_managed_container_sample.py rename to samples/snippets/model_service/upload_model_explain_image_managed_container_sample.py diff --git a/samples/snippets/upload_model_explain_image_managed_container_sample_test.py b/samples/snippets/model_service/upload_model_explain_image_managed_container_sample_test.py similarity index 100% rename from samples/snippets/upload_model_explain_image_managed_container_sample_test.py rename to samples/snippets/model_service/upload_model_explain_image_managed_container_sample_test.py index e43a7a8a05..bb572446e0 100644 --- a/samples/snippets/upload_model_explain_image_managed_container_sample_test.py +++ b/samples/snippets/model_service/upload_model_explain_image_managed_container_sample_test.py @@ -17,10 +17,10 @@ from uuid import uuid4 import pytest +import upload_model_explain_image_managed_container_sample import helpers -import upload_model_explain_image_managed_container_sample PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") IMAGE_URI = "gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-1:latest" diff --git a/samples/snippets/upload_model_explain_tabular_managed_container_sample.py b/samples/snippets/model_service/upload_model_explain_tabular_managed_container_sample.py similarity index 100% rename from samples/snippets/upload_model_explain_tabular_managed_container_sample.py rename to 
samples/snippets/model_service/upload_model_explain_tabular_managed_container_sample.py diff --git a/samples/snippets/upload_model_explain_tabular_managed_container_sample_test.py b/samples/snippets/model_service/upload_model_explain_tabular_managed_container_sample_test.py similarity index 100% rename from samples/snippets/upload_model_explain_tabular_managed_container_sample_test.py rename to samples/snippets/model_service/upload_model_explain_tabular_managed_container_sample_test.py index 1a52a15319..a3ccd71623 100644 --- a/samples/snippets/upload_model_explain_tabular_managed_container_sample_test.py +++ b/samples/snippets/model_service/upload_model_explain_tabular_managed_container_sample_test.py @@ -17,10 +17,10 @@ from uuid import uuid4 import pytest +import upload_model_explain_tabular_managed_container_sample import helpers -import upload_model_explain_tabular_managed_container_sample PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") IMAGE_URI = "gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-1:latest" diff --git a/samples/snippets/upload_model_sample.py b/samples/snippets/model_service/upload_model_sample.py similarity index 100% rename from samples/snippets/upload_model_sample.py rename to samples/snippets/model_service/upload_model_sample.py diff --git a/samples/snippets/upload_model_sample_test.py b/samples/snippets/model_service/upload_model_sample_test.py similarity index 99% rename from samples/snippets/upload_model_sample_test.py rename to samples/snippets/model_service/upload_model_sample_test.py index 8a38605455..2f9670c26e 100644 --- a/samples/snippets/upload_model_sample_test.py +++ b/samples/snippets/model_service/upload_model_sample_test.py @@ -17,9 +17,11 @@ import pytest -import helpers import upload_model_sample +import helpers + + PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") IMAGE_URI = "gcr.io/cloud-ml-service-public/cloud-ml-online-prediction-model-server-cpu:v1_15py3cmle_op_images_20200229_0210_RC00" ARTIFACT_URI = 
"gs://ucaip-samples-us-central1/model/explain/" diff --git a/samples/snippets/cancel_training_pipeline_sample.py b/samples/snippets/pipeline_service/cancel_training_pipeline_sample.py similarity index 100% rename from samples/snippets/cancel_training_pipeline_sample.py rename to samples/snippets/pipeline_service/cancel_training_pipeline_sample.py diff --git a/samples/snippets/cancel_training_pipeline_sample_test.py b/samples/snippets/pipeline_service/cancel_training_pipeline_sample_test.py similarity index 100% rename from samples/snippets/cancel_training_pipeline_sample_test.py rename to samples/snippets/pipeline_service/cancel_training_pipeline_sample_test.py index 992cf8269d..f517d4c7b2 100644 --- a/samples/snippets/cancel_training_pipeline_sample_test.py +++ b/samples/snippets/pipeline_service/cancel_training_pipeline_sample_test.py @@ -15,11 +15,11 @@ import os from uuid import uuid4 +import cancel_training_pipeline_sample from google.protobuf import json_format from google.protobuf.struct_pb2 import Value import pytest -import cancel_training_pipeline_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_custom_job_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_custom_job_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_custom_job_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_custom_job_sample.py diff --git a/samples/snippets/create_training_pipeline_custom_job_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_custom_job_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_custom_job_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_custom_job_sample_test.py index e412c513da..283bbc2ed6 100644 --- a/samples/snippets/create_training_pipeline_custom_job_sample_test.py +++ 
b/samples/snippets/pipeline_service/create_training_pipeline_custom_job_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_custom_job_sample import pytest -import create_training_pipeline_custom_job_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_custom_training_managed_dataset_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_custom_training_managed_dataset_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_custom_training_managed_dataset_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_custom_training_managed_dataset_sample.py diff --git a/samples/snippets/create_training_pipeline_custom_training_managed_dataset_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_custom_training_managed_dataset_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_custom_training_managed_dataset_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_custom_training_managed_dataset_sample_test.py index a8f399147d..d18a925747 100644 --- a/samples/snippets/create_training_pipeline_custom_training_managed_dataset_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_custom_training_managed_dataset_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_training_pipeline_custom_training_managed_dataset_sample import pytest -import create_training_pipeline_custom_training_managed_dataset_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_image_classification_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_image_classification_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_image_classification_sample.py 
rename to samples/snippets/pipeline_service/create_training_pipeline_image_classification_sample.py diff --git a/samples/snippets/create_training_pipeline_image_classification_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_image_classification_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_image_classification_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_image_classification_sample_test.py index ff48302463..44073eb247 100644 --- a/samples/snippets/create_training_pipeline_image_classification_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_image_classification_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_image_classification_sample import pytest -import create_training_pipeline_image_classification_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_image_object_detection_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_image_object_detection_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_image_object_detection_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_image_object_detection_sample.py diff --git a/samples/snippets/create_training_pipeline_image_object_detection_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_image_object_detection_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_image_object_detection_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_image_object_detection_sample_test.py index 5935a2acf6..1b092b2af0 100644 --- a/samples/snippets/create_training_pipeline_image_object_detection_sample_test.py +++ 
b/samples/snippets/pipeline_service/create_training_pipeline_image_object_detection_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_image_object_detection_sample import pytest -import create_training_pipeline_image_object_detection_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_sample.py diff --git a/samples/snippets/create_training_pipeline_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_sample_test.py index 38771638f9..67359ffee9 100644 --- a/samples/snippets/create_training_pipeline_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_sample import pytest -import create_training_pipeline_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_tabular_classification_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_tabular_classification_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_tabular_classification_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_tabular_classification_sample.py diff --git a/samples/snippets/create_training_pipeline_tabular_classification_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_tabular_classification_sample_test.py similarity index 100% rename from 
samples/snippets/create_training_pipeline_tabular_classification_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_tabular_classification_sample_test.py index 49d48c1804..e0cf80518b 100644 --- a/samples/snippets/create_training_pipeline_tabular_classification_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_tabular_classification_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_tabular_classification_sample import pytest -import create_training_pipeline_tabular_classification_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_tabular_forecasting_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_tabular_forecasting_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_tabular_forecasting_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_tabular_forecasting_sample.py diff --git a/samples/snippets/create_training_pipeline_tabular_forecasting_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_tabular_forecasting_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_tabular_forecasting_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_tabular_forecasting_sample_test.py index 0cfde8f200..5c5dbce7fa 100644 --- a/samples/snippets/create_training_pipeline_tabular_forecasting_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_tabular_forecasting_sample_test.py @@ -15,12 +15,12 @@ import os from uuid import uuid4 -from google.cloud import aiplatform -import pytest - import cancel_training_pipeline_sample import create_training_pipeline_tabular_forecasting_sample import delete_training_pipeline_sample +from google.cloud import aiplatform +import pytest + import helpers PROJECT_ID 
= os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_tabular_regression_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_tabular_regression_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_tabular_regression_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_tabular_regression_sample.py diff --git a/samples/snippets/create_training_pipeline_tabular_regression_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_tabular_regression_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_tabular_regression_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_tabular_regression_sample_test.py index 0209d6087e..2d602a7050 100644 --- a/samples/snippets/create_training_pipeline_tabular_regression_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_tabular_regression_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_tabular_regression_sample import pytest -import create_training_pipeline_tabular_regression_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_text_classification_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_text_classification_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_text_classification_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_text_classification_sample.py diff --git a/samples/snippets/create_training_pipeline_text_entity_extraction_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_text_entity_extraction_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_text_entity_extraction_sample.py rename to 
samples/snippets/pipeline_service/create_training_pipeline_text_entity_extraction_sample.py diff --git a/samples/snippets/create_training_pipeline_text_entity_extraction_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_text_entity_extraction_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_text_entity_extraction_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_text_entity_extraction_sample_test.py index e7dabbae76..eca60108fe 100644 --- a/samples/snippets/create_training_pipeline_text_entity_extraction_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_text_entity_extraction_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_text_entity_extraction_sample import pytest -import create_training_pipeline_text_entity_extraction_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_text_sentiment_analysis_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_text_sentiment_analysis_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_text_sentiment_analysis_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_text_sentiment_analysis_sample.py diff --git a/samples/snippets/create_training_pipeline_text_sentiment_analysis_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_text_sentiment_analysis_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_text_sentiment_analysis_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_text_sentiment_analysis_sample_test.py index 721a6f4a2f..f074fe29d8 100644 --- a/samples/snippets/create_training_pipeline_text_sentiment_analysis_sample_test.py +++ 
b/samples/snippets/pipeline_service/create_training_pipeline_text_sentiment_analysis_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_text_sentiment_analysis_sample import pytest -import create_training_pipeline_text_sentiment_analysis_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_video_action_recognition_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_video_action_recognition_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_video_action_recognition_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_video_action_recognition_sample.py diff --git a/samples/snippets/create_training_pipeline_video_action_recognition_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_video_action_recognition_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_video_action_recognition_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_video_action_recognition_sample_test.py index 94d161fb1d..2a4fe55b24 100644 --- a/samples/snippets/create_training_pipeline_video_action_recognition_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_video_action_recognition_sample_test.py @@ -15,9 +15,9 @@ import os import uuid +import create_training_pipeline_video_action_recognition_sample import pytest -import create_training_pipeline_video_action_recognition_sample import helpers LOCATION = "us-central1" diff --git a/samples/snippets/create_training_pipeline_video_classification_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_video_classification_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_video_classification_sample.py rename to 
samples/snippets/pipeline_service/create_training_pipeline_video_classification_sample.py diff --git a/samples/snippets/create_training_pipeline_video_classification_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_video_classification_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_video_classification_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_video_classification_sample_test.py index d192f27671..f84a9db1ad 100644 --- a/samples/snippets/create_training_pipeline_video_classification_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_video_classification_sample_test.py @@ -15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_video_classification_sample import pytest -import create_training_pipeline_video_classification_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/create_training_pipeline_video_object_tracking_sample.py b/samples/snippets/pipeline_service/create_training_pipeline_video_object_tracking_sample.py similarity index 100% rename from samples/snippets/create_training_pipeline_video_object_tracking_sample.py rename to samples/snippets/pipeline_service/create_training_pipeline_video_object_tracking_sample.py diff --git a/samples/snippets/create_training_pipeline_video_object_tracking_sample_test.py b/samples/snippets/pipeline_service/create_training_pipeline_video_object_tracking_sample_test.py similarity index 100% rename from samples/snippets/create_training_pipeline_video_object_tracking_sample_test.py rename to samples/snippets/pipeline_service/create_training_pipeline_video_object_tracking_sample_test.py index 634c74bac9..b73deaa5bc 100644 --- a/samples/snippets/create_training_pipeline_video_object_tracking_sample_test.py +++ b/samples/snippets/pipeline_service/create_training_pipeline_video_object_tracking_sample_test.py @@ 
-15,9 +15,9 @@ import os from uuid import uuid4 +import create_training_pipeline_video_object_tracking_sample import pytest -import create_training_pipeline_video_object_tracking_sample import helpers PROJECT_ID = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") diff --git a/samples/snippets/delete_training_pipeline_sample.py b/samples/snippets/pipeline_service/delete_training_pipeline_sample.py similarity index 100% rename from samples/snippets/delete_training_pipeline_sample.py rename to samples/snippets/pipeline_service/delete_training_pipeline_sample.py diff --git a/samples/snippets/get_training_pipeline_sample.py b/samples/snippets/pipeline_service/get_training_pipeline_sample.py similarity index 100% rename from samples/snippets/get_training_pipeline_sample.py rename to samples/snippets/pipeline_service/get_training_pipeline_sample.py diff --git a/samples/snippets/get_training_pipeline_sample_test.py b/samples/snippets/pipeline_service/get_training_pipeline_sample_test.py similarity index 100% rename from samples/snippets/get_training_pipeline_sample_test.py rename to samples/snippets/pipeline_service/get_training_pipeline_sample_test.py diff --git a/samples/snippets/explain_tabular_sample.py b/samples/snippets/prediction_service/explain_tabular_sample.py similarity index 100% rename from samples/snippets/explain_tabular_sample.py rename to samples/snippets/prediction_service/explain_tabular_sample.py diff --git a/samples/snippets/explain_tabular_sample_test.py b/samples/snippets/prediction_service/explain_tabular_sample_test.py similarity index 100% rename from samples/snippets/explain_tabular_sample_test.py rename to samples/snippets/prediction_service/explain_tabular_sample_test.py diff --git a/samples/snippets/predict_custom_trained_model_sample.py b/samples/snippets/prediction_service/predict_custom_trained_model_sample.py similarity index 100% rename from samples/snippets/predict_custom_trained_model_sample.py rename to 
samples/snippets/prediction_service/predict_custom_trained_model_sample.py diff --git a/samples/snippets/predict_custom_trained_model_sample_test.py b/samples/snippets/prediction_service/predict_custom_trained_model_sample_test.py similarity index 100% rename from samples/snippets/predict_custom_trained_model_sample_test.py rename to samples/snippets/prediction_service/predict_custom_trained_model_sample_test.py diff --git a/samples/snippets/predict_image_classification_sample.py b/samples/snippets/prediction_service/predict_image_classification_sample.py similarity index 100% rename from samples/snippets/predict_image_classification_sample.py rename to samples/snippets/prediction_service/predict_image_classification_sample.py diff --git a/samples/snippets/predict_image_classification_sample_test.py b/samples/snippets/prediction_service/predict_image_classification_sample_test.py similarity index 100% rename from samples/snippets/predict_image_classification_sample_test.py rename to samples/snippets/prediction_service/predict_image_classification_sample_test.py diff --git a/samples/snippets/predict_image_object_detection_sample.py b/samples/snippets/prediction_service/predict_image_object_detection_sample.py similarity index 100% rename from samples/snippets/predict_image_object_detection_sample.py rename to samples/snippets/prediction_service/predict_image_object_detection_sample.py diff --git a/samples/snippets/predict_image_object_detection_sample_test.py b/samples/snippets/prediction_service/predict_image_object_detection_sample_test.py similarity index 100% rename from samples/snippets/predict_image_object_detection_sample_test.py rename to samples/snippets/prediction_service/predict_image_object_detection_sample_test.py diff --git a/samples/snippets/predict_sample.py b/samples/snippets/prediction_service/predict_sample.py similarity index 100% rename from samples/snippets/predict_sample.py rename to samples/snippets/prediction_service/predict_sample.py diff 
--git a/samples/snippets/predict_tabular_classification_sample.py b/samples/snippets/prediction_service/predict_tabular_classification_sample.py similarity index 100% rename from samples/snippets/predict_tabular_classification_sample.py rename to samples/snippets/prediction_service/predict_tabular_classification_sample.py diff --git a/samples/snippets/predict_tabular_classification_sample_test.py b/samples/snippets/prediction_service/predict_tabular_classification_sample_test.py similarity index 100% rename from samples/snippets/predict_tabular_classification_sample_test.py rename to samples/snippets/prediction_service/predict_tabular_classification_sample_test.py diff --git a/samples/snippets/predict_tabular_regression_sample.py b/samples/snippets/prediction_service/predict_tabular_regression_sample.py similarity index 100% rename from samples/snippets/predict_tabular_regression_sample.py rename to samples/snippets/prediction_service/predict_tabular_regression_sample.py diff --git a/samples/snippets/predict_tabular_regression_sample_test.py b/samples/snippets/prediction_service/predict_tabular_regression_sample_test.py similarity index 100% rename from samples/snippets/predict_tabular_regression_sample_test.py rename to samples/snippets/prediction_service/predict_tabular_regression_sample_test.py diff --git a/samples/snippets/predict_text_classification_single_label_sample.py b/samples/snippets/prediction_service/predict_text_classification_single_label_sample.py similarity index 100% rename from samples/snippets/predict_text_classification_single_label_sample.py rename to samples/snippets/prediction_service/predict_text_classification_single_label_sample.py diff --git a/samples/snippets/predict_text_classification_single_label_sample_test.py b/samples/snippets/prediction_service/predict_text_classification_single_label_sample_test.py similarity index 100% rename from samples/snippets/predict_text_classification_single_label_sample_test.py rename to 
samples/snippets/prediction_service/predict_text_classification_single_label_sample_test.py diff --git a/samples/snippets/predict_text_entity_extraction_sample.py b/samples/snippets/prediction_service/predict_text_entity_extraction_sample.py similarity index 100% rename from samples/snippets/predict_text_entity_extraction_sample.py rename to samples/snippets/prediction_service/predict_text_entity_extraction_sample.py diff --git a/samples/snippets/predict_text_entity_extraction_sample_test.py b/samples/snippets/prediction_service/predict_text_entity_extraction_sample_test.py similarity index 100% rename from samples/snippets/predict_text_entity_extraction_sample_test.py rename to samples/snippets/prediction_service/predict_text_entity_extraction_sample_test.py diff --git a/samples/snippets/predict_text_sentiment_analysis_sample.py b/samples/snippets/prediction_service/predict_text_sentiment_analysis_sample.py similarity index 100% rename from samples/snippets/predict_text_sentiment_analysis_sample.py rename to samples/snippets/prediction_service/predict_text_sentiment_analysis_sample.py diff --git a/samples/snippets/predict_text_sentiment_analysis_sample_test.py b/samples/snippets/prediction_service/predict_text_sentiment_analysis_sample_test.py similarity index 100% rename from samples/snippets/predict_text_sentiment_analysis_sample_test.py rename to samples/snippets/prediction_service/predict_text_sentiment_analysis_sample_test.py diff --git a/samples/snippets/resources/caprese_salad.jpg b/samples/snippets/prediction_service/resources/caprese_salad.jpg similarity index 100% rename from samples/snippets/resources/caprese_salad.jpg rename to samples/snippets/prediction_service/resources/caprese_salad.jpg diff --git a/samples/snippets/resources/daisy.jpg b/samples/snippets/prediction_service/resources/daisy.jpg similarity index 100% rename from samples/snippets/resources/daisy.jpg rename to samples/snippets/prediction_service/resources/daisy.jpg From 
9dcf6fb0bc8144d819938a97edf4339fe6f2e1e6 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Fri, 13 Aug 2021 11:15:14 -0400 Subject: [PATCH 15/28] chore: drop mention of Python 2.7 from templates (#626) Source-Link: https://github.com/googleapis/synthtool/commit/facee4cc1ea096cd8bcc008bb85929daa7c414c0 Post-Processor: gcr.io/repo-automation-bots/owlbot-python:latest@sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 Co-authored-by: Owl Bot --- .github/.OwlBot.lock.yaml | 2 +- noxfile.py | 12 +++++++++--- scripts/readme-gen/templates/install_deps.tmpl.rst | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index b771c37cae..a9fcd07cc4 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:a1a891041baa4ffbe1a809ac1b8b9b4a71887293c9101c88e8e255943c5aec2d + digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 diff --git a/noxfile.py b/noxfile.py index 1c4b22dbda..2113b7b2d9 100644 --- a/noxfile.py +++ b/noxfile.py @@ -84,9 +84,15 @@ def default(session): constraints_path = str( CURRENT_DIRECTORY / "testing" / f"constraints-{session.python}.txt" ) - session.install("asyncmock", "pytest-asyncio", "-c", constraints_path) - - session.install("mock", "pytest", "pytest-cov", "-c", constraints_path) + session.install( + "mock", + "asyncmock", + "pytest", + "pytest-cov", + "pytest-asyncio", + "-c", + constraints_path, + ) session.install("-e", ".[testing]", "-c", constraints_path) diff --git a/scripts/readme-gen/templates/install_deps.tmpl.rst b/scripts/readme-gen/templates/install_deps.tmpl.rst index a0406dba8c..275d649890 100644 --- a/scripts/readme-gen/templates/install_deps.tmpl.rst +++ b/scripts/readme-gen/templates/install_deps.tmpl.rst @@ -12,7 +12,7 @@ Install Dependencies .. 
_Python Development Environment Setup Guide: https://cloud.google.com/python/setup -#. Create a virtualenv. Samples are compatible with Python 2.7 and 3.4+. +#. Create a virtualenv. Samples are compatible with Python 3.6+. .. code-block:: bash From 28f32fd11470ad86d2f103346b3e6be8f1adc2d8 Mon Sep 17 00:00:00 2001 From: Vinny Senthil Date: Tue, 17 Aug 2021 12:44:12 -0700 Subject: [PATCH 16/28] fix: Update BatchPredictionJob.iter_outputs() and BQ docstrings (#631) * fix: Have iter_outputs use BQ output table field * fix: Update arg docstring to reflect bq:// prefix --- google/cloud/aiplatform/jobs.py | 25 +++++++++++++------- google/cloud/aiplatform/models.py | 2 +- tests/unit/aiplatform/test_jobs.py | 38 +++++++++++++++++++++++++++++- 3 files changed, 54 insertions(+), 11 deletions(-) diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 20d8141a22..6a5eb8ffee 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -406,7 +406,7 @@ def create( https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. bigquery_source (Optional[str]): BigQuery URI to a table, up to 2000 characters long. For example: - `projectId.bqDatasetId.bqTableId` + `bq://projectId.bqDatasetId.bqTableId` gcs_destination_prefix (Optional[str]): The Google Cloud Storage location of the directory where the output is to be written to. 
In the given directory a new @@ -808,14 +808,15 @@ def iter_outputs( # BigQuery Destination, return RowIterator elif output_info.bigquery_output_dataset: - # Build a BigQuery Client using the same credentials as JobServiceClient - bq_client = bigquery.Client( - project=self.project, - credentials=self.api_client._transport._credentials, - ) - - # Format from service is `bq://projectId.bqDatasetId` + # Format of `bigquery_output_dataset` from service is `bq://projectId.bqDatasetId` bq_dataset = output_info.bigquery_output_dataset + bq_table = output_info.bigquery_output_table + + if not bq_table: + raise RuntimeError( + "A BigQuery table with predictions was not found, this " + f"might be due to errors. Visit {self._dashboard_uri()} for details." + ) if bq_dataset.startswith("bq://"): bq_dataset = bq_dataset[5:] @@ -823,8 +824,14 @@ def iter_outputs( # # Split project ID and BQ dataset ID _, bq_dataset_id = bq_dataset.split(".", 1) + # Build a BigQuery Client using the same credentials as JobServiceClient + bq_client = bigquery.Client( + project=self.project, + credentials=self.api_client._transport._credentials, + ) + row_iterator = bq_client.list_rows( - table=f"{bq_dataset_id}.predictions", max_results=bq_max_results + table=f"{bq_dataset_id}.{bq_table}", max_results=bq_max_results ) return row_iterator diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index c1518ce89d..4af337b3e8 100644 --- a/google/cloud/aiplatform/models.py +++ b/google/cloud/aiplatform/models.py @@ -2038,7 +2038,7 @@ def batch_predict( https://cloud.google.com/storage/docs/gsutil/addlhelp/WildcardNames. bigquery_source: Optional[str] = None BigQuery URI to a table, up to 2000 characters long. For example: - `projectId.bqDatasetId.bqTableId` + `bq://projectId.bqDatasetId.bqTableId` instances_format: str = "jsonl" Required. 
The format in which instances are given, must be one of "jsonl", "csv", "bigquery", "tf-record", "tf-record-gzip", diff --git a/tests/unit/aiplatform/test_jobs.py b/tests/unit/aiplatform/test_jobs.py index 76584cd0c4..d10eb0335d 100644 --- a/tests/unit/aiplatform/test_jobs.py +++ b/tests/unit/aiplatform/test_jobs.py @@ -56,6 +56,7 @@ _TEST_ALT_ID = "8834795523125638878" _TEST_DISPLAY_NAME = "my_job_1234" _TEST_BQ_DATASET_ID = "bqDatasetId" +_TEST_BQ_TABLE_NAME = "someBqTable" _TEST_BQ_JOB_ID = "123459876" _TEST_BQ_MAX_RESULTS = 100 _TEST_GCS_BUCKET_NAME = "my-bucket" @@ -108,6 +109,9 @@ gcs_output_directory=_TEST_GCS_BUCKET_NAME ) _TEST_BQ_OUTPUT_INFO = gca_batch_prediction_job.BatchPredictionJob.OutputInfo( + bigquery_output_dataset=_TEST_BQ_PATH, bigquery_output_table=_TEST_BQ_TABLE_NAME +) +_TEST_BQ_OUTPUT_INFO_INCOMPLETE = gca_batch_prediction_job.BatchPredictionJob.OutputInfo( bigquery_output_dataset=_TEST_BQ_PATH ) @@ -296,6 +300,23 @@ def get_batch_prediction_job_bq_output_mock(): yield get_batch_prediction_job_mock +@pytest.fixture +def get_batch_prediction_job_incomplete_bq_output_mock(): + with patch.object( + job_service_client.JobServiceClient, "get_batch_prediction_job" + ) as get_batch_prediction_job_mock: + get_batch_prediction_job_mock.return_value = gca_batch_prediction_job.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_DISPLAY_NAME, + model=_TEST_MODEL_NAME, + input_config=_TEST_GCS_INPUT_CONFIG, + output_config=_TEST_BQ_OUTPUT_CONFIG, + output_info=_TEST_BQ_OUTPUT_INFO_INCOMPLETE, + state=_TEST_JOB_STATE_SUCCESS, + ) + yield get_batch_prediction_job_mock + + @pytest.fixture def get_batch_prediction_job_empty_output_mock(): with patch.object( @@ -397,7 +418,22 @@ def test_batch_prediction_iter_dirs_bq(self, bq_list_rows_mock): bp.iter_outputs() bq_list_rows_mock.assert_called_once_with( - table=f"{_TEST_BQ_DATASET_ID}.predictions", max_results=_TEST_BQ_MAX_RESULTS + 
table=f"{_TEST_BQ_DATASET_ID}.{_TEST_BQ_TABLE_NAME}", + max_results=_TEST_BQ_MAX_RESULTS, + ) + + @pytest.mark.usefixtures("get_batch_prediction_job_incomplete_bq_output_mock") + def test_batch_prediction_iter_dirs_bq_raises_on_empty(self, bq_list_rows_mock): + bp = jobs.BatchPredictionJob( + batch_prediction_job_name=_TEST_BATCH_PREDICTION_JOB_NAME + ) + with pytest.raises(RuntimeError) as e: + bp.iter_outputs() + assert e.match( + regexp=( + "A BigQuery table with predictions was not found," + " this might be due to errors. Visit http" + ) ) @pytest.mark.usefixtures("get_batch_prediction_job_running_bq_output_mock") From 74f81e6c4f7e6c0972dfe1c8db311843c079d93e Mon Sep 17 00:00:00 2001 From: Bu Sun Kim <8822365+busunkim96@users.noreply.github.com> Date: Wed, 18 Aug 2021 07:42:23 -0600 Subject: [PATCH 17/28] chore: generate python samples templates in owlbot.py (#634) Generate python samples templates in owlbot.py --- owlbot.py | 1 + samples/model-builder/noxfile.py | 91 +++++++++++++++++++++++--------- samples/snippets/noxfile.py | 90 ++++++++++++++++++++++--------- 3 files changed, 130 insertions(+), 52 deletions(-) diff --git a/owlbot.py b/owlbot.py index 3c1e32b044..d08a25d661 100644 --- a/owlbot.py +++ b/owlbot.py @@ -82,6 +82,7 @@ # ---------------------------------------------------------------------------- templated_files = common.py_library(cov_level=99, microgenerator=True) +python.py_samples(skip_readmes=True) s.move( templated_files, excludes=[ diff --git a/samples/model-builder/noxfile.py b/samples/model-builder/noxfile.py index 83bf446de2..e73436a156 100644 --- a/samples/model-builder/noxfile.py +++ b/samples/model-builder/noxfile.py @@ -17,17 +17,20 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox + # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING # DO NOT EDIT THIS FILE EVER! 
# WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. The values here should be in sync @@ -36,22 +39,31 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + 'ignored_versions': [], + + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + 'enforce_type_hints': False, + # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string # to use your own Cloud project. - "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. - "envs": {}, + 'envs': {}, } try: # Ensure we can import noxfile_config in the project's directory. - sys.path.append(".") + sys.path.append('.') from noxfile_config import TEST_CONFIG_OVERRIDE except ImportError as e: print("No user noxfile_config found: detail: {}".format(e)) @@ -61,36 +73,36 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} # Override the GCLOUD_PROJECT and the alias. 
- env_key = TEST_CONFIG["gcloud_project_env"] + env_key = TEST_CONFIG['gcloud_project_env'] # This should error out if not set. - ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key] # Apply user supplied envs. - ret.update(TEST_CONFIG["envs"]) + ret.update(TEST_CONFIG['envs']) return ret # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. -IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] +IGNORED_VERSIONS = TEST_CONFIG['ignored_versions'] TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ("True", "true") # # Style Checks # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". This is used when running the linter to insure that import order is @@ -128,17 +140,30 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): - session.install("flake8", "flake8-import-order") +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG['enforce_type_hints']: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ "--application-import-names", ",".join(local_names), - ".", + "." 
] session.run("flake8", *args) +# +# Black +# + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) # # Sample Tests @@ -148,13 +173,22 @@ def lint(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) @@ -174,14 +208,14 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) else: - session.skip( - "SKIPPED: {} tests are disabled for this sample.".format(session.python) - ) + session.skip("SKIPPED: {} tests are disabled for this sample.".format( + session.python + )) # @@ -189,7 +223,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. 
Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) @@ -198,6 +232,11 @@ def _get_repo_root(): break if Path(p / ".git").exists(): return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) p = p.parent raise Exception("Unable to detect repository root.") @@ -207,7 +246,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) diff --git a/samples/snippets/noxfile.py b/samples/snippets/noxfile.py index 5660f08be4..e73436a156 100644 --- a/samples/snippets/noxfile.py +++ b/samples/snippets/noxfile.py @@ -17,6 +17,7 @@ import os from pathlib import Path import sys +from typing import Callable, Dict, List, Optional import nox @@ -27,8 +28,9 @@ # WARNING - WARNING - WARNING - WARNING - WARNING # WARNING - WARNING - WARNING - WARNING - WARNING -# Copy `noxfile_config.py` to your directory and modify it instead. +BLACK_VERSION = "black==19.10b0" +# Copy `noxfile_config.py` to your directory and modify it instead. # `TEST_CONFIG` dict is a configuration hook that allows users to # modify the test configurations. The values here should be in sync @@ -37,22 +39,31 @@ TEST_CONFIG = { # You can opt out from the test for specific Python versions. - "ignored_versions": ["2.7"], + 'ignored_versions': [], + + # Old samples are opted out of enforcing Python type hints + # All new samples should feature them + 'enforce_type_hints': False, + # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. 
You can also use your own string # to use your own Cloud project. - "gcloud_project_env": "GOOGLE_CLOUD_PROJECT", + 'gcloud_project_env': 'GOOGLE_CLOUD_PROJECT', # 'gcloud_project_env': 'BUILD_SPECIFIC_GCLOUD_PROJECT', + # If you need to use a specific version of pip, + # change pip_version_override to the string representation + # of the version number, for example, "20.2.4" + "pip_version_override": None, # A dictionary you want to inject into your test. Don't put any # secrets here. These values will override predefined values. - "envs": {}, + 'envs': {}, } try: # Ensure we can import noxfile_config in the project's directory. - sys.path.append(".") + sys.path.append('.') from noxfile_config import TEST_CONFIG_OVERRIDE except ImportError as e: print("No user noxfile_config found: detail: {}".format(e)) @@ -62,36 +73,36 @@ TEST_CONFIG.update(TEST_CONFIG_OVERRIDE) -def get_pytest_env_vars(): +def get_pytest_env_vars() -> Dict[str, str]: """Returns a dict for pytest invocation.""" ret = {} # Override the GCLOUD_PROJECT and the alias. - env_key = TEST_CONFIG["gcloud_project_env"] + env_key = TEST_CONFIG['gcloud_project_env'] # This should error out if not set. - ret["GOOGLE_CLOUD_PROJECT"] = os.environ[env_key] + ret['GOOGLE_CLOUD_PROJECT'] = os.environ[env_key] # Apply user supplied envs. - ret.update(TEST_CONFIG["envs"]) + ret.update(TEST_CONFIG['envs']) return ret # DO NOT EDIT - automatically generated. -# All versions used to tested samples. -ALL_VERSIONS = ["2.7", "3.6", "3.7", "3.8"] +# All versions used to test samples. +ALL_VERSIONS = ["3.6", "3.7", "3.8", "3.9"] # Any default versions that should be ignored. 
-IGNORED_VERSIONS = TEST_CONFIG["ignored_versions"] +IGNORED_VERSIONS = TEST_CONFIG['ignored_versions'] TESTED_VERSIONS = sorted([v for v in ALL_VERSIONS if v not in IGNORED_VERSIONS]) -INSTALL_LIBRARY_FROM_SOURCE = bool(os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False)) +INSTALL_LIBRARY_FROM_SOURCE = os.environ.get("INSTALL_LIBRARY_FROM_SOURCE", False) in ("True", "true") # # Style Checks # -def _determine_local_import_names(start_dir): +def _determine_local_import_names(start_dir: str) -> List[str]: """Determines all import names that should be considered "local". This is used when running the linter to insure that import order is @@ -129,17 +140,30 @@ def _determine_local_import_names(start_dir): @nox.session -def lint(session): - session.install("flake8", "flake8-import-order") +def lint(session: nox.sessions.Session) -> None: + if not TEST_CONFIG['enforce_type_hints']: + session.install("flake8", "flake8-import-order") + else: + session.install("flake8", "flake8-import-order", "flake8-annotations") local_names = _determine_local_import_names(".") args = FLAKE8_COMMON_ARGS + [ "--application-import-names", ",".join(local_names), - ".", + "." 
] session.run("flake8", *args) +# +# Black +# + +@nox.session +def blacken(session: nox.sessions.Session) -> None: + session.install(BLACK_VERSION) + python_files = [path for path in os.listdir(".") if path.endswith(".py")] + + session.run("black", *python_files) # # Sample Tests @@ -149,13 +173,22 @@ def lint(session): PYTEST_COMMON_ARGS = ["--junitxml=sponge_log.xml"] -def _session_tests(session, post_install=None): +def _session_tests(session: nox.sessions.Session, post_install: Callable = None) -> None: + if TEST_CONFIG["pip_version_override"]: + pip_version = TEST_CONFIG["pip_version_override"] + session.install(f"pip=={pip_version}") """Runs py.test for a particular project.""" if os.path.exists("requirements.txt"): - session.install("-r", "requirements.txt") + if os.path.exists("constraints.txt"): + session.install("-r", "requirements.txt", "-c", "constraints.txt") + else: + session.install("-r", "requirements.txt") if os.path.exists("requirements-test.txt"): - session.install("-r", "requirements-test.txt") + if os.path.exists("constraints-test.txt"): + session.install("-r", "requirements-test.txt", "-c", "constraints-test.txt") + else: + session.install("-r", "requirements-test.txt") if INSTALL_LIBRARY_FROM_SOURCE: session.install("-e", _get_repo_root()) @@ -175,14 +208,14 @@ def _session_tests(session, post_install=None): @nox.session(python=ALL_VERSIONS) -def py(session): +def py(session: nox.sessions.Session) -> None: """Runs py.test for a sample using the specified version of Python.""" if session.python in TESTED_VERSIONS: _session_tests(session) else: - session.skip( - "SKIPPED: {} tests are disabled for this sample.".format(session.python) - ) + session.skip("SKIPPED: {} tests are disabled for this sample.".format( + session.python + )) # @@ -190,7 +223,7 @@ def py(session): # -def _get_repo_root(): +def _get_repo_root() -> Optional[str]: """ Returns the root folder of the project. """ # Get root of this repository. 
Assume we don't have directories nested deeper than 10 items. p = Path(os.getcwd()) @@ -199,6 +232,11 @@ def _get_repo_root(): break if Path(p / ".git").exists(): return str(p) + # .git is not available in repos cloned via Cloud Build + # setup.py is always in the library's root, so use that instead + # https://github.com/googleapis/synthtool/issues/792 + if Path(p / "setup.py").exists(): + return str(p) p = p.parent raise Exception("Unable to detect repository root.") @@ -208,7 +246,7 @@ def _get_repo_root(): @nox.session @nox.parametrize("path", GENERATED_READMES) -def readmegen(session, path): +def readmegen(session: nox.sessions.Session, path: str) -> None: """(Re-)generates the readme for a sample.""" session.install("jinja2", "pyyaml") dir_ = os.path.dirname(path) From 1a135775966c8a2303ded529eba514dcf9db7205 Mon Sep 17 00:00:00 2001 From: Ivan Cheung Date: Wed, 18 Aug 2021 14:32:19 -0400 Subject: [PATCH 18/28] feat: add filter and timestamp splits (#627) * Fixed splits * Fixed docstrings * Fix test bug * Ran linter * Fixed FractionSplit and AutoMLVideo FilterSplit issues * Added warning for incomplete filter splits * Fixed AutoMLVideo tests * Fixed type * Moved annotation_schema_uri * Tweaked docstrings Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> --- google/cloud/aiplatform/training_jobs.py | 1181 +++++++++++++---- .../test_automl_forecasting_training_jobs.py | 28 +- .../test_automl_image_training_jobs.py | 302 ++++- .../test_automl_tabular_training_jobs.py | 400 ++++-- .../test_automl_text_training_jobs.py | 283 +++- .../test_automl_video_training_jobs.py | 253 +++- tests/unit/aiplatform/test_training_jobs.py | 224 ++-- 7 files changed, 2115 insertions(+), 556 deletions(-) diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 52418096be..15ef20af74 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -308,10 +308,14 @@ def 
run(self) -> Optional[models.Model]: def _create_input_data_config( dataset: Optional[datasets._Dataset] = None, annotation_schema_uri: Optional[str] = None, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, gcs_destination_uri_prefix: Optional[str] = None, bigquery_destination: Optional[str] = None, ) -> Optional[gca_training_pipeline.InputDataConfig]: @@ -349,17 +353,35 @@ def _create_input_data_config( and ``annotation_schema_uri``. training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. - training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. 
DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -370,6 +392,16 @@ def _create_input_data_config( ignored by the pipeline. Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. 
+ + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, validation_fraction_split and test_fraction_split. gcs_destination_uri_prefix (str): Optional. The Google Cloud Storage location. @@ -396,33 +428,97 @@ def _create_input_data_config( - AIP_TRAINING_DATA_URI ="bigquery_destination.dataset_*.training" - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" + Raises: + ValueError: When more than 1 type of split configuration is passed or when + the split configuartion passed is incompatible with the dataset schema. """ input_data_config = None if dataset: - # Create fraction split spec - fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=training_fraction_split, - validation_fraction=validation_fraction_split, - test_fraction=test_fraction_split, - ) - - # Create predefined split spec + # Initialize all possible splits + filter_split = None predefined_split = None - if predefined_split_column_name: - if dataset._gca_resource.metadata_schema_uri not in ( - schema.dataset.metadata.tabular, - schema.dataset.metadata.time_series, + timestamp_split = None + fraction_split = None + + # Create filter split + if any( + [ + training_filter_split is not None, + validation_filter_split is not None, + test_filter_split is not None, + ] + ): + if all( + [ + training_filter_split is not None, + validation_filter_split is not None, + test_filter_split is not None, + ] ): + filter_split = gca_training_pipeline.FilterSplit( + training_filter=training_filter_split, + validation_filter=validation_filter_split, + test_filter=test_filter_split, + ) + else: raise ValueError( - "A pre-defined split may only be used with a tabular or time series Dataset" + "All filter splits must be passed together or not at all" ) + # Create predefined split + if predefined_split_column_name: predefined_split = 
gca_training_pipeline.PredefinedSplit( key=predefined_split_column_name ) - # Create GCS destination + # Create timestamp split or fraction split + if timestamp_split_column_name: + timestamp_split = gca_training_pipeline.TimestampSplit( + training_fraction=training_fraction_split, + validation_fraction=validation_fraction_split, + test_fraction=test_fraction_split, + key=timestamp_split_column_name, + ) + elif any( + [ + training_fraction_split is not None, + validation_fraction_split is not None, + test_fraction_split is not None, + ] + ): + fraction_split = gca_training_pipeline.FractionSplit( + training_fraction=training_fraction_split, + validation_fraction=validation_fraction_split, + test_fraction=test_fraction_split, + ) + + splits = [ + split + for split in [ + filter_split, + predefined_split, + timestamp_split_column_name, + fraction_split, + ] + if split is not None + ] + + # Fallback to fraction split if nothing else is specified + if len(splits) == 0: + _LOGGER.info( + "No dataset split provided. The service will use a default split." + ) + elif len(splits) > 1: + raise ValueError( + """Can only specify one of: + 1. training_filter_split, validation_filter_split, test_filter_split + 2. predefined_split_column_name + 3. timestamp_split_column_name, training_fraction_split, validation_fraction_split, test_fraction_split + 4. 
training_fraction_split, validation_fraction_split, test_fraction_split""" + ) + + # create GCS destination gcs_destination = None if gcs_destination_uri_prefix: gcs_destination = gca_io.GcsDestination( @@ -439,7 +535,9 @@ def _create_input_data_config( # create input data config input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=fraction_split, + filter_split=filter_split, predefined_split=predefined_split, + timestamp_split=timestamp_split, dataset_id=dataset.name, annotation_schema_uri=annotation_schema_uri, gcs_destination=gcs_destination, @@ -453,11 +551,15 @@ def _run_job( training_task_definition: str, training_task_inputs: Union[dict, proto.Message], dataset: Optional[datasets._Dataset], - training_fraction_split: float, - validation_fraction_split: float, - test_fraction_split: float, - annotation_schema_uri: Optional[str] = None, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, + annotation_schema_uri: Optional[str] = None, model: Optional[gca_model.Model] = None, gcs_destination_uri_prefix: Optional[str] = None, bigquery_destination: Optional[str] = None, @@ -488,15 +590,6 @@ def _run_job( [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. For tabular Datasets, all their data is exported to training, to pick and choose from. - training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. - validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. 
- test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. The schema is defined as an OpenAPI 3.0.2 @@ -519,6 +612,36 @@ def _run_job( ``annotations_filter`` and ``annotation_schema_uri``. + training_fraction_split (float): + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. 
A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -529,6 +652,16 @@ def _run_job( ignored by the pipeline. Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, validation_fraction_split and test_fraction_split. model (~.model.Model): Optional. Describes the Model that may be uploaded (via [ModelService.UploadMode][]) by this TrainingPipeline. 
The @@ -583,7 +716,11 @@ def _run_job( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, gcs_destination_uri_prefix=gcs_destination_uri_prefix, bigquery_destination=bigquery_destination, ) @@ -1574,8 +1711,6 @@ def __init__( self._requirements = requirements self._script_path = script_path - # TODO(b/172365904) add filter split, training_pipeline.FilterSplit - # TODO(b/172368070) add timestamp split, training_pipeline.TimestampSplit def run( self, dataset: Optional[ @@ -1601,10 +1736,14 @@ def run( accelerator_count: int = 0, boot_disk_type: str = "pd-ssd", boot_disk_size_gb: int = 100, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, tensorboard: Optional[str] = None, sync=True, ) -> Optional[models.Model]: @@ -1616,12 +1755,36 @@ def run( ie: replica_count = 10 will result in 1 chief and 9 workers All replicas have same machine_type, accelerator_type, and accelerator_count - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. 
If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI.If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. Args: dataset ( @@ -1745,14 +1908,35 @@ def run( Size in GB of the boot disk, default is 100GB. boot disk size must be within the range of [100, 64000]. 
training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. 
If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -1762,6 +1946,15 @@ def run( key is not present or has an invalid value, that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. tensorboard (str): Optional. 
The name of a Vertex AI @@ -1818,7 +2011,11 @@ def run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, tensorboard=tensorboard, sync=sync, ) @@ -1844,10 +2041,14 @@ def _run( service_account: Optional[str] = None, network: Optional[str] = None, bigquery_destination: Optional[str] = None, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, tensorboard: Optional[str] = None, sync=True, ) -> Optional[models.Model]: @@ -1918,14 +2119,35 @@ def _run( - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. + Optional. 
The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -1935,6 +2157,15 @@ def _run( key is not present or has an invalid value, that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. 
The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. tensorboard (str): Optional. The name of a Vertex AI @@ -2001,7 +2232,11 @@ def _run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, model=managed_model, gcs_destination_uri_prefix=base_output_dir, bigquery_destination=bigquery_destination, @@ -2238,8 +2473,6 @@ def __init__( self._command = command - # TODO(b/172365904) add filter split, training_pipeline.FilterSplit - # TODO(b/172368070) add timestamp split, training_pipeline.TimestampSplit def run( self, dataset: Optional[ @@ -2265,10 +2498,14 @@ def run( accelerator_count: int = 0, boot_disk_type: str = "pd-ssd", boot_disk_size_gb: int = 100, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, tensorboard: Optional[str] = None, sync=True, ) -> Optional[models.Model]: @@ -2280,12 +2517,36 @@ def run( ie: replica_count = 10 will result in 1 chief and 9 workers All replicas have same 
machine_type, accelerator_type, and accelerator_count - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. 
Args: dataset (Union[datasets.ImageDataset,datasets.TabularDataset,datasets.TextDataset,datasets.VideoDataset]): @@ -2402,14 +2663,35 @@ def run( Size in GB of the boot disk, default is 100GB. boot disk size must be within the range of [100, 64000]. training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. 
This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -2419,6 +2701,15 @@ def run( key is not present or has an invalid value, that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. tensorboard (str): Optional. 
The name of a Vertex AI @@ -2474,7 +2765,11 @@ def run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, tensorboard=tensorboard, sync=sync, ) @@ -2499,10 +2794,14 @@ def _run( service_account: Optional[str] = None, network: Optional[str] = None, bigquery_destination: Optional[str] = None, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, tensorboard: Optional[str] = None, sync=True, ) -> Optional[models.Model]: @@ -2569,14 +2868,35 @@ def _run( - AIP_VALIDATION_DATA_URI = "bigquery_destination.dataset_*.validation" - AIP_TEST_DATA_URI = "bigquery_destination.dataset_*.test" training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. + Optional. 
The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -2586,6 +2906,15 @@ def _run( key is not present or has an invalid value, that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. 
The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. tensorboard (str): Optional. The name of a Vertex AI @@ -2646,7 +2975,11 @@ def _run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, model=managed_model, gcs_destination_uri_prefix=base_output_dir, bigquery_destination=bigquery_destination, @@ -2848,10 +3181,11 @@ def run( self, dataset: datasets.TabularDataset, target_column: str, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, weight_column: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, @@ -2864,12 +3198,25 @@ def run( ) -> models.Model: """Runs the training job and returns a model. - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. 
If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. Args: dataset (datasets.TabularDataset): @@ -2883,14 +3230,14 @@ def run( target_column (str): Required. The name of the column values of which the Model is to predict. training_fraction_split (float): - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - Required. The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - Required. 
The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -2901,6 +3248,16 @@ def run( ignored by the pipeline. Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, validation_fraction_split and test_fraction_split. weight_column (str): Optional. Name of the column that should be used as the weight column. 
Higher values in this column give more importance to the row @@ -2992,6 +3349,7 @@ def run( validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, weight_column=weight_column, budget_milli_node_hours=budget_milli_node_hours, model_display_name=model_display_name, @@ -3008,10 +3366,11 @@ def _run( self, dataset: datasets.TabularDataset, target_column: str, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, weight_column: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, @@ -3024,12 +3383,25 @@ def _run( ) -> models.Model: """Runs the training job and returns a model. - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. 
If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. Args: dataset (datasets.TabularDataset): @@ -3043,14 +3415,14 @@ def _run( target_column (str): Required. The name of the column values of which the Model is to predict. training_fraction_split (float): - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - Required. The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - Required. The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -3061,6 +3433,16 @@ def _run( ignored by the pipeline. Supported only for tabular and time series Datasets. 
+ timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + + Supported only for tabular and time series Datasets. + This parameter must be used with training_fraction_split, validation_fraction_split and test_fraction_split. weight_column (str): Optional. Name of the column that should be used as the weight column. Higher values in this column give more importance to the row @@ -3200,6 +3582,7 @@ def _run( validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, model=model, ) @@ -3740,9 +4123,9 @@ def _run( training_task_definition=training_task_definition, training_task_inputs=training_task_inputs_dict, dataset=dataset, - training_fraction_split=0.8, - validation_fraction_split=0.1, - test_fraction_split=0.1, + training_fraction_split=None, + validation_fraction_split=None, + test_fraction_split=None, predefined_split_column_name=predefined_split_column_name, model=model, ) @@ -3929,9 +4312,12 @@ def __init__( def run( self, dataset: datasets.ImageDataset, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, @@ 
-3940,12 +4326,24 @@ def run( ) -> models.Model: """Runs the AutoML Image training job and returns a model. - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. Args: dataset (datasets.ImageDataset): @@ -3956,15 +4354,36 @@ def run( [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. For tabular Datasets, all their data is exported to training, to pick and choose from. - training_fraction_split: float = 0.8 - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. - validation_fraction_split: float = 0.1 - Required. 
The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. - test_fraction_split: float = 0.1 - Required. The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + training_fraction_split (float): + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. 
If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. budget_milli_node_hours: int = 1000 Optional. The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. @@ -4026,6 +4445,9 @@ def run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, budget_milli_node_hours=budget_milli_node_hours, model_display_name=model_display_name, model_labels=model_labels, @@ -4038,9 +4460,12 @@ def _run( self, dataset: datasets.ImageDataset, base_model: Optional[models.Model] = None, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, budget_milli_node_hours: int = 1000, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, @@ -4049,12 +4474,24 @@ def _run( ) -> models.Model: """Runs the training job and returns a model. - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. 
+ If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. Args: dataset (datasets.ImageDataset): @@ -4072,14 +4509,35 @@ def _run( must be in the same Project and Location as the new Model to train, and have the same model_type. training_fraction_split (float): - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - Required. The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - Required. The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. 
The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. budget_milli_node_hours (int): Optional. The train budget of creating this Model, expressed in milli node hours i.e. 1,000 value in this field means 1 node hour. 
@@ -4162,6 +4620,9 @@ def _run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, model=model_tbt, ) @@ -4437,10 +4898,14 @@ def run( accelerator_count: int = 0, boot_disk_type: str = "pd-ssd", boot_disk_size_gb: int = 100, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, tensorboard: Optional[str] = None, sync=True, ) -> Optional[models.Model]: @@ -4452,12 +4917,36 @@ def run( ie: replica_count = 10 will result in 1 chief and 9 workers All replicas have same machine_type, accelerator_type, and accelerator_count - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI.If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. 
If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. + + Predefined splits: + Assigns input data to training, validation, and test sets based on the value of a provided key. + If using predefined splits, ``predefined_split_column_name`` must be provided. + Supported only for tabular Datasets. + + Timestamp splits: + Assigns input data to training, validation, and test sets + based on a provided timestamps. The youngest data pieces are + assigned to training set, next to validation set, and the oldest + to the test set. + Supported only for tabular Datasets. Args: dataset (Union[datasets.ImageDataset,datasets.TabularDataset,datasets.TextDataset,datasets.VideoDataset,]): @@ -4574,14 +5063,35 @@ def run( Size in GB of the boot disk, default is 100GB. boot disk size must be within the range of [100, 64000]. training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. 
test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -4591,6 +5101,15 @@ def run( key is not present or has an invalid value, that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. 
+ timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. tensorboard (str): Optional. The name of a Vertex AI @@ -4640,7 +5159,11 @@ def run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, bigquery_destination=bigquery_destination, tensorboard=tensorboard, sync=sync, @@ -4665,10 +5188,14 @@ def _run( base_output_dir: Optional[str] = None, service_account: Optional[str] = None, network: Optional[str] = None, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, predefined_split_column_name: Optional[str] = None, + timestamp_split_column_name: Optional[str] = None, bigquery_destination: Optional[str] = None, tensorboard: Optional[str] = None, sync=True, @@ -4723,14 +5250,35 @@ def _run( Private services access must already be configured for the network. If left unspecified, the job is not peered with any network. 
training_fraction_split (float): - The fraction of the input data that is to be - used to train the Model. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - The fraction of the input data that is to be - used to validate the Model. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - The fraction of the input data that is to be - used to evaluate the Model. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. 
If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. predefined_split_column_name (str): Optional. The key is a name of one of the Dataset's data columns. The value of the key (either the label's value or @@ -4740,6 +5288,15 @@ def _run( key is not present or has an invalid value, that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. + timestamp_split_column_name (str): + Optional. The key is a name of one of the Dataset's data + columns. The value of the key values of the key (the values in + the column) must be in RFC 3339 `date-time` format, where + `time-offset` = `"Z"` (e.g. 1985-04-12T23:20:50.52Z). If for a + piece of data the key is not present or has an invalid value, + that piece is ignored by the pipeline. + Supported only for tabular and time series Datasets. tensorboard (str): Optional. 
The name of a Vertex AI @@ -4800,7 +5357,11 @@ def _run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, predefined_split_column_name=predefined_split_column_name, + timestamp_split_column_name=timestamp_split_column_name, model=managed_model, gcs_destination_uri_prefix=base_output_dir, bigquery_destination=bigquery_destination, @@ -4945,18 +5506,32 @@ def __init__( def run( self, dataset: datasets.VideoDataset, - training_fraction_split: float = 0.8, - test_fraction_split: float = 0.2, + training_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the AutoML Image training job and returns a model. - Data fraction splits: - ``training_fraction_split``, and ``test_fraction_split`` may optionally - be provided, they must sum to up to 1. If none of the fractions are set, - by default roughly 80% of data will be used for training, and 20% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + ``training_fraction_split``, and ``test_fraction_split`` may optionally + be provided, they must sum to up to 1. If none of the fractions are set, + by default roughly 80% of data will be used for training, and 20% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. 
+ If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. Args: dataset (datasets.VideoDataset): @@ -4967,12 +5542,26 @@ def run( [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. For tabular Datasets, all their data is exported to training, to pick and choose from. - training_fraction_split: float = 0.8 - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. - test_fraction_split: float = 0.2 - Required. The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + training_fraction_split (float): + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. 
If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. model_display_name (str): Optional. The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can be consist of any UTF-8 @@ -5014,6 +5603,8 @@ def run( dataset=dataset, training_fraction_split=training_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + test_filter_split=test_filter_split, model_display_name=model_display_name, model_labels=model_labels, sync=sync, @@ -5023,18 +5614,32 @@ def run( def _run( self, dataset: datasets.VideoDataset, - training_fraction_split: float = 0.8, - test_fraction_split: float = 0.2, + training_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. - Data fraction splits: - Any of ``training_fraction_split``, and ``test_fraction_split`` may optionally - be provided, they must sum to up to 1. If none of the fractions are set, - by default roughly 80% of data will be used for training, and 20% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, and ``test_fraction_split`` may optionally + be provided, they must sum to up to 1. If none of the fractions are set, + by default roughly 80% of data will be used for training, and 20% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. 
Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. Args: dataset (datasets.VideoDataset): @@ -5046,11 +5651,25 @@ def _run( For tabular Datasets, all their data is exported to training, to pick and choose from. training_fraction_split (float): - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - Required. The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. 
This is ignored if Dataset is not provided. model_display_name (str): Optional. The display name of the managed Vertex AI Model. The name can be up to 128 characters long and can be consist of any UTF-8 @@ -5091,13 +5710,22 @@ def _run( model_tbt.display_name = model_display_name or self._display_name model_tbt.labels = model_labels or self._labels + # AutoMLVideo does not support validation, so pass in '-' if any other filter split is provided. + validation_filter_split = ( + "-" + if all([training_filter_split is not None, test_filter_split is not None]) + else None + ) + return self._run_job( training_task_definition=training_task_definition, training_task_inputs=training_task_inputs_dict, dataset=dataset, training_fraction_split=training_fraction_split, - validation_fraction_split=0.0, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, model=model_tbt, ) @@ -5252,21 +5880,36 @@ def __init__( def run( self, dataset: datasets.TextDataset, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. 
If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. Args: dataset (datasets.TextDataset): @@ -5275,15 +5918,36 @@ def run( and what is compatible should be described in the used TrainingPipeline's [training_task_definition] [google.cloud.aiplatform.v1beta1.TrainingPipeline.training_task_definition]. - training_fraction_split: float = 0.8 - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. - validation_fraction_split: float = 0.1 - Required. The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. - test_fraction_split: float = 0.1 - Required. The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + training_fraction_split (float): + Optional. 
The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. + validation_fraction_split (float): + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. + test_fraction_split (float): + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. model_display_name (str): Optional. The display name of the managed Vertex AI Model. 
The name can be up to 128 characters long and can consist @@ -5327,6 +5991,9 @@ def run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, model_display_name=model_display_name, model_labels=model_labels, sync=sync, @@ -5336,21 +6003,36 @@ def run( def _run( self, dataset: datasets.TextDataset, - training_fraction_split: float = 0.8, - validation_fraction_split: float = 0.1, - test_fraction_split: float = 0.1, + training_fraction_split: Optional[float] = None, + validation_fraction_split: Optional[float] = None, + test_fraction_split: Optional[float] = None, + training_filter_split: Optional[str] = None, + validation_filter_split: Optional[str] = None, + test_filter_split: Optional[str] = None, model_display_name: Optional[str] = None, model_labels: Optional[Dict[str, str]] = None, sync: bool = True, ) -> models.Model: """Runs the training job and returns a model. - Data fraction splits: - Any of ``training_fraction_split``, ``validation_fraction_split`` and - ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If - the provided ones sum to less than 1, the remainder is assigned to sets as - decided by Vertex AI. If none of the fractions are set, by default roughly 80% - of data will be used for training, 10% for validation, and 10% for test. + If training on a Vertex AI dataset, you can use one of the following split configurations: + Data fraction splits: + Any of ``training_fraction_split``, ``validation_fraction_split`` and + ``test_fraction_split`` may optionally be provided, they must sum to up to 1. If + the provided ones sum to less than 1, the remainder is assigned to sets as + decided by Vertex AI. 
If none of the fractions are set, by default roughly 80% + of data will be used for training, 10% for validation, and 10% for test. + + Data filter splits: + Assigns input data to training, validation, and test sets + based on the given filters, data pieces not matched by any + filter are ignored. Currently only supported for Datasets + containing DataItems. + If any of the filters in this message are to match nothing, then + they can be set as '-' (the minus sign). + If using filter splits, all of ``training_filter_split``, ``validation_filter_split`` and + ``test_filter_split`` must be provided. + Supported only for unstructured Datasets. Args: dataset (datasets.TextDataset): @@ -5362,14 +6044,35 @@ def _run( For Text Datasets, all their data is exported to training, to pick and choose from. training_fraction_split (float): - Required. The fraction of the input data that is to be - used to train the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to train + the Model. This is ignored if Dataset is not provided. validation_fraction_split (float): - Required. The fraction of the input data that is to be - used to validate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to validate + the Model. This is ignored if Dataset is not provided. test_fraction_split (float): - Required. The fraction of the input data that is to be - used to evaluate the Model. This is ignored if Dataset is not provided. + Optional. The fraction of the input data that is to be used to evaluate + the Model. This is ignored if Dataset is not provided. + training_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to train the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. 
If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + validation_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to validate the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. + test_filter_split (str): + Optional. A filter on DataItems of the Dataset. DataItems that match + this filter are used to test the Model. A filter with same syntax + as the one used in DatasetService.ListDataItems may be used. If a + single DataItem is matched by more than one of the FilterSplit filters, + then it is assigned to the first set that applies to it in the training, + validation, test order. This is ignored if Dataset is not provided. model_display_name (str): Optional. If the script produces a managed Vertex AI Model. The display name of the Model. 
The name can be up to 128 characters long and can be consist @@ -5409,7 +6112,9 @@ def _run( training_fraction_split=training_fraction_split, validation_fraction_split=validation_fraction_split, test_fraction_split=test_fraction_split, - predefined_split_column_name=None, + training_filter_split=training_filter_split, + validation_filter_split=validation_filter_split, + test_filter_split=test_filter_split, model=model, ) diff --git a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py index d699563327..8dc1f362ba 100644 --- a/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_forecasting_training_jobs.py @@ -103,14 +103,11 @@ _TEST_DATASET_NAME = "test-dataset-name" _TEST_MODEL_DISPLAY_NAME = "model-display-name" + _TEST_LABELS = {"key": "value"} _TEST_MODEL_LABELS = {"model_key": "model_value"} -_TEST_TRAINING_FRACTION_SPLIT = 0.8 -_TEST_VALIDATION_FRACTION_SPLIT = 0.1 -_TEST_TEST_FRACTION_SPLIT = 0.1 -_TEST_PREDEFINED_SPLIT_COLUMN_NAME = "split" -_TEST_OUTPUT_PYTHON_PACKAGE_PATH = "gs://test/ouput/python/trainer.tar.gz" +_TEST_PREDEFINED_SPLIT_COLUMN_NAME = "split" _TEST_MODEL_NAME = "projects/my-project/locations/us-central1/models/12345" @@ -261,18 +258,11 @@ def test_run_call_pipeline_service_create( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, predefined_split=gca_training_pipeline.PredefinedSplit( key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME ), @@ -348,19 +338,12 @@ def 
test_run_call_pipeline_if_no_model_display_name_nor_model_labels( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - # Test that if defaults to the job display name true_managed_model = gca_model.Model( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_time_series.name, ) @@ -422,17 +405,10 @@ def test_run_call_pipeline_if_set_additional_experiments( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - # Test that if defaults to the job display name true_managed_model = gca_model.Model(display_name=_TEST_DISPLAY_NAME) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_time_series.name, ) diff --git a/tests/unit/aiplatform/test_automl_image_training_jobs.py b/tests/unit/aiplatform/test_automl_image_training_jobs.py index a46f960b1c..7f092f12d1 100644 --- a/tests/unit/aiplatform/test_automl_image_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_image_training_jobs.py @@ -74,6 +74,10 @@ _TEST_FRACTION_SPLIT_VALIDATION = 0.2 _TEST_FRACTION_SPLIT_TEST = 0.2 +_TEST_FILTER_SPLIT_TRAINING = "train" +_TEST_FILTER_SPLIT_VALIDATION = "validate" +_TEST_FILTER_SPLIT_TEST = "test" + _TEST_MODEL_NAME = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_MODEL_ID}" ) @@ -159,6 +163,7 @@ def mock_model_service_get(): def mock_dataset_image(): ds = mock.MagicMock(datasets.ImageDataset) ds.name = _TEST_DATASET_NAME + ds.metadata_schema_uri = 
_TEST_METADATA_SCHEMA_URI_IMAGE ds._latest_future = None ds._exception = None ds._gca_resource = gca_dataset.Dataset( @@ -172,7 +177,7 @@ def mock_dataset_image(): @pytest.fixture -def mock_model_image(): +def mock_model(): model = mock.MagicMock(models.Model) model.name = _TEST_MODEL_ID model._latest_future = None @@ -193,7 +198,7 @@ def setup_method(self): def teardown_method(self): initializer.global_pool.shutdown(wait=True) - def test_init_all_parameters(self, mock_model_image): + def test_init_all_parameters(self, mock_model): """Ensure all private members are set correctly at initialization""" aiplatform.init(project=_TEST_PROJECT) @@ -202,7 +207,7 @@ def test_init_all_parameters(self, mock_model_image): display_name=_TEST_DISPLAY_NAME, prediction_type=_TEST_PREDICTION_TYPE_ICN, model_type=_TEST_MODEL_TYPE_MOBILE, - base_model=mock_model_image, + base_model=mock_model, multi_label=True, ) @@ -210,9 +215,9 @@ def test_init_all_parameters(self, mock_model_image): assert job._model_type == _TEST_MODEL_TYPE_MOBILE assert job._prediction_type == _TEST_PREDICTION_TYPE_ICN assert job._multi_label is True - assert job._base_model == mock_model_image + assert job._base_model == mock_model - def test_init_wrong_parameters(self, mock_model_image): + def test_init_wrong_parameters(self, mock_model): """Ensure correct exceptions are raised when initializing with invalid args""" aiplatform.init(project=_TEST_PROJECT) @@ -233,7 +238,7 @@ def test_init_wrong_parameters(self, mock_model_image): training_jobs.AutoMLImageTrainingJob( display_name=_TEST_DISPLAY_NAME, prediction_type=_TEST_PREDICTION_TYPE_IOD, - base_model=mock_model_image, + base_model=mock_model, ) @pytest.mark.parametrize("sync", [True, False]) @@ -243,7 +248,7 @@ def test_run_call_pipeline_service_create( mock_pipeline_service_get, mock_dataset_image, mock_model_service_get, - mock_model_image, + mock_model, sync, ): """Create and run an AutoML ICN training job, verify calls and return value""" @@ -254,18 
+259,16 @@ def test_run_call_pipeline_service_create( ) job = training_jobs.AutoMLImageTrainingJob( - display_name=_TEST_DISPLAY_NAME, - base_model=mock_model_image, - labels=_TEST_LABELS, + display_name=_TEST_DISPLAY_NAME, base_model=mock_model, labels=_TEST_LABELS, ) model_from_job = job.run( dataset=mock_dataset_image, model_display_name=_TEST_MODEL_DISPLAY_NAME, model_labels=_TEST_MODEL_LABELS, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION, + test_filter_split=_TEST_FILTER_SPLIT_TEST, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, sync=sync, @@ -274,21 +277,21 @@ def test_run_call_pipeline_service_create( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction=_TEST_FRACTION_SPLIT_TEST, + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_FILTER_SPLIT_TRAINING, + validation_filter=_TEST_FILTER_SPLIT_VALIDATION, + test_filter=_TEST_FILTER_SPLIT_TEST, ) true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, - labels=mock_model_image._gca_resource.labels, - description=mock_model_image._gca_resource.description, + labels=mock_model._gca_resource.labels, + description=mock_model._gca_resource.description, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_image.name, + filter_split=true_filter_split, dataset_id=mock_dataset_image.name, ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -333,9 +336,6 @@ def 
test_run_call_pipeline_if_no_model_display_name_nor_model_labels( model_from_job = job.run( dataset=mock_dataset_image, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, ) @@ -343,12 +343,6 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction=_TEST_FRACTION_SPLIT_TEST, - ) - # Test that if defaults to the job display name true_managed_model = gca_model.Model( display_name=_TEST_DISPLAY_NAME, @@ -357,7 +351,7 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_image.name, + dataset_id=mock_dataset_image.name ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -398,13 +392,38 @@ def test_run_called_twice_raises(self, mock_dataset_image, sync): with pytest.raises(RuntimeError): job.run( + dataset=mock_dataset_image, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + @pytest.mark.usefixtures( + "mock_pipeline_service_create", + "mock_pipeline_service_get", + "mock_model_service_get", + ) + @pytest.mark.parametrize("sync", [True, False]) + def test_run_with_two_split_raises( + self, mock_dataset_image, sync, + ): + aiplatform.init(project=_TEST_PROJECT) + + job = training_jobs.AutoMLImageTrainingJob(display_name=_TEST_DISPLAY_NAME,) + + with pytest.raises(ValueError): + model_from_job = job.run( dataset=mock_dataset_image, model_display_name=_TEST_MODEL_DISPLAY_NAME, 
training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION, + test_filter_split=_TEST_FILTER_SPLIT_TEST, sync=sync, ) + if not sync: + model_from_job.wait() @pytest.mark.parametrize("sync", [True, False]) def test_run_raises_if_pipeline_fails( @@ -444,3 +463,226 @@ def test_raises_before_run_is_called(self, mock_pipeline_service_create): with pytest.raises(RuntimeError): job.state + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_fraction( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_image, + mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + job = training_jobs.AutoMLImageTrainingJob( + display_name=_TEST_DISPLAY_NAME, base_model=mock_model + ) + + model_from_job = job.run( + dataset=mock_dataset_image, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_fraction_split = gca_training_pipeline.FractionSplit( + training_fraction=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction=_TEST_FRACTION_SPLIT_TEST, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + 
true_input_data_config = gca_training_pipeline.InputDataConfig( + fraction_split=true_fraction_split, dataset_id=mock_dataset_image.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_image_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_filter( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_image, + mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLImageTrainingJob( + display_name=_TEST_DISPLAY_NAME, base_model=mock_model + ) + + model_from_job = job.run( + dataset=mock_dataset_image, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION, + test_filter_split=_TEST_FILTER_SPLIT_TEST, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_FILTER_SPLIT_TRAINING, + validation_filter=_TEST_FILTER_SPLIT_VALIDATION, + test_filter=_TEST_FILTER_SPLIT_TEST, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + 
description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + filter_split=true_filter_split, dataset_id=mock_dataset_image.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_image_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_default( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_image, + mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. 
+ Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLImageTrainingJob( + display_name=_TEST_DISPLAY_NAME, base_model=mock_model + ) + + model_from_job = job.run( + dataset=mock_dataset_image, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + dataset_id=mock_dataset_image.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_image_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_WITH_BASE_MODEL, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + def test_splits_filter_incomplete( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_image, + mock_model_service_get, + mock_model, + ): + """ + Initiate aiplatform with encryption key name. 
+ Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLImageTrainingJob( + display_name=_TEST_DISPLAY_NAME, base_model=mock_model + ) + + with pytest.raises(ValueError): + job.run( + dataset=mock_dataset_image, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_fraction_split=None, + test_filter_split=_TEST_FILTER_SPLIT_TEST, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + ) diff --git a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py index 2c380206e4..41614b738f 100644 --- a/tests/unit/aiplatform/test_automl_tabular_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_tabular_training_jobs.py @@ -140,10 +140,12 @@ _TEST_LABELS = {"key": "value"} _TEST_MODEL_LABELS = {"model_key": "model_value"} -_TEST_TRAINING_FRACTION_SPLIT = 0.6 -_TEST_VALIDATION_FRACTION_SPLIT = 0.2 -_TEST_TEST_FRACTION_SPLIT = 0.2 -_TEST_PREDEFINED_SPLIT_COLUMN_NAME = "split" +_TEST_FRACTION_SPLIT_TRAINING = 0.6 +_TEST_FRACTION_SPLIT_VALIDATION = 0.2 +_TEST_FRACTION_SPLIT_TEST = 0.2 + +_TEST_SPLIT_PREDEFINED_COLUMN_NAME = "split" +_TEST_SPLIT_TIMESTAMP_COLUMN_NAME = "timestamp" _TEST_OUTPUT_PYTHON_PACKAGE_PATH = "gs://test/ouput/python/trainer.tar.gz" @@ -325,10 +327,6 @@ def test_run_call_pipeline_service_create( target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, model_labels=_TEST_MODEL_LABELS, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, weight_column=_TEST_TRAINING_WEIGHT_COLUMN, 
budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, @@ -342,12 +340,6 @@ def test_run_call_pipeline_service_create( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, labels=_TEST_MODEL_LABELS, @@ -355,10 +347,6 @@ def test_run_call_pipeline_service_create( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), dataset_id=mock_dataset_tabular.name, ) @@ -417,10 +405,6 @@ def test_run_call_pipeline_service_create_with_export_eval_data_items( dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, weight_column=_TEST_TRAINING_WEIGHT_COLUMN, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, @@ -437,22 +421,12 @@ def test_run_call_pipeline_service_create_with_export_eval_data_items( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) true_input_data_config = 
gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), dataset_id=mock_dataset_tabular.name, ) @@ -508,9 +482,6 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( model_from_job = job.run( dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, weight_column=_TEST_TRAINING_WEIGHT_COLUMN, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, @@ -523,12 +494,6 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - # Test that if defaults to the job display name true_managed_model = gca_model.Model( display_name=_TEST_DISPLAY_NAME, @@ -537,7 +502,7 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_tabular.name, + dataset_id=mock_dataset_tabular.name, ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -584,10 +549,6 @@ def test_run_call_pipeline_service_create_if_no_column_transformations( dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, 
weight_column=_TEST_TRAINING_WEIGHT_COLUMN, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, @@ -601,22 +562,12 @@ def test_run_call_pipeline_service_create_if_no_column_transformations( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), dataset_id=mock_dataset_tabular.name, ) @@ -665,10 +616,6 @@ def test_run_call_pipeline_service_create_if_set_additional_experiments( dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, weight_column=_TEST_TRAINING_WEIGHT_COLUMN, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, @@ -682,22 +629,12 @@ def test_run_call_pipeline_service_create_if_set_additional_experiments( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) true_input_data_config 
= gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), dataset_id=mock_dataset_tabular.name, ) @@ -746,10 +683,6 @@ def test_run_call_pipeline_service_create_with_column_specs( dataset=mock_dataset_tabular_alternative, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, weight_column=_TEST_TRAINING_WEIGHT_COLUMN, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, @@ -759,19 +692,9 @@ def test_run_call_pipeline_service_create_with_column_specs( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), dataset_id=mock_dataset_tabular_alternative.name, ) @@ -858,10 +781,6 @@ def test_run_call_pipeline_service_create_with_column_specs_not_auto( dataset=mock_dataset_tabular_alternative, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, 
weight_column=_TEST_TRAINING_WEIGHT_COLUMN, budget_milli_node_hours=_TEST_TRAINING_BUDGET_MILLI_NODE_HOURS, disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, @@ -871,19 +790,9 @@ def test_run_call_pipeline_service_create_with_column_specs_not_auto( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_managed_model = gca_model.Model(display_name=_TEST_MODEL_DISPLAY_NAME) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), dataset_id=mock_dataset_tabular_alternative.name, ) @@ -923,9 +832,6 @@ def test_run_called_twice_raises(self, mock_dataset_tabular, sync): dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -938,9 +844,6 @@ def test_run_called_twice_raises(self, mock_dataset_tabular, sync): dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -965,9 +868,6 @@ def test_run_raises_if_pipeline_fails( model_display_name=_TEST_MODEL_DISPLAY_NAME, dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -996,9 +896,6 @@ 
def test_wait_for_resource_creation_does_not_fail_if_creation_does_not_fail( model_display_name=_TEST_MODEL_DISPLAY_NAME, dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=False, ) @@ -1033,9 +930,6 @@ def test_create_fails(self, mock_dataset_tabular, sync): model_display_name=_TEST_MODEL_DISPLAY_NAME, dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) assert e.match("Mock fail") @@ -1065,9 +959,6 @@ def test_create_fails(self, mock_dataset_tabular, sync): model_display_name=_TEST_MODEL_DISPLAY_NAME, dataset=mock_dataset_tabular, target_column=_TEST_TRAINING_TARGET_COLUMN, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -1163,3 +1054,280 @@ def test_properties_throw_if_not_available(self): assert e.match( regexp=r"AutoMLTabularTrainingJob resource has not been created" ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_fraction( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_tabular, + mock_model_service_get, + sync, + ): + """ + Initiate aiplatform with encryption key name. 
+ Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTabularTrainingJob( + display_name=_TEST_DISPLAY_NAME, + optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, + optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, + column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + optimization_objective_recall_value=None, + optimization_objective_precision_value=None, + ) + + model_from_job = job.run( + dataset=mock_dataset_tabular, + target_column=_TEST_TRAINING_TARGET_COLUMN, + weight_column=_TEST_TRAINING_WEIGHT_COLUMN, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_fraction_split = gca_training_pipeline.FractionSplit( + training_fraction=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction=_TEST_FRACTION_SPLIT_TEST, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + fraction_split=true_fraction_split, dataset_id=mock_dataset_tabular.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_tabular, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + 
mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_timestamp( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_tabular, + mock_model_service_get, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTabularTrainingJob( + display_name=_TEST_DISPLAY_NAME, + optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, + optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, + column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + optimization_objective_recall_value=None, + optimization_objective_precision_value=None, + ) + + model_from_job = job.run( + dataset=mock_dataset_tabular, + target_column=_TEST_TRAINING_TARGET_COLUMN, + weight_column=_TEST_TRAINING_WEIGHT_COLUMN, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + timestamp_split_column_name=_TEST_SPLIT_TIMESTAMP_COLUMN_NAME, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_split = gca_training_pipeline.TimestampSplit( + training_fraction=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction=_TEST_FRACTION_SPLIT_TEST, + key=_TEST_SPLIT_TIMESTAMP_COLUMN_NAME, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = 
gca_training_pipeline.InputDataConfig( + timestamp_split=true_split, dataset_id=mock_dataset_tabular.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_tabular, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_predefined( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_tabular, + mock_model_service_get, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTabularTrainingJob( + display_name=_TEST_DISPLAY_NAME, + optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, + optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, + column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + optimization_objective_recall_value=None, + optimization_objective_precision_value=None, + ) + + model_from_job = job.run( + dataset=mock_dataset_tabular, + target_column=_TEST_TRAINING_TARGET_COLUMN, + weight_column=_TEST_TRAINING_WEIGHT_COLUMN, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + predefined_split_column_name=_TEST_SPLIT_PREDEFINED_COLUMN_NAME, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_split = gca_training_pipeline.PredefinedSplit( + key=_TEST_SPLIT_PREDEFINED_COLUMN_NAME + ) + 
+ true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + predefined_split=true_split, dataset_id=mock_dataset_tabular.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_tabular, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_default( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_tabular, + mock_model_service_get, + sync, + ): + """ + Initiate aiplatform with encryption key name. 
+ Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTabularTrainingJob( + display_name=_TEST_DISPLAY_NAME, + optimization_prediction_type=_TEST_TRAINING_OPTIMIZATION_PREDICTION_TYPE, + optimization_objective=_TEST_TRAINING_OPTIMIZATION_OBJECTIVE_NAME, + column_transformations=_TEST_TRAINING_COLUMN_TRANSFORMATIONS, + optimization_objective_recall_value=None, + optimization_objective_precision_value=None, + ) + + model_from_job = job.run( + dataset=mock_dataset_tabular, + target_column=_TEST_TRAINING_TARGET_COLUMN, + weight_column=_TEST_TRAINING_WEIGHT_COLUMN, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + disable_early_stopping=_TEST_TRAINING_DISABLE_EARLY_STOPPING, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + dataset_id=mock_dataset_tabular.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_tabular, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) diff --git a/tests/unit/aiplatform/test_automl_text_training_jobs.py b/tests/unit/aiplatform/test_automl_text_training_jobs.py index 583789c00e..20220a1247 100644 --- a/tests/unit/aiplatform/test_automl_text_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_text_training_jobs.py @@ -59,6 
+59,10 @@ _TEST_FRACTION_SPLIT_TRAINING = 0.6 _TEST_FRACTION_SPLIT_VALIDATION = 0.2 _TEST_FRACTION_SPLIT_TEST = 0.2 +_TEST_FILTER_SPLIT_TRAINING = "train" +_TEST_FILTER_SPLIT_VALIDATION = "validate" +_TEST_FILTER_SPLIT_TEST = "test" +_TEST_PREDEFINED_SPLIT_COLUMN_NAME = "predefined_column" _TEST_MODEL_NAME = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_MODEL_ID}" @@ -145,6 +149,7 @@ def mock_model_service_get(): def mock_dataset_text(): ds = mock.MagicMock(datasets.TextDataset) ds.name = _TEST_DATASET_NAME + ds.metadata_schema_uri = _TEST_METADATA_SCHEMA_URI_TEXT ds._latest_future = None ds._exception = None ds._gca_resource = gca_dataset.Dataset( @@ -270,28 +275,19 @@ def test_init_aiplatform_with_encryption_key_name_and_create_training_job( model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, sync=sync, ) if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction=_TEST_FRACTION_SPLIT_TEST, - ) - true_managed_model = gca_model.Model( display_name=_TEST_MODEL_DISPLAY_NAME, encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, + dataset_id=mock_dataset_text.name, ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -334,19 +330,19 @@ def test_run_call_pipeline_service_create_classification( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, model_labels=_TEST_MODEL_LABELS, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, - 
test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION, + test_filter_split=_TEST_FILTER_SPLIT_TEST, sync=sync, ) if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction=_TEST_FRACTION_SPLIT_TEST, + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_FILTER_SPLIT_TRAINING, + validation_filter=_TEST_FILTER_SPLIT_VALIDATION, + test_filter=_TEST_FILTER_SPLIT_TEST, ) true_managed_model = gca_model.Model( @@ -356,7 +352,7 @@ def test_run_call_pipeline_service_create_classification( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, + filter_split=true_filter_split, dataset_id=mock_dataset_text.name, ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -472,19 +468,19 @@ def test_run_call_pipeline_service_create_sentiment( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, model_labels=_TEST_MODEL_LABELS, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION, + test_filter_split=_TEST_FILTER_SPLIT_TEST, sync=sync, ) if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction=_TEST_FRACTION_SPLIT_TEST, + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_FILTER_SPLIT_TRAINING, + validation_filter=_TEST_FILTER_SPLIT_VALIDATION, + 
test_filter=_TEST_FILTER_SPLIT_TEST, ) true_managed_model = gca_model.Model( @@ -492,7 +488,7 @@ def test_run_call_pipeline_service_create_sentiment( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, + filter_split=true_filter_split, dataset_id=mock_dataset_text.name, ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -537,9 +533,6 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( model_from_job = job.run( dataset=mock_dataset_text, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, model_display_name=None, # Omit model_display_name sync=sync, ) @@ -547,19 +540,13 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( if not sync: model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, - test_fraction=_TEST_FRACTION_SPLIT_TEST, - ) - # Test that if defaults to the job display name true_managed_model = gca_model.Model( display_name=_TEST_DISPLAY_NAME, labels=_TEST_LABELS, ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, + dataset_id=mock_dataset_text.name, ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -602,13 +589,42 @@ def test_run_called_twice_raises(self, mock_dataset_text, sync): with pytest.raises(RuntimeError): job.run( + dataset=mock_dataset_text, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + @pytest.mark.usefixtures( + "mock_pipeline_service_create", + "mock_pipeline_service_get", + "mock_model_service_get", + ) + @pytest.mark.parametrize("sync", [True, False]) + def test_run_with_two_split_raises( + self, mock_dataset_text, sync, + 
): + aiplatform.init(project=_TEST_PROJECT) + + job = training_jobs.AutoMLTextTrainingJob( + display_name=_TEST_DISPLAY_NAME, + prediction_type="classification", + multi_label=True, + ) + + with pytest.raises(ValueError): + model_from_job = job.run( dataset=mock_dataset_text, model_display_name=_TEST_MODEL_DISPLAY_NAME, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION, + test_filter_split=_TEST_FILTER_SPLIT_TEST, sync=sync, ) + if not sync: + model_from_job.wait() @pytest.mark.parametrize("sync", [True, False]) def test_run_raises_if_pipeline_fails( @@ -638,3 +654,198 @@ def test_run_raises_if_pipeline_fails( with pytest.raises(RuntimeError): job.get_model() + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_fraction( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_text, + mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. 
+ Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTextTrainingJob( + display_name=_TEST_DISPLAY_NAME, + prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION, + multi_label=_TEST_CLASSIFICATION_MULTILABEL, + ) + + model_from_job = job.run( + dataset=mock_dataset_text, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction_split=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_fraction_split = gca_training_pipeline.FractionSplit( + training_fraction=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction=_TEST_FRACTION_SPLIT_TEST, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + fraction_split=true_fraction_split, dataset_id=mock_dataset_text.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_text_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_filter( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + 
mock_dataset_text, + mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTextTrainingJob( + display_name=_TEST_DISPLAY_NAME, + prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION, + multi_label=_TEST_CLASSIFICATION_MULTILABEL, + ) + + model_from_job = job.run( + dataset=mock_dataset_text, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + validation_filter_split=_TEST_FILTER_SPLIT_VALIDATION, + test_filter_split=_TEST_FILTER_SPLIT_TEST, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_FILTER_SPLIT_TRAINING, + validation_filter=_TEST_FILTER_SPLIT_VALIDATION, + test_filter=_TEST_FILTER_SPLIT_TEST, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + filter_split=true_filter_split, dataset_id=mock_dataset_text.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_text_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def 
test_splits_default( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_text, + mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLTextTrainingJob( + display_name=_TEST_DISPLAY_NAME, + prediction_type=_TEST_PREDICTION_TYPE_CLASSIFICATION, + multi_label=_TEST_CLASSIFICATION_MULTILABEL, + ) + + model_from_job = job.run( + dataset=mock_dataset_text, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + dataset_id=mock_dataset_text.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_text_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS_CLASSIFICATION, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) diff --git a/tests/unit/aiplatform/test_automl_video_training_jobs.py b/tests/unit/aiplatform/test_automl_video_training_jobs.py index fc7d6f38e3..7326050ae4 100644 --- a/tests/unit/aiplatform/test_automl_video_training_jobs.py +++ b/tests/unit/aiplatform/test_automl_video_training_jobs.py @@ -54,7 +54,13 @@ ) _TEST_FRACTION_SPLIT_TRAINING = 0.8 
+_TEST_FRACTION_SPLIT_VALIDATION = 0.0 _TEST_FRACTION_SPLIT_TEST = 0.2 +_TEST_ALTERNATE_FRACTION_SPLIT_TRAINING = 0.7 +_TEST_ALTERNATE_FRACTION_SPLIT_TEST = 0.3 +_TEST_FILTER_SPLIT_TRAINING = "train" +_TEST_FILTER_SPLIT_VALIDATION = "-" +_TEST_FILTER_SPLIT_TEST = "test" _TEST_MODEL_NAME = ( f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/models/{_TEST_MODEL_ID}" @@ -141,6 +147,7 @@ def mock_model_service_get(): def mock_dataset_video(): ds = mock.MagicMock(datasets.VideoDataset) ds.name = _TEST_DATASET_NAME + ds.metadata_schema_uri = _TEST_METADATA_SCHEMA_URI_VIDEO ds._latest_future = None ds._exception = None ds._gca_resource = gca_dataset.Dataset( @@ -231,6 +238,72 @@ def test_init_aiplatform_with_encryption_key_name_and_create_training_job( model_type=_TEST_MODEL_TYPE_CLOUD, ) + model_from_job = job.run( + dataset=mock_dataset_video, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + dataset_id=mock_dataset_video.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_video_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + mock_model_service_get.assert_called_once_with(name=_TEST_MODEL_NAME) + assert job._gca_resource is mock_pipeline_service_get.return_value + assert model_from_job._gca_resource is 
mock_model_service_get.return_value + assert job.get_model()._gca_resource is mock_model_service_get.return_value + assert not job.has_failed + assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_fraction( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_video, + mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLVideoTrainingJob( + display_name=_TEST_DISPLAY_NAME, + prediction_type=_TEST_PREDICTION_TYPE_VCN, + model_type=_TEST_MODEL_TYPE_CLOUD, + ) + model_from_job = job.run( dataset=mock_dataset_video, model_display_name=_TEST_MODEL_DISPLAY_NAME, @@ -244,6 +317,7 @@ def test_init_aiplatform_with_encryption_key_name_and_create_training_job( true_fraction_split = gca_training_pipeline.FractionSplit( training_fraction=_TEST_FRACTION_SPLIT_TRAINING, + validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, test_fraction=_TEST_FRACTION_SPLIT_TEST, ) @@ -271,12 +345,131 @@ def test_init_aiplatform_with_encryption_key_name_and_create_training_job( training_pipeline=true_training_pipeline, ) - mock_model_service_get.assert_called_once_with(name=_TEST_MODEL_NAME) - assert job._gca_resource is mock_pipeline_service_get.return_value - assert model_from_job._gca_resource is mock_model_service_get.return_value - assert job.get_model()._gca_resource is mock_model_service_get.return_value - assert not job.has_failed - assert job.state == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_filter( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_video, + 
mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLVideoTrainingJob( + display_name=_TEST_DISPLAY_NAME, + prediction_type=_TEST_PREDICTION_TYPE_VCN, + model_type=_TEST_MODEL_TYPE_CLOUD, + ) + + model_from_job = job.run( + dataset=mock_dataset_video, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + test_filter_split=_TEST_FILTER_SPLIT_TEST, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_FILTER_SPLIT_TRAINING, + validation_filter=_TEST_FILTER_SPLIT_VALIDATION, + test_filter=_TEST_FILTER_SPLIT_TEST, + ) + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + filter_split=true_filter_split, dataset_id=mock_dataset_video.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_video_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_splits_default( + self, + mock_pipeline_service_create, + mock_pipeline_service_get, + mock_dataset_video, + 
mock_model_service_get, + mock_model, + sync, + ): + """ + Initiate aiplatform with encryption key name. + Create and run an AutoML Video Classification training job, verify calls and return value + """ + + aiplatform.init( + project=_TEST_PROJECT, + encryption_spec_key_name=_TEST_DEFAULT_ENCRYPTION_KEY_NAME, + ) + + job = training_jobs.AutoMLVideoTrainingJob( + display_name=_TEST_DISPLAY_NAME, + prediction_type=_TEST_PREDICTION_TYPE_VCN, + model_type=_TEST_MODEL_TYPE_CLOUD, + ) + + model_from_job = job.run( + dataset=mock_dataset_video, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + if not sync: + model_from_job.wait() + + true_managed_model = gca_model.Model( + display_name=_TEST_MODEL_DISPLAY_NAME, + description=mock_model._gca_resource.description, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + true_input_data_config = gca_training_pipeline.InputDataConfig( + dataset_id=mock_dataset_video.name, + ) + + true_training_pipeline = gca_training_pipeline.TrainingPipeline( + display_name=_TEST_DISPLAY_NAME, + training_task_definition=schema.training_job.definition.automl_video_classification, + training_task_inputs=_TEST_TRAINING_TASK_INPUTS, + model_to_upload=true_managed_model, + input_data_config=true_input_data_config, + encryption_spec=_TEST_DEFAULT_ENCRYPTION_SPEC, + ) + + mock_pipeline_service_create.assert_called_once_with( + parent=initializer.global_config.common_location_path(), + training_pipeline=true_training_pipeline, + ) @pytest.mark.parametrize("sync", [True, False]) def test_run_call_pipeline_service_create( @@ -305,17 +498,18 @@ def test_run_call_pipeline_service_create( dataset=mock_dataset_video, model_display_name=_TEST_MODEL_DISPLAY_NAME, model_labels=_TEST_MODEL_LABELS, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_filter_split=_TEST_FILTER_SPLIT_TRAINING, + test_filter_split=_TEST_FILTER_SPLIT_TEST, sync=sync, ) if not sync: 
model_from_job.wait() - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - test_fraction=_TEST_FRACTION_SPLIT_TEST, + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_FILTER_SPLIT_TRAINING, + validation_filter=_TEST_FILTER_SPLIT_VALIDATION, + test_filter=_TEST_FILTER_SPLIT_TEST, ) true_managed_model = gca_model.Model( @@ -326,7 +520,7 @@ def test_run_call_pipeline_service_create( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_dataset_video.name, + filter_split=true_filter_split, dataset_id=mock_dataset_video.name, ) true_training_pipeline = gca_training_pipeline.TrainingPipeline( @@ -371,16 +565,17 @@ def test_run_call_pipeline_if_no_model_display_name_nor_model_labels( model_from_job = job.run( dataset=mock_dataset_video, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_fraction_split=_TEST_ALTERNATE_FRACTION_SPLIT_TRAINING, + test_fraction_split=_TEST_ALTERNATE_FRACTION_SPLIT_TEST, ) if not sync: model_from_job.wait() true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_FRACTION_SPLIT_TRAINING, - test_fraction=_TEST_FRACTION_SPLIT_TEST, + training_fraction=_TEST_ALTERNATE_FRACTION_SPLIT_TRAINING, + validation_fraction=_TEST_FRACTION_SPLIT_VALIDATION, + test_fraction=_TEST_ALTERNATE_FRACTION_SPLIT_TEST, ) # Test that if defaults to the job display name @@ -422,19 +617,41 @@ def test_run_called_twice_raises( job.run( dataset=mock_dataset_video, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, - test_fraction_split=_TEST_FRACTION_SPLIT_TEST, sync=sync, ) with pytest.raises(RuntimeError): job.run( + dataset=mock_dataset_video, + model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + @pytest.mark.usefixtures( + "mock_pipeline_service_create", + 
"mock_pipeline_service_get", + "mock_model_service_get", + ) + @pytest.mark.parametrize("sync", [True, False]) + def test_run_with_two_split_raises( + self, mock_dataset_video, sync, + ): + aiplatform.init(project=_TEST_PROJECT) + + job = training_jobs.AutoMLVideoTrainingJob(display_name=_TEST_DISPLAY_NAME,) + + with pytest.raises(ValueError): + model_from_job = job.run( dataset=mock_dataset_video, model_display_name=_TEST_MODEL_DISPLAY_NAME, training_fraction_split=_TEST_FRACTION_SPLIT_TRAINING, test_fraction_split=_TEST_FRACTION_SPLIT_TEST, + training_filter_split=_TEST_FILTER_SPLIT_TEST, + test_filter_split=_TEST_FILTER_SPLIT_TEST, sync=sync, ) + if not sync: + model_from_job.wait() @pytest.mark.parametrize("sync", [True, False]) def test_run_raises_if_pipeline_fails( diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index 3e694e6a1e..0fd781b380 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -103,13 +103,14 @@ _TEST_LABELS = {"key": "value"} _TEST_MODEL_LABELS = {"model_key": "model_value"} -_TEST_DEFAULT_TRAINING_FRACTION_SPLIT = 0.8 -_TEST_DEFAULT_VALIDATION_FRACTION_SPLIT = 0.1 -_TEST_DEFAULT_TEST_FRACTION_SPLIT = 0.1 _TEST_TRAINING_FRACTION_SPLIT = 0.6 _TEST_VALIDATION_FRACTION_SPLIT = 0.2 _TEST_TEST_FRACTION_SPLIT = 0.2 +_TEST_TRAINING_FILTER_SPLIT = "train" +_TEST_VALIDATION_FILTER_SPLIT = "validate" +_TEST_TEST_FILTER_SPLIT = "test" _TEST_PREDEFINED_SPLIT_COLUMN_NAME = "split" +_TEST_TIMESTAMP_SPLIT_COLUMN_NAME = "timestamp" _TEST_PROJECT = "test-project" _TEST_LOCATION = "us-central1" @@ -579,6 +580,7 @@ def mock_python_package_to_gcs(): def mock_tabular_dataset(): ds = mock.MagicMock(datasets.TabularDataset) ds.name = _TEST_DATASET_NAME + ds.metadata_schema_uri = _TEST_METADATA_SCHEMA_URI_TABULAR ds._latest_future = None ds._exception = None ds._gca_resource = gca_dataset.Dataset( @@ -595,6 +597,7 @@ def mock_tabular_dataset(): def 
mock_nontabular_dataset(): ds = mock.MagicMock(datasets.ImageDataset) ds.name = _TEST_DATASET_NAME + ds.metadata_schema_uri = _TEST_METADATA_SCHEMA_URI_NONTABULAR ds._latest_future = None ds._exception = None ds._gca_resource = gca_dataset.Dataset( @@ -668,7 +671,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, + timestamp_split_column_name=_TEST_TIMESTAMP_SPLIT_COLUMN_NAME, tensorboard=_TEST_TENSORBOARD_RESOURCE_NAME, sync=sync, ) @@ -708,10 +711,11 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( }, } - true_fraction_split = gca_training_pipeline.FractionSplit( + true_timestamp_split = gca_training_pipeline.TimestampSplit( training_fraction=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction=_TEST_TEST_FRACTION_SPLIT, + key=_TEST_TIMESTAMP_SPLIT_COLUMN_NAME, ) env = [ @@ -748,10 +752,7 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), + timestamp_split=true_timestamp_split, dataset_id=mock_tabular_dataset.name, gcs_destination=gca_io.GcsDestination( output_uri_prefix=_TEST_BASE_OUTPUT_DIR @@ -843,9 +844,6 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, 
sync=sync, ) @@ -879,12 +877,6 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( }, } - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - env = [ gca_env_var.EnvVar(name=str(key), value=str(value)) for key, value in _TEST_MODEL_SERVING_CONTAINER_ENVIRONMENT_VARIABLES.items() @@ -918,7 +910,6 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, predefined_split=gca_training_pipeline.PredefinedSplit( key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME ), @@ -1049,6 +1040,34 @@ def test_run_with_invalid_accelerator_type_raises( accelerator_type=_TEST_INVALID_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_run_with_two_splits_raises( + self, + mock_pipeline_service_create, + mock_python_package_to_gcs, + mock_tabular_dataset, + mock_model_service_get, + sync, + ): + aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) + + job = training_jobs.CustomTrainingJob( + display_name=_TEST_DISPLAY_NAME, + script_path=_TEST_LOCAL_SCRIPT_FILE_NAME, + container_uri=_TEST_TRAINING_CONTAINER_IMAGE, + ) + + with pytest.raises(ValueError): + job.run( + dataset=mock_tabular_dataset, + replica_count=1, + machine_type=_TEST_MACHINE_TYPE, + accelerator_type=_TEST_INVALID_ACCELERATOR_TYPE, + accelerator_count=_TEST_ACCELERATOR_COUNT, + predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -1123,6 +1142,9 @@ def test_run_call_pipeline_service_create_with_no_dataset( 
training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, + training_filter_split=_TEST_TRAINING_FILTER_SPLIT, + validation_filter_split=_TEST_VALIDATION_FILTER_SPLIT, + test_filter_split=_TEST_TEST_FILTER_SPLIT, sync=sync, ) @@ -1379,9 +1401,6 @@ def test_run_call_pipeline_service_create_distributed_training( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -1441,12 +1460,6 @@ def test_run_call_pipeline_service_create_distributed_training( }, ] - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - true_container_spec = gca_model.ModelContainerSpec( image_uri=_TEST_SERVING_CONTAINER_IMAGE, predict_route=_TEST_SERVING_CONTAINER_PREDICTION_ROUTE, @@ -1464,7 +1477,6 @@ def test_run_call_pipeline_service_create_distributed_training( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, dataset_id=mock_tabular_dataset.name, gcs_destination=gca_io.GcsDestination( output_uri_prefix=_TEST_BASE_OUTPUT_DIR @@ -1660,6 +1672,9 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, + training_filter_split=_TEST_TRAINING_FILTER_SPLIT, + validation_filter_split=_TEST_VALIDATION_FILTER_SPLIT, + test_filter_split=_TEST_TEST_FILTER_SPLIT, sync=sync, ) @@ -1693,10 +1708,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ }, } - 
true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_DEFAULT_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_DEFAULT_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_DEFAULT_TEST_FRACTION_SPLIT, + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_TRAINING_FILTER_SPLIT, + validation_filter=_TEST_VALIDATION_FILTER_SPLIT, + test_filter=_TEST_TEST_FILTER_SPLIT, ) env = [ @@ -1732,7 +1747,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, + filter_split=true_filter_split, dataset_id=mock_nontabular_dataset.name, annotation_schema_uri=_TEST_ANNOTATION_SCHEMA_URI, gcs_destination=gca_io.GcsDestination( @@ -1909,9 +1924,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, model_labels=_TEST_MODEL_LABELS, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, service_account=_TEST_SERVICE_ACCOUNT, tensorboard=_TEST_TENSORBOARD_RESOURCE_NAME, @@ -1946,12 +1958,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( }, } - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - env = [ gca_env_var.EnvVar(name=str(key), value=str(value)) for key, value in _TEST_MODEL_SERVING_CONTAINER_ENVIRONMENT_VARIABLES.items() @@ -1986,7 +1992,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, 
predefined_split=gca_training_pipeline.PredefinedSplit( key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME ), @@ -2079,7 +2084,7 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, + timestamp_split_column_name=_TEST_TIMESTAMP_SPLIT_COLUMN_NAME, sync=sync, ) @@ -2106,10 +2111,11 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( }, } - true_fraction_split = gca_training_pipeline.FractionSplit( + true_timestamp_split = gca_training_pipeline.TimestampSplit( training_fraction=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction=_TEST_TEST_FRACTION_SPLIT, + key=_TEST_TIMESTAMP_SPLIT_COLUMN_NAME, ) env = [ @@ -2145,10 +2151,7 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), + timestamp_split=true_timestamp_split, dataset_id=mock_tabular_dataset.name, bigquery_destination=gca_io.BigQueryDestination( output_uri=_TEST_BIGQUERY_DESTINATION @@ -2276,6 +2279,33 @@ def test_run_with_invalid_accelerator_type_raises( accelerator_type=_TEST_INVALID_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, + sync=sync, + ) + + @pytest.mark.parametrize("sync", [True, False]) + def test_run_with_two_split_raises( + self, + mock_pipeline_service_create, + mock_python_package_to_gcs, + mock_tabular_dataset, + mock_model_service_get, + sync, + ): + aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) + + job = training_jobs.CustomContainerTrainingJob( + display_name=_TEST_DISPLAY_NAME, + 
container_uri=_TEST_TRAINING_CONTAINER_IMAGE, + ) + + with pytest.raises(ValueError): + job.run( + dataset=mock_tabular_dataset, + replica_count=1, + machine_type=_TEST_MACHINE_TYPE, + accelerator_type=_TEST_INVALID_ACCELERATOR_TYPE, + accelerator_count=_TEST_ACCELERATOR_COUNT, + predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, @@ -2432,9 +2462,6 @@ def test_run_returns_none_if_no_model_to_upload( machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -2734,6 +2761,9 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, model_labels=_TEST_MODEL_LABELS, + training_filter_split=_TEST_TRAINING_FILTER_SPLIT, + validation_filter_split=_TEST_VALIDATION_FILTER_SPLIT, + test_filter_split=_TEST_TEST_FILTER_SPLIT, sync=sync, ) @@ -2760,10 +2790,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( }, } - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_DEFAULT_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_DEFAULT_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_DEFAULT_TEST_FRACTION_SPLIT, + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_TRAINING_FILTER_SPLIT, + validation_filter=_TEST_VALIDATION_FILTER_SPLIT, + test_filter=_TEST_TEST_FILTER_SPLIT, ) env = [ @@ -2799,7 +2829,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, + 
filter_split=true_filter_split, dataset_id=mock_nontabular_dataset.name, annotation_schema_uri=_TEST_ANNOTATION_SCHEMA_URI, gcs_destination=gca_io.GcsDestination( @@ -3257,7 +3287,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, sync=sync, ) @@ -3331,9 +3360,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset( true_input_data_config = gca_training_pipeline.InputDataConfig( fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), dataset_id=mock_tabular_dataset.name, gcs_destination=gca_io.GcsDestination( output_uri_prefix=_TEST_BASE_OUTPUT_DIR @@ -3421,9 +3447,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, sync=sync, ) @@ -3452,12 +3475,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis }, } - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_TEST_FRACTION_SPLIT, - ) - env = [ gca_env_var.EnvVar(name=str(key), value=str(value)) for key, value in _TEST_MODEL_SERVING_CONTAINER_ENVIRONMENT_VARIABLES.items() @@ -3492,7 +3509,6 @@ def test_run_call_pipeline_service_create_with_tabular_dataset_without_model_dis ) true_input_data_config = gca_training_pipeline.InputDataConfig( - 
fraction_split=true_fraction_split, predefined_split=gca_training_pipeline.PredefinedSplit( key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME ), @@ -3582,7 +3598,7 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction_split=_TEST_TEST_FRACTION_SPLIT, - predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, + timestamp_split_column_name=_TEST_TIMESTAMP_SPLIT_COLUMN_NAME, sync=sync, ) @@ -3610,10 +3626,11 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( }, } - true_fraction_split = gca_training_pipeline.FractionSplit( + true_timestamp_split = gca_training_pipeline.TimestampSplit( training_fraction=_TEST_TRAINING_FRACTION_SPLIT, validation_fraction=_TEST_VALIDATION_FRACTION_SPLIT, test_fraction=_TEST_TEST_FRACTION_SPLIT, + key=_TEST_TIMESTAMP_SPLIT_COLUMN_NAME, ) env = [ @@ -3649,10 +3666,7 @@ def test_run_call_pipeline_service_create_with_bigquery_destination( ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, - predefined_split=gca_training_pipeline.PredefinedSplit( - key=_TEST_PREDEFINED_SPLIT_COLUMN_NAME - ), + timestamp_split=true_timestamp_split, dataset_id=mock_tabular_dataset.name, bigquery_destination=gca_io.BigQueryDestination( output_uri=_TEST_BIGQUERY_DESTINATION @@ -3726,9 +3740,6 @@ def test_run_called_twice_raises( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -3742,9 +3753,6 @@ def test_run_called_twice_raises( accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, model_display_name=_TEST_MODEL_DISPLAY_NAME, - 
training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -3788,6 +3796,38 @@ def test_run_with_invalid_accelerator_type_raises( sync=sync, ) + @pytest.mark.parametrize("sync", [True, False]) + def test_run_with_two_split_raises( + self, + mock_pipeline_service_create, + mock_python_package_to_gcs, + mock_tabular_dataset, + mock_model_service_get, + sync, + ): + aiplatform.init(project=_TEST_PROJECT, staging_bucket=_TEST_BUCKET_NAME) + + job = training_jobs.CustomPythonPackageTrainingJob( + display_name=_TEST_DISPLAY_NAME, + python_package_gcs_uri=_TEST_OUTPUT_PYTHON_PACKAGE_PATH, + python_module_name=_TEST_PYTHON_MODULE_NAME, + container_uri=_TEST_TRAINING_CONTAINER_IMAGE, + ) + + with pytest.raises(ValueError): + job.run( + dataset=mock_tabular_dataset, + replica_count=1, + machine_type=_TEST_MACHINE_TYPE, + accelerator_type=_TEST_INVALID_ACCELERATOR_TYPE, + accelerator_count=_TEST_ACCELERATOR_COUNT, + predefined_split_column_name=_TEST_PREDEFINED_SPLIT_COLUMN_NAME, + training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, + validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, + test_fraction_split=_TEST_TEST_FRACTION_SPLIT, + sync=sync, + ) + @pytest.mark.parametrize("sync", [True, False]) def test_run_with_incomplete_model_info_raises_with_model_to_upload( self, @@ -4013,9 +4053,6 @@ def test_run_raises_if_pipeline_fails( machine_type=_TEST_MACHINE_TYPE, accelerator_type=_TEST_ACCELERATOR_TYPE, accelerator_count=_TEST_ACCELERATOR_COUNT, - training_fraction_split=_TEST_TRAINING_FRACTION_SPLIT, - validation_fraction_split=_TEST_VALIDATION_FRACTION_SPLIT, - test_fraction_split=_TEST_TEST_FRACTION_SPLIT, sync=sync, ) @@ -4250,6 +4287,9 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ accelerator_count=_TEST_ACCELERATOR_COUNT, service_account=_TEST_SERVICE_ACCOUNT, 
tensorboard=_TEST_TENSORBOARD_RESOURCE_NAME, + training_filter_split=_TEST_TRAINING_FILTER_SPLIT, + validation_filter_split=_TEST_VALIDATION_FILTER_SPLIT, + test_filter_split=_TEST_TEST_FILTER_SPLIT, sync=sync, ) @@ -4277,10 +4317,10 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ }, } - true_fraction_split = gca_training_pipeline.FractionSplit( - training_fraction=_TEST_DEFAULT_TRAINING_FRACTION_SPLIT, - validation_fraction=_TEST_DEFAULT_VALIDATION_FRACTION_SPLIT, - test_fraction=_TEST_DEFAULT_TEST_FRACTION_SPLIT, + true_filter_split = gca_training_pipeline.FilterSplit( + training_filter=_TEST_TRAINING_FILTER_SPLIT, + validation_filter=_TEST_VALIDATION_FILTER_SPLIT, + test_filter=_TEST_TEST_FILTER_SPLIT, ) env = [ @@ -4316,7 +4356,7 @@ def test_run_call_pipeline_service_create_with_nontabular_dataset_without_model_ ) true_input_data_config = gca_training_pipeline.InputDataConfig( - fraction_split=true_fraction_split, + filter_split=true_filter_split, dataset_id=mock_nontabular_dataset.name, annotation_schema_uri=_TEST_ANNOTATION_SCHEMA_URI, gcs_destination=gca_io.GcsDestination( From a58ea826c575b9b0c8cb69e47fc2f07a98bb285b Mon Sep 17 00:00:00 2001 From: Yaqi Ji Date: Fri, 20 Aug 2021 13:42:08 -0700 Subject: [PATCH 19/28] feat: add PipelineJob.list --- google/cloud/aiplatform/pipeline_jobs.py | 50 ++++++++++++++++++++- tests/unit/aiplatform/test_pipeline_jobs.py | 31 +++++++++++++ 2 files changed, 80 insertions(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/pipeline_jobs.py b/google/cloud/aiplatform/pipeline_jobs.py index 393f61c965..bb98b1f0d5 100644 --- a/google/cloud/aiplatform/pipeline_jobs.py +++ b/google/cloud/aiplatform/pipeline_jobs.py @@ -18,7 +18,7 @@ import datetime import time import re -from typing import Any, Optional, Dict +from typing import Any, Dict, List, Optional from google.auth import credentials as auth_credentials from google.cloud.aiplatform import base @@ -376,6 +376,54 @@ def cancel(self) -> 
None: """ self.api_client.cancel_pipeline_job(name=self.resource_name) + @classmethod + def list( + cls, + filter: Optional[str] = None, + order_by: Optional[str] = None, + project: Optional[str] = None, + location: Optional[str] = None, + credentials: Optional[auth_credentials.Credentials] = None, + ) -> List["PipelineJob"]: + """List all instances of this PipelineJob resource. + + Example Usage: + + aiplatform.PipelineJob.list( + filter='display_name="experiment_a27"', + order_by='create_time desc' + ) + + Args: + filter (str): + Optional. An expression for filtering the results of the request. + For field names both snake_case and camelCase are supported. + order_by (str): + Optional. A comma-separated list of fields to order by, sorted in + ascending order. Use "desc" after a field name for descending. + Supported fields: `display_name`, `create_time`, `update_time` + project (str): + Optional. Project to retrieve list from. If not set, project + set in aiplatform.init will be used. + location (str): + Optional. Location to retrieve list from. If not set, location + set in aiplatform.init will be used. + credentials (auth_credentials.Credentials): + Optional. Custom credentials to use to retrieve list. Overrides + credentials set in aiplatform.init. 
+ + Returns: + List[PipelineJob] - A list of PipelineJob resource objects + """ + + return cls._list_with_local_order( + filter=filter, + order_by=order_by, + project=project, + location=location, + credentials=credentials, + ) + def wait_for_resource_creation(self) -> None: """Waits until resource has been created.""" self._wait_for_resource_creation() diff --git a/tests/unit/aiplatform/test_pipeline_jobs.py b/tests/unit/aiplatform/test_pipeline_jobs.py index 1f1d5c96de..0e3eddbf22 100644 --- a/tests/unit/aiplatform/test_pipeline_jobs.py +++ b/tests/unit/aiplatform/test_pipeline_jobs.py @@ -162,6 +162,14 @@ def mock_pipeline_service_cancel(): yield mock_cancel_pipeline_job +@pytest.fixture +def mock_pipeline_service_list(): + with mock.patch.object( + pipeline_service_client_v1beta1.PipelineServiceClient, "list_pipeline_jobs" + ) as mock_list_pipeline_jobs: + yield mock_list_pipeline_jobs + + @pytest.fixture def mock_load_json(): with patch.object(storage.Blob, "download_as_bytes") as mock_load_json: @@ -278,6 +286,29 @@ def test_cancel_pipeline_job( name=_TEST_PIPELINE_JOB_NAME ) + @pytest.mark.usefixtures( + "mock_pipeline_service_create", "mock_pipeline_service_get", "mock_load_json", + ) + def test_list_pipeline_job(self, mock_pipeline_service_list): + aiplatform.init( + project=_TEST_PROJECT, + staging_bucket=_TEST_GCS_BUCKET_NAME, + credentials=_TEST_CREDENTIALS, + ) + + job = pipeline_jobs.PipelineJob( + display_name=_TEST_PIPELINE_JOB_DISPLAY_NAME, + template_path=_TEST_TEMPLATE_PATH, + job_id=_TEST_PIPELINE_JOB_ID, + ) + + job.run() + job.list() + + mock_pipeline_service_list.assert_called_once_with( + request={"parent": _TEST_PARENT, "filter": None} + ) + @pytest.mark.usefixtures( "mock_pipeline_service_create", "mock_pipeline_service_get", "mock_load_json", ) From 52a7b7c6ff0fefa88aead402c320a01ba6738813 Mon Sep 17 00:00:00 2001 From: Yaqi Ji Date: Fri, 20 Aug 2021 17:44:32 -0700 Subject: [PATCH 20/28] chore: update README.rst for XAI get_metadata. 
(#646) * Update README.rst * Update README.rst * Update README.rst * Update README.rst Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> * Update README.rst Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> * Update README.rst Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> * Fix lint Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> --- README.rst | 39 ++++++++++++++++++++++++++++++++------- 1 file changed, 32 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index 57ead60fea..e8fc200700 100644 --- a/README.rst +++ b/README.rst @@ -117,7 +117,7 @@ Initialize the SDK to store common configurations that you use with the SDK. experiment='my-experiment', # description of the experiment above - experiment_description='my experiment decsription' + experiment_description='my experiment decsription' ) Datasets @@ -149,7 +149,7 @@ You can also create and import a dataset in separate steps: To get a previously created Dataset: .. code-block:: Python - + dataset = aiplatform.ImageDataset('projects/my-project/location/us-central1/datasets/{DATASET_ID}') Vertex AI supports a variety of dataset schemas. References to these schemas are available under the @@ -173,7 +173,7 @@ It must read datasets from the environment variables populated by the training s .. code-block:: Python - os.environ['AIP_DATA_FORMAT'] # provides format of data + os.environ['AIP_DATA_FORMAT'] # provides format of data os.environ['AIP_TRAINING_DATA_URI'] # uri to training split os.environ['AIP_VALIDATION_DATA_URI'] # uri to validation split os.environ['AIP_TEST_DATA_URI'] # uri to test split @@ -184,7 +184,7 @@ Please visit `Using a managed dataset in a custom training application`_ for a d It must write the model artifact to the environment variable populated by the traing service: -.. code-block:: Python +.. 
code-block:: Python os.environ['AIP_MODEL_DIR'] @@ -295,7 +295,7 @@ To deploy a model to a created endpoint: .. code-block:: Python model = aiplatform.Model('/projects/my-project/locations/us-central1/models/{MODEL_ID}') - + endpoint.deploy(model, min_replica_count=1, max_replica_count=5 @@ -312,10 +312,35 @@ To undeploy models from an endpoint: To delete an endpoint: .. code-block:: Python - + endpoint.delete() +Explainable AI: Get Metadata +---------------------------- + +To get metadata from TensorFlow 1 models: + +.. code-block:: Python + + from google.cloud.aiplatform.explain.metadata.tf.v1 import saved_model_metadata_builder + + builder = saved_model_metadata_builder.SavedModelMetadataBuilder( + 'gs://python/to/my/model/dir', tags=[tf.saved_model.tag_constants.SERVING] + ) + generated_md = builder.get_metadata() + +To get metadata from TensorFlow 2 models: + +.. code-block:: Python + + from google.cloud.aiplatform.explain.metadata.tf.v2 import saved_model_metadata_builder + + builder = saved_model_metadata_builder.SavedModelMetadataBuilder('gs://python/to/my/model/dir') + generated_md = builder.get_metadata() + + + Next Steps ~~~~~~~~~~ @@ -327,4 +352,4 @@ Next Steps APIs that we cover. .. _Vertex AI API Product documentation: https://cloud.google.com/vertex-ai/docs -.. _README: https://github.com/googleapis/google-cloud-python/blob/master/README.rst \ No newline at end of file +.. _README: https://github.com/googleapis/google-cloud-python/blob/master/README.rst From 8d88c006c5586b28d340448382a9292543448fd6 Mon Sep 17 00:00:00 2001 From: Karthik Ramasamy Date: Mon, 23 Aug 2021 10:23:44 -0700 Subject: [PATCH 21/28] feat: add util functions to get URLs for Tensorboard web app. 
(#635) --- .../aiplatform/utils/tensorboard_utils.py | 93 +++++++++++++++++++ tests/unit/aiplatform/test_utils.py | 52 +++++++++++ 2 files changed, 145 insertions(+) create mode 100644 google/cloud/aiplatform/utils/tensorboard_utils.py diff --git a/google/cloud/aiplatform/utils/tensorboard_utils.py b/google/cloud/aiplatform/utils/tensorboard_utils.py new file mode 100644 index 0000000000..d3cb1ef704 --- /dev/null +++ b/google/cloud/aiplatform/utils/tensorboard_utils.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Sequence, Dict +from google.cloud.aiplatform_v1beta1.services.tensorboard_service.client import ( + TensorboardServiceClient, +) + +_SERVING_DOMAIN = "tensorboard.googleusercontent.com" + + +def _parse_experiment_name(experiment_name: str) -> Dict[str, str]: + """Parses an experiment_name into its component segments. + + Args: + experiment_name: Resource name of the TensorboardExperiment. E.g. + "projects/123/locations/asia-east1/tensorboards/456/experiments/exp1" + + Returns: + Components of the experiment name. + + Raises: + ValueError if the experiment_name is invalid. 
+ """ + matched = TensorboardServiceClient.parse_tensorboard_experiment_path( + experiment_name + ) + if not matched: + raise ValueError(f"Invalid experiment name: {experiment_name}.") + return matched + + +def get_experiment_url(experiment_name: str) -> str: + """Get URL for comparing experiments. + + Args: + experiment_name: Resource name of the TensorboardExperiment. E.g. + "projects/123/locations/asia-east1/tensorboards/456/experiments/exp1" + + Returns: + URL for the tensorboard web app. + """ + location = _parse_experiment_name(experiment_name)["location"] + name_for_url = experiment_name.replace("/", "+") + return f"https://{location}.{_SERVING_DOMAIN}/experiment/{name_for_url}" + + +def get_experiments_compare_url(experiment_names: Sequence[str]) -> str: + """Get URL for comparing experiments. + + Args: + experiment_names: Resource names of the TensorboardExperiments that needs to + be compared. + + Returns: + URL for the tensorboard web app. + """ + if len(experiment_names) < 2: + raise ValueError("At least two experiment_names are required.") + + locations = { + _parse_experiment_name(experiment_name)["location"] + for experiment_name in experiment_names + } + if len(locations) != 1: + raise ValueError( + f"Got experiments from different locations: {', '.join(locations)}." 
+ ) + location = locations.pop() + + experiment_url_segments = [] + for idx, experiment_name in enumerate(experiment_names): + name_segments = _parse_experiment_name(experiment_name) + experiment_url_segments.append( + "{cnt}-{experiment}:{project}+{location}+{tensorboard}+{experiment}".format( + cnt=idx + 1, **name_segments + ) + ) + encoded_names = ",".join(experiment_url_segments) + return f"https://{location}.{_SERVING_DOMAIN}/compare/{encoded_names}" diff --git a/tests/unit/aiplatform/test_utils.py b/tests/unit/aiplatform/test_utils.py index ed85fb9f0a..bdc674ebc0 100644 --- a/tests/unit/aiplatform/test_utils.py +++ b/tests/unit/aiplatform/test_utils.py @@ -28,6 +28,7 @@ from google.cloud.aiplatform import compat from google.cloud.aiplatform import utils from google.cloud.aiplatform.utils import pipeline_utils +from google.cloud.aiplatform.utils import tensorboard_utils from google.cloud.aiplatform_v1beta1.services.model_service import ( client as model_service_client_v1beta1, @@ -454,3 +455,54 @@ def test_pipeline_utils_runtime_config_builder_parameter_not_found(self): my_builder.build() assert e.match(regexp=r"The pipeline parameter no_such_param is not found") + + +class TestTensorboardUtils: + def test_tensorboard_get_experiment_url(self): + actual = tensorboard_utils.get_experiment_url( + "projects/123/locations/asia-east1/tensorboards/456/experiments/exp1" + ) + assert actual == ( + "https://asia-east1.tensorboard." 
+ + "googleusercontent.com/experiment/projects+123+locations+asia-east1+tensorboards+456+experiments+exp1" + ) + + def test_get_experiments_url_bad_experiment_name(self): + with pytest.raises(ValueError, match="Invalid experiment name: foo-bar."): + tensorboard_utils.get_experiment_url("foo-bar") + + def test_tensorboard_get_experiments_compare_url(self): + actual = tensorboard_utils.get_experiments_compare_url( + ( + "projects/123/locations/asia-east1/tensorboards/456/experiments/exp1", + "projects/123/locations/asia-east1/tensorboards/456/experiments/exp2", + ) + ) + assert actual == ( + "https://asia-east1.tensorboard." + + "googleusercontent.com/compare/1-exp1:123+asia-east1+456+exp1," + + "2-exp2:123+asia-east1+456+exp2" + ) + + def test_tensorboard_get_experiments_compare_url_fail_just_one_exp(self): + with pytest.raises( + ValueError, match="At least two experiment_names are required." + ): + tensorboard_utils.get_experiments_compare_url( + ("projects/123/locations/asia-east1/tensorboards/456/experiments/exp1",) + ) + + def test_tensorboard_get_experiments_compare_url_fail_diff_region(self): + with pytest.raises( + ValueError, match="Got experiments from different locations: asia-east.", + ): + tensorboard_utils.get_experiments_compare_url( + ( + "projects/123/locations/asia-east1/tensorboards/456/experiments/exp1", + "projects/123/locations/asia-east2/tensorboards/456/experiments/exp2", + ) + ) + + def test_get_experiments_compare_url_bad_experiment_name(self): + with pytest.raises(ValueError, match="Invalid experiment name: foo-bar."): + tensorboard_utils.get_experiments_compare_url(("foo-bar", "foo-bar1")) From 2f89343adbd69610fc5cacc7121119fc7279186e Mon Sep 17 00:00:00 2001 From: Yaqi Ji Date: Tue, 24 Aug 2021 13:37:23 -0700 Subject: [PATCH 22/28] fix: pipeline none values (#649) --- google/cloud/aiplatform/utils/pipeline_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/aiplatform/utils/pipeline_utils.py 
b/google/cloud/aiplatform/utils/pipeline_utils.py index 7aae56f8d4..31b08671a5 100644 --- a/google/cloud/aiplatform/utils/pipeline_utils.py +++ b/google/cloud/aiplatform/utils/pipeline_utils.py @@ -129,7 +129,7 @@ def _get_vertex_value( ValueError: if the parameter name is not found in pipeline root inputs, or value is none. """ - if not value: + if value is None: raise ValueError("None values should be filterd out.") if name not in self._parameter_types: From d97da413f6ae09a0285d4de0bb92f0495d899490 Mon Sep 17 00:00:00 2001 From: Vinny Senthil Date: Wed, 25 Aug 2021 12:34:20 -0700 Subject: [PATCH 23/28] chore: End-to-End Tabular System Test (#610) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Init end-to-end tabular integration test + script * Checkpoint, add assertions on prediction response * Add a presubmit config for system test changes * Lint * Address PR comments, split out E2E base class * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Drop sample lint type hint requirement * Address comments, add type hints to base class * Update prediction response inspection * 🦉 Updates from OwlBot See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md * Update base teardown fixture to delete Endpoints first * Change pytest-xdist to loadscope, add cfg header Co-authored-by: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> Co-authored-by: Owl Bot --- .kokoro/continuous/system.cfg | 4 +- .kokoro/continuous/unit.cfg | 4 +- .kokoro/presubmit/presubmit.cfg | 2 +- .kokoro/presubmit/release.cfg | 4 +- .kokoro/presubmit/system.cfg | 13 ++ samples/model-builder/noxfile_config.py | 2 +- tests/system/aiplatform/e2e_base.py | 101 +++++++++++ tests/system/aiplatform/test_e2e_tabular.py | 160 ++++++++++++++++++ .../california_housing_training_script.py | 69 ++++++++ 9 files changed, 354 insertions(+), 5 deletions(-) create 
mode 100644 .kokoro/presubmit/system.cfg create mode 100644 tests/system/aiplatform/e2e_base.py create mode 100644 tests/system/aiplatform/test_e2e_tabular.py create mode 100644 tests/system/aiplatform/test_resources/california_housing_training_script.py diff --git a/.kokoro/continuous/system.cfg b/.kokoro/continuous/system.cfg index eaa9edaab5..f5bf0c7420 100644 --- a/.kokoro/continuous/system.cfg +++ b/.kokoro/continuous/system.cfg @@ -1,3 +1,5 @@ +# Format: //devtools/kokoro/config/proto/build.proto + env_vars: { key: "NOX_SESSION" value: "system-3.8" @@ -6,5 +8,5 @@ env_vars: { # Run system tests in parallel, splitting up by file env_vars: { key: "PYTEST_ADDOPTS" - value: "-n=auto --dist=loadfile" + value: "-n=auto --dist=loadscope" } diff --git a/.kokoro/continuous/unit.cfg b/.kokoro/continuous/unit.cfg index 52c7230be9..7e92d003e1 100644 --- a/.kokoro/continuous/unit.cfg +++ b/.kokoro/continuous/unit.cfg @@ -1,3 +1,5 @@ +# Format: //devtools/kokoro/config/proto/build.proto + # Run all unit test sessions, in Python 3.6 to 3.9 env_vars: { key: "NOX_SESSION" @@ -7,5 +9,5 @@ env_vars: { # Run unit tests in parallel, splitting up by test env_vars: { key: "PYTEST_ADDOPTS" - value: "-n=auto" + value: "-n=auto --dist=loadscope" } diff --git a/.kokoro/presubmit/presubmit.cfg b/.kokoro/presubmit/presubmit.cfg index f351292171..a2bc4c4994 100644 --- a/.kokoro/presubmit/presubmit.cfg +++ b/.kokoro/presubmit/presubmit.cfg @@ -9,5 +9,5 @@ env_vars: { # Run unit tests in parallel, splitting up by file env_vars: { key: "PYTEST_ADDOPTS" - value: "-n=auto --dist=loadfile" + value: "-n=auto --dist=loadscope" } diff --git a/.kokoro/presubmit/release.cfg b/.kokoro/presubmit/release.cfg index b9398805e5..fc047df824 100644 --- a/.kokoro/presubmit/release.cfg +++ b/.kokoro/presubmit/release.cfg @@ -1,3 +1,5 @@ +# Format: //devtools/kokoro/config/proto/build.proto + # Run system tests in presubmit for library releases env_vars: { key: "NOX_SESSION" @@ -7,5 +9,5 @@ env_vars: { # Run 
system tests in parallel, splitting up by file env_vars: { key: "PYTEST_ADDOPTS" - value: "-n=auto --dist=loadfile" + value: "-n=auto --dist=loadscope" } diff --git a/.kokoro/presubmit/system.cfg b/.kokoro/presubmit/system.cfg new file mode 100644 index 0000000000..29bcaf044c --- /dev/null +++ b/.kokoro/presubmit/system.cfg @@ -0,0 +1,13 @@ +# Format: //devtools/kokoro/config/proto/build.proto + +# Run system tests when test files are modified +env_vars: { + key: "NOX_SESSION" + value: "system-3.8" +} + +# Run system tests in parallel, splitting up by file +env_vars: { + key: "PYTEST_ADDOPTS" + value: "-n=auto --dist=loadscope" +} diff --git a/samples/model-builder/noxfile_config.py b/samples/model-builder/noxfile_config.py index d83f6320cb..024eece69f 100644 --- a/samples/model-builder/noxfile_config.py +++ b/samples/model-builder/noxfile_config.py @@ -25,7 +25,7 @@ "ignored_versions": ["2.7"], # Old samples are opted out of enforcing Python type hints # All new samples should feature them - "enforce_type_hints": True, + "enforce_type_hints": False, # An envvar key for determining the project id to use. Change it # to 'BUILD_SPECIFIC_GCLOUD_PROJECT' if you want to opt in using a # build specific Cloud project. You can also use your own string diff --git a/tests/system/aiplatform/e2e_base.py b/tests/system/aiplatform/e2e_base.py new file mode 100644 index 0000000000..c0843133dd --- /dev/null +++ b/tests/system/aiplatform/e2e_base.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +import abc +import importlib +import os +import pytest +import uuid +from typing import Any, Dict, Generator + +from google.api_core import exceptions +from google.cloud import aiplatform +from google.cloud import storage +from google.cloud.aiplatform import initializer + +_PROJECT = os.getenv("BUILD_SPECIFIC_GCLOUD_PROJECT") +_LOCATION = "us-central1" + + +class TestEndToEnd(metaclass=abc.ABCMeta): + @property + @classmethod + @abc.abstractmethod + def _temp_prefix(cls) -> str: + """Prefix to staging bucket and display names created by this end-to-end test. + Keep the string as short as possible and use kebab case, starting with a lowercase letter. + + Example: `"temp-vertex-hpt-test"` + """ + pass + + def setup_method(self): + importlib.reload(initializer) + importlib.reload(aiplatform) + + @pytest.fixture() + def shared_state(self) -> Generator[Dict[str, Any], None, None]: + shared_state = {} + yield shared_state + + @pytest.fixture() + def prepare_staging_bucket( + self, shared_state: Dict[str, Any] + ) -> Generator[storage.bucket.Bucket, None, None]: + """Create a staging bucket and store bucket resource object in shared state.""" + + staging_bucket_name = f"{self._temp_prefix.lower()}-{uuid.uuid4()}"[:63] + shared_state["staging_bucket_name"] = staging_bucket_name + + storage_client = storage.Client(project=_PROJECT) + shared_state["storage_client"] = storage_client + + shared_state["bucket"] = storage_client.create_bucket( + staging_bucket_name, location=_LOCATION + ) + yield + + @pytest.fixture() + def delete_staging_bucket(self, shared_state: Dict[str, Any]): + """Delete the staging bucket and all its contents""" + + yield + + # Get the staging bucket used for testing and wipe it + bucket = shared_state["bucket"] + bucket.delete(force=True) + + @pytest.fixture(autouse=True) + def teardown(self, shared_state: Dict[str, Any]): + """Delete every
Vertex AI resource created during test""" + + yield + + # Bring all Endpoints to the front of the list + # Ensures Models are undeployed first before we attempt deletion + shared_state["resources"].sort( + key=lambda r: 1 if isinstance(r, aiplatform.Endpoint) else 2 + ) + + for resource in shared_state["resources"]: + try: + if isinstance(resource, aiplatform.Endpoint): + resource.delete(force=True) # Undeploy model then delete endpoint + else: + resource.delete() + except exceptions.GoogleAPIError as e: + print(f"Could not delete resource: {resource} due to: {e}") diff --git a/tests/system/aiplatform/test_e2e_tabular.py b/tests/system/aiplatform/test_e2e_tabular.py new file mode 100644 index 0000000000..9a330f34cf --- /dev/null +++ b/tests/system/aiplatform/test_e2e_tabular.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import os +import uuid +from urllib import request + +import pytest + +from google.cloud import aiplatform +from tests.system.aiplatform import e2e_base + + +_BLOB_PATH = "california-housing-data.csv" +_DATASET_SRC = "https://dl.google.com/mlcc/mledu-datasets/california_housing_train.csv" +_DIR_NAME = os.path.dirname(os.path.abspath(__file__)) +_LOCAL_TRAINING_SCRIPT_PATH = os.path.join( + _DIR_NAME, "test_resources/california_housing_training_script.py" +) + + +@pytest.mark.usefixtures("prepare_staging_bucket", "delete_staging_bucket", "teardown") +class TestEndToEndTabular(e2e_base.TestEndToEnd): + """End to end system test of the Vertex SDK with tabular data adapted from + reference notebook http://shortn/_eyoNx3SN0X""" + + _temp_prefix = "temp-vertex-sdk-e2e-tabular" + + def test_end_to_end_tabular(self, shared_state): + """Build dataset, train a custom and AutoML model, deploy, and get predictions""" + + assert shared_state["bucket"] + bucket = shared_state["bucket"] + + blob = bucket.blob(_BLOB_PATH) + + # Download the CSV file into memory and save it directly to staging bucket + with request.urlopen(_DATASET_SRC) as response: + data = response.read() + blob.upload_from_string(data) + + # Collection of resources generated by this test, to be deleted during teardown + shared_state["resources"] = [] + + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + staging_bucket=shared_state["staging_bucket_name"], + ) + + # Create and import to single managed dataset for both training jobs + + ds = aiplatform.TabularDataset.create( + display_name=f"{self._temp_prefix}-dataset-{uuid.uuid4()}", + gcs_source=[f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}'], + sync=False, + ) + + shared_state["resources"].extend([ds]) + + # Define both training jobs + + custom_job = aiplatform.CustomTrainingJob( + display_name=f"{self._temp_prefix}-train-housing-custom-{uuid.uuid4()}", + script_path=_LOCAL_TRAINING_SCRIPT_PATH, + 
container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest", + requirements=["gcsfs==0.7.1"], + model_serving_container_image_uri="gcr.io/cloud-aiplatform/prediction/tf2-cpu.2-2:latest", + ) + + automl_job = aiplatform.AutoMLTabularTrainingJob( + display_name=f"{self._temp_prefix}-train-housing-automl-{uuid.uuid4()}", + optimization_prediction_type="regression", + optimization_objective="minimize-rmse", + ) + + # Kick off both training jobs, AutoML job will take approx one hour to run + + custom_model = custom_job.run( + ds, + replica_count=1, + model_display_name=f"{self._temp_prefix}-custom-housing-model-{uuid.uuid4()}", + sync=False, + ) + + automl_model = automl_job.run( + dataset=ds, + target_column="median_house_value", + model_display_name=f"{self._temp_prefix}-automl-housing-model-{uuid.uuid4()}", + sync=False, + ) + + shared_state["resources"].extend( + [automl_job, automl_model, custom_job, custom_model] + ) + + # Deploy both models after training completes + custom_endpoint = custom_model.deploy(machine_type="n1-standard-4", sync=False) + automl_endpoint = automl_model.deploy(machine_type="n1-standard-4", sync=False) + shared_state["resources"].extend([automl_endpoint, custom_endpoint]) + + # Send online prediction with same instance to both deployed models + # This sample is taken from an observation where median_house_value = 94600 + custom_endpoint.wait() + custom_prediction = custom_endpoint.predict( + [ + { + "longitude": -124.35, + "latitude": 40.54, + "housing_median_age": 52.0, + "total_rooms": 1820.0, + "total_bedrooms": 300.0, + "population": 806, + "households": 270.0, + "median_income": 3.014700, + }, + ] + ) + automl_endpoint.wait() + automl_prediction = automl_endpoint.predict( + [ + { + "longitude": "-124.35", + "latitude": "40.54", + "housing_median_age": "52.0", + "total_rooms": "1820.0", + "total_bedrooms": "300.0", + "population": "806", + "households": "270.0", + "median_income": "3.014700", + }, + ] + ) + + # Ensure a single 
prediction was returned + assert len(custom_prediction.predictions) == 1 + assert len(automl_prediction.predictions) == 1 + + # Ensure the models are remotely accurate + try: + automl_result = automl_prediction.predictions[0]["value"] + custom_result = custom_prediction.predictions[0][0] + assert 200000 > automl_result > 50000 + assert 200000 > custom_result > 50000 + except KeyError as e: + raise RuntimeError("Unexpected prediction response structure:", e) diff --git a/tests/system/aiplatform/test_resources/california_housing_training_script.py b/tests/system/aiplatform/test_resources/california_housing_training_script.py new file mode 100644 index 0000000000..ae5b26e531 --- /dev/null +++ b/tests/system/aiplatform/test_resources/california_housing_training_script.py @@ -0,0 +1,69 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +import pandas as pd +import os +import tensorflow as tf +from tensorflow.keras import layers + + +# uncomment and bump up replica_count for distributed training +# strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy() +# tf.distribute.experimental_set_strategy(strategy) + +target = "median_house_value" + + +def aip_data_to_dataframe(wild_card_path): + return pd.concat( + [ + pd.read_csv(fp.numpy().decode()) + for fp in tf.data.Dataset.list_files([wild_card_path]) + ] + ) + + +def get_features_and_labels(df): + features = df.drop(target, axis=1) + return {key: features[key].values for key in features.columns}, df[target].values + + +def data_prep(wild_card_path): + return get_features_and_labels(aip_data_to_dataframe(wild_card_path)) + + +train_features, train_labels = data_prep(os.environ["AIP_TRAINING_DATA_URI"]) + +feature_columns = [ + tf.feature_column.numeric_column(name) for name in train_features.keys() +] + +model = tf.keras.Sequential( + [layers.DenseFeatures(feature_columns), layers.Dense(64), layers.Dense(1)] +) +model.compile(loss="mse", optimizer="adam") + +model.fit( + train_features, + train_labels, + epochs=10, + validation_data=data_prep(os.environ["AIP_VALIDATION_DATA_URI"]), +) +print(model.evaluate(*data_prep(os.environ["AIP_TEST_DATA_URI"]))) + +# save as Vertex AI Managed model +tf.saved_model.save(model, os.environ["AIP_MODEL_DIR"]) From 8fde2ce4441139784bc0fdd62c88d4b833018765 Mon Sep 17 00:00:00 2001 From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> Date: Thu, 26 Aug 2021 15:21:28 -0400 Subject: [PATCH 24/28] fix: Populate service_account and network in PipelineJob instead of pipeline_spec (#658) --- google/cloud/aiplatform/pipeline_jobs.py | 4 ++-- tests/unit/aiplatform/test_pipeline_jobs.py | 6 ++++++ 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/google/cloud/aiplatform/pipeline_jobs.py b/google/cloud/aiplatform/pipeline_jobs.py index bb98b1f0d5..5c948d17b9 100644 --- 
a/google/cloud/aiplatform/pipeline_jobs.py +++ b/google/cloud/aiplatform/pipeline_jobs.py @@ -250,10 +250,10 @@ def run( Optional. Whether to execute this method synchronously. If False, this method will unblock and it will be executed in a concurrent Future. """ if service_account: - self._gca_resource.pipeline_spec.service_account = service_account + self._gca_resource.service_account = service_account if network: - self._gca_resource.pipeline_spec.network = network + self._gca_resource.network = network _LOGGER.log_create_with_lro(self.__class__) diff --git a/tests/unit/aiplatform/test_pipeline_jobs.py b/tests/unit/aiplatform/test_pipeline_jobs.py index 0e3eddbf22..bc39f4d9d9 100644 --- a/tests/unit/aiplatform/test_pipeline_jobs.py +++ b/tests/unit/aiplatform/test_pipeline_jobs.py @@ -84,6 +84,8 @@ def mock_pipeline_service_create(): name=_TEST_PIPELINE_JOB_NAME, state=gca_pipeline_state_v1beta1.PipelineState.PIPELINE_STATE_SUCCEEDED, create_time=_TEST_PIPELINE_CREATE_TIME, + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, ) yield mock_create_pipeline_job @@ -93,6 +95,8 @@ def make_pipeline_job(state): name=_TEST_PIPELINE_JOB_NAME, state=state, create_time=_TEST_PIPELINE_CREATE_TIME, + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, ) @@ -239,6 +243,8 @@ def test_run_call_pipeline_service_create( "root": _TEST_PIPELINE_JOB_SPEC["pipelineSpec"]["root"], }, runtime_config=runtime_config, + service_account=_TEST_SERVICE_ACCOUNT, + network=_TEST_NETWORK, ) mock_pipeline_service_create.assert_called_once_with( From 4ad67dc0344210a8273c13978a98964cf72c3c54 Mon Sep 17 00:00:00 2001 From: Anthonios Partheniou Date: Fri, 27 Aug 2021 13:27:11 -0400 Subject: [PATCH 25/28] chore: migrate default branch from master to main (#661) --- .github/sync-repo-settings.yaml | 8 ++-- .kokoro/build.sh | 2 +- .kokoro/test-samples-impl.sh | 2 +- CONTRIBUTING.rst | 12 +++--- docs/conf.py | 10 ++--- google/cloud/aiplatform/training_jobs.py | 10 ++--- 
owlbot.py | 53 ++++++++++++++++++++++++ schema/predict/instance/docs/conf.py | 10 ++--- schema/predict/params/docs/conf.py | 10 ++--- schema/predict/prediction/docs/conf.py | 10 ++--- 10 files changed, 90 insertions(+), 37 deletions(-) diff --git a/.github/sync-repo-settings.yaml b/.github/sync-repo-settings.yaml index ab4509fa20..e0ac340c75 100644 --- a/.github/sync-repo-settings.yaml +++ b/.github/sync-repo-settings.yaml @@ -1,10 +1,10 @@ -# https://github.com/googleapis/repo-automation-bots/tree/master/packages/sync-repo-settings -# Rules for master branch protection +# https://github.com/googleapis/repo-automation-bots/tree/main/packages/sync-repo-settings +# Rules for main branch protection mergeCommitAllowed: true branchProtectionRules: # Identifies the protection rule pattern. Name of the branch to be protected. -# Defaults to `master` -- pattern: master +# Defaults to `main` +- pattern: main requiresCodeOwnerReviews: true requiresStrictStatusChecks: true requiredStatusCheckContexts: diff --git a/.kokoro/build.sh b/.kokoro/build.sh index 35e4a0f6ce..32e6d625b4 100755 --- a/.kokoro/build.sh +++ b/.kokoro/build.sh @@ -41,7 +41,7 @@ python3 -m pip install --upgrade --quiet nox python3 -m nox --version # If this is a continuous build, send the test log to the FlakyBot. -# See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. +# See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"continuous"* ]]; then cleanup() { chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/.kokoro/test-samples-impl.sh b/.kokoro/test-samples-impl.sh index 311a8d54b9..8a324c9c7b 100755 --- a/.kokoro/test-samples-impl.sh +++ b/.kokoro/test-samples-impl.sh @@ -80,7 +80,7 @@ for file in samples/**/requirements.txt; do EXIT=$? # If this is a periodic build, send the test log to the FlakyBot. - # See https://github.com/googleapis/repo-automation-bots/tree/master/packages/flakybot. 
+ # See https://github.com/googleapis/repo-automation-bots/tree/main/packages/flakybot. if [[ $KOKORO_BUILD_ARTIFACTS_SUBDIR = *"periodic"* ]]; then chmod +x $KOKORO_GFILE_DIR/linux_amd64/flakybot $KOKORO_GFILE_DIR/linux_amd64/flakybot diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 5da926e318..865fcc0e88 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -50,9 +50,9 @@ You'll have to create a development environment using a Git checkout: # Configure remotes such that you can pull changes from the googleapis/python-aiplatform # repository into your local repository. $ git remote add upstream git@github.com:googleapis/python-aiplatform.git - # fetch and merge changes from upstream into master + # fetch and merge changes from upstream into main $ git fetch upstream - $ git merge upstream/master + $ git merge upstream/main Now your local repo is set up such that you will push changes to your GitHub repo, from which you can submit a pull request. @@ -110,12 +110,12 @@ Coding Style variables:: export GOOGLE_CLOUD_TESTING_REMOTE="upstream" - export GOOGLE_CLOUD_TESTING_BRANCH="master" + export GOOGLE_CLOUD_TESTING_BRANCH="main" By doing this, you are specifying the location of the most up-to-date version of ``python-aiplatform``. The the suggested remote name ``upstream`` should point to the official ``googleapis`` checkout and the - the branch should be the main branch on that remote (``master``). + the branch should be the main branch on that remote (``main``). - This repository contains configuration for the `pre-commit `__ tool, which automates checking @@ -209,7 +209,7 @@ The `description on PyPI`_ for the project comes directly from the ``README``. Due to the reStructuredText (``rst``) parser used by PyPI, relative links which will work on GitHub (e.g. 
``CONTRIBUTING.rst`` instead of -``https://github.com/googleapis/python-aiplatform/blob/master/CONTRIBUTING.rst``) +``https://github.com/googleapis/python-aiplatform/blob/main/CONTRIBUTING.rst``) may cause problems creating links or rendering the description. .. _description on PyPI: https://pypi.org/project/google-cloud-aiplatform @@ -234,7 +234,7 @@ We support: Supported versions can be found in our ``noxfile.py`` `config`_. -.. _config: https://github.com/googleapis/python-aiplatform/blob/master/noxfile.py +.. _config: https://github.com/googleapis/python-aiplatform/blob/main/noxfile.py We also explicitly decided to support Python 3 beginning with version 3.6. diff --git a/docs/conf.py b/docs/conf.py index 5e1669c22d..87d0ac7189 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -76,8 +76,8 @@ # The encoding of source files. # source_encoding = 'utf-8-sig' -# The master toctree document. -master_doc = "index" +# The root toctree document. +root_doc = "index" # General information about the project. project = "google-cloud-aiplatform" @@ -280,7 +280,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform.tex", "google-cloud-aiplatform Documentation", author, @@ -315,7 +315,7 @@ # (source start file, name, description, authors, manual section). 
man_pages = [ ( - master_doc, + root_doc, "google-cloud-aiplatform", "google-cloud-aiplatform Documentation", [author], @@ -334,7 +334,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform", "google-cloud-aiplatform Documentation", author, diff --git a/google/cloud/aiplatform/training_jobs.py b/google/cloud/aiplatform/training_jobs.py index 15ef20af74..66efc2bac6 100644 --- a/google/cloud/aiplatform/training_jobs.py +++ b/google/cloud/aiplatform/training_jobs.py @@ -333,7 +333,7 @@ def _create_input_data_config( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. The schema is defined as an OpenAPI 3.0.2 - [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/master/versions/3.0.2.md#schema-object) The schema files + [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files that can be used here are found in gs://google-cloud-aiplatform/schema/dataset/annotation/, note that the chosen schema must be consistent with @@ -593,7 +593,7 @@ def _run_job( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. The schema is defined as an OpenAPI 3.0.2 - [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/master/versions/3.0.2.md#schema-object) The schema files + [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files that can be used here are found in gs://google-cloud-aiplatform/schema/dataset/annotation/, note that the chosen schema must be consistent with @@ -1808,7 +1808,7 @@ def run( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. 
The schema is defined as an OpenAPI 3.0.2 - [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/master/versions/3.0.2.md#schema-object) The schema files + [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files that can be used here are found in gs://google-cloud-aiplatform/schema/dataset/annotation/, note that the chosen schema must be consistent with @@ -2563,7 +2563,7 @@ def run( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. The schema is defined as an OpenAPI 3.0.2 - [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/master/versions/3.0.2.md#schema-object) The schema files + [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files that can be used here are found in gs://google-cloud-aiplatform/schema/dataset/annotation/, note that the chosen schema must be consistent with @@ -4963,7 +4963,7 @@ def run( annotation_schema_uri (str): Google Cloud Storage URI points to a YAML file describing annotation schema. 
The schema is defined as an OpenAPI 3.0.2 - [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/master/versions/3.0.2.md#schema-object) The schema files + [Schema Object](https://github.com/OAI/OpenAPI-Specification/blob/main/versions/3.0.2.md#schema-object) The schema files that can be used here are found in gs://google-cloud-aiplatform/schema/dataset/annotation/, note that the chosen schema must be consistent with diff --git a/owlbot.py b/owlbot.py index d08a25d661..48f058ecaf 100644 --- a/owlbot.py +++ b/owlbot.py @@ -90,6 +90,59 @@ ".kokoro/**/*.cfg" ] ) # the microgenerator has a good coveragerc file +# Remove the replacements below once https://github.com/googleapis/synthtool/pull/1188 is merged + +# Update googleapis/repo-automation-bots repo to main in .kokoro/*.sh files +s.replace(".kokoro/*.sh", "repo-automation-bots/tree/master", "repo-automation-bots/tree/main") + +# Customize CONTRIBUTING.rst to replace master with main +s.replace( + "CONTRIBUTING.rst", + "fetch and merge changes from upstream into master", + "fetch and merge changes from upstream into main", +) + +s.replace( + "CONTRIBUTING.rst", + "git merge upstream/master", + "git merge upstream/main", +) + +s.replace( + "CONTRIBUTING.rst", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"master\"""", + """export GOOGLE_CLOUD_TESTING_BRANCH=\"main\"""", +) + +s.replace( + "CONTRIBUTING.rst", + "remote \(``master``\)", + "remote (``main``)", +) + +s.replace( + "CONTRIBUTING.rst", + "blob/master/CONTRIBUTING.rst", + "blob/main/CONTRIBUTING.rst", +) + +s.replace( + "CONTRIBUTING.rst", + "blob/master/noxfile.py", + "blob/main/noxfile.py", +) + +s.replace( + "**/docs/conf.py", + "master_doc", + "root_doc", +) + +s.replace( + "**/docs/conf.py", + "# The master toctree document.", + "# The root toctree document.", +) # Update samples config to use `ucaip-sample-tests` project s.replace(".kokoro/samples/python3.*/common.cfg", diff --git a/schema/predict/instance/docs/conf.py 
b/schema/predict/instance/docs/conf.py index a12fb006b9..670c3a5192 100644 --- a/schema/predict/instance/docs/conf.py +++ b/schema/predict/instance/docs/conf.py @@ -74,8 +74,8 @@ # The encoding of source files. # source_encoding = 'utf-8-sig' -# The master toctree document. -master_doc = "index" +# The root toctree document. +root_doc = "index" # General information about the project. project = u"google-cloud-aiplatform-v1-schema-predict-instance" @@ -272,7 +272,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-instance.tex", u"google-cloud-aiplatform-v1-schema-predict-instance Documentation", author, @@ -307,7 +307,7 @@ # (source start file, name, description, authors, manual section). man_pages = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-instance", u"Google Cloud Aiplatform V1 Schema Predict Instance Documentation", [author], @@ -326,7 +326,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-instance", u"google-cloud-aiplatform-v1-schema-predict-instance Documentation", author, diff --git a/schema/predict/params/docs/conf.py b/schema/predict/params/docs/conf.py index 6917071403..7b4605df71 100644 --- a/schema/predict/params/docs/conf.py +++ b/schema/predict/params/docs/conf.py @@ -74,8 +74,8 @@ # The encoding of source files. # source_encoding = 'utf-8-sig' -# The master toctree document. -master_doc = "index" +# The root toctree document. +root_doc = "index" # General information about the project. project = u"google-cloud-aiplatform-v1-schema-predict-params" @@ -272,7 +272,7 @@ # author, documentclass [howto, manual, or own class]). 
latex_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-params.tex", u"google-cloud-aiplatform-v1-schema-predict-params Documentation", author, @@ -307,7 +307,7 @@ # (source start file, name, description, authors, manual section). man_pages = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-params", u"Google Cloud Aiplatform V1 Schema Predict Params Documentation", [author], @@ -326,7 +326,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-params", u"google-cloud-aiplatform-v1-schema-predict-params Documentation", author, diff --git a/schema/predict/prediction/docs/conf.py b/schema/predict/prediction/docs/conf.py index c0f73900a9..43b935efb4 100644 --- a/schema/predict/prediction/docs/conf.py +++ b/schema/predict/prediction/docs/conf.py @@ -74,8 +74,8 @@ # The encoding of source files. # source_encoding = 'utf-8-sig' -# The master toctree document. -master_doc = "index" +# The root toctree document. +root_doc = "index" # General information about the project. project = u"google-cloud-aiplatform-v1-schema-predict-prediction" @@ -272,7 +272,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-prediction.tex", u"google-cloud-aiplatform-v1-schema-predict-prediction Documentation", author, @@ -307,7 +307,7 @@ # (source start file, name, description, authors, manual section). 
man_pages = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-prediction", u"Google Cloud Aiplatform V1 Schema Predict Prediction Documentation", [author], @@ -326,7 +326,7 @@ # dir menu entry, description, category) texinfo_documents = [ ( - master_doc, + root_doc, "google-cloud-aiplatform-v1-schema-predict-prediction", u"google-cloud-aiplatform-v1-schema-predict-prediction Documentation", author, From db580ad43e97e0d877c29c0e8c077c37dee33ff3 Mon Sep 17 00:00:00 2001 From: sasha-gitg <44654632+sasha-gitg@users.noreply.github.com> Date: Sun, 29 Aug 2021 18:49:54 -0400 Subject: [PATCH 26/28] feat: Add wait_for_resource_creation to BatchPredictionJob and unblock async creation when model is pending creation. (#660) --- README.rst | 33 ++++++ google/cloud/aiplatform/base.py | 16 ++- google/cloud/aiplatform/jobs.py | 121 ++++++++++---------- google/cloud/aiplatform/models.py | 4 +- tests/system/aiplatform/e2e_base.py | 11 ++ tests/system/aiplatform/test_e2e_tabular.py | 50 ++++++-- tests/unit/aiplatform/test_jobs.py | 22 ++-- tests/unit/aiplatform/test_training_jobs.py | 6 +- 8 files changed, 177 insertions(+), 86 deletions(-) diff --git a/README.rst b/README.rst index e8fc200700..1f4462722c 100644 --- a/README.rst +++ b/README.rst @@ -274,6 +274,39 @@ Please visit `Importing models to Vertex AI`_ for a detailed overview: .. _Importing models to Vertex AI: https://cloud.google.com/vertex-ai/docs/general/import-model +Batch Prediction +---------------- + +To create a batch prediction job: + +.. 
code-block:: Python + + model = aiplatform.Model('/projects/my-project/locations/us-central1/models/{MODEL_ID}') + + batch_prediction_job = model.batch_predict( + job_display_name='my-batch-prediction-job', + instances_format='csv', + machine_type='n1-standard-4', + gcs_source=['gs://path/to/my/file.csv'], + gcs_destination_prefix='gs://path/to/my/batch_prediction/results/' + ) + +You can also create a batch prediction job asynchronously by including the `sync=False` argument: + +.. code-block:: Python + + batch_prediction_job = model.batch_predict(..., sync=False) + + # wait for resource to be created + batch_prediction_job.wait_for_resource_creation() + + # get the state + batch_prediction_job.state + + # block until job is complete + batch_prediction_job.wait() + + Endpoints --------- diff --git a/google/cloud/aiplatform/base.py b/google/cloud/aiplatform/base.py index 20f9aa07ad..d7d0e6317b 100644 --- a/google/cloud/aiplatform/base.py +++ b/google/cloud/aiplatform/base.py @@ -680,17 +680,21 @@ def wrapper(*args, **kwargs): inspect.getfullargspec(method).annotations["return"] ) + # object produced by the method + returned_object = bound_args.arguments.get(return_input_arg) + # is a classmethod that creates the object and returns it if args and inspect.isclass(args[0]): - # assumes classmethod is our resource noun - returned_object = args[0]._empty_constructor() + + # assumes class in classmethod is the resource noun + returned_object = ( + args[0]._empty_constructor() + if not returned_object + else returned_object + ) + self = returned_object else: # instance method - - # object produced by the method - returned_object = bound_args.arguments.get(return_input_arg) - # if we're returning an input object if returned_object and returned_object is not self: diff --git a/google/cloud/aiplatform/jobs.py b/google/cloud/aiplatform/jobs.py index 6a5eb8ffee..ed59996310 100644 --- a/google/cloud/aiplatform/jobs.py +++ b/google/cloud/aiplatform/jobs.py @@ -32,15 +32,6 @@ from 
google.cloud import aiplatform from google.cloud.aiplatform import base from google.cloud.aiplatform import compat -from google.cloud.aiplatform import constants -from google.cloud.aiplatform import initializer -from google.cloud.aiplatform import hyperparameter_tuning -from google.cloud.aiplatform import utils -from google.cloud.aiplatform.utils import console_utils -from google.cloud.aiplatform.utils import source_utils -from google.cloud.aiplatform.utils import worker_spec_utils - -from google.cloud.aiplatform.compat.services import job_service_client from google.cloud.aiplatform.compat.types import ( batch_prediction_job as gca_bp_job_compat, batch_prediction_job_v1 as gca_bp_job_v1, @@ -58,6 +49,13 @@ machine_resources_v1beta1 as gca_machine_resources_v1beta1, study as gca_study_compat, ) +from google.cloud.aiplatform import constants +from google.cloud.aiplatform import initializer +from google.cloud.aiplatform import hyperparameter_tuning +from google.cloud.aiplatform import utils +from google.cloud.aiplatform.utils import console_utils +from google.cloud.aiplatform.utils import source_utils +from google.cloud.aiplatform.utils import worker_spec_utils _LOGGER = base.Logger(__name__) @@ -352,7 +350,7 @@ def completion_stats(self) -> Optional[gca_completion_stats.CompletionStats]: def create( cls, job_display_name: str, - model_name: str, + model_name: Union[str, "aiplatform.Model"], instances_format: str = "jsonl", predictions_format: str = "jsonl", gcs_source: Optional[Union[str, Sequence[str]]] = None, @@ -384,10 +382,12 @@ def create( Required. The user-defined name of the BatchPredictionJob. The name can be up to 128 characters long and can be consist of any UTF-8 characters. - model_name (str): + model_name (Union[str, aiplatform.Model]): Required. A fully-qualified model resource name or model ID. Example: "projects/123/locations/us-central1/models/456" or "456" when project and location are initialized or passed. + + Or an instance of aiplatform.Model. 
instances_format (str): Required. The format in which instances are given, must be one of "jsonl", "csv", "bigquery", "tf-record", "tf-record-gzip", @@ -533,15 +533,17 @@ def create( """ utils.validate_display_name(job_display_name) + if labels: utils.validate_labels(labels) - model_name = utils.full_resource_name( - resource_name=model_name, - resource_noun="models", - project=project, - location=location, - ) + if isinstance(model_name, str): + model_name = utils.full_resource_name( + resource_name=model_name, + resource_noun="models", + project=project, + location=location, + ) # Raise error if both or neither source URIs are provided if bool(gcs_source) == bool(bigquery_source): @@ -570,6 +572,7 @@ def create( f"{predictions_format} is not an accepted prediction format " f"type. Please choose from: {constants.BATCH_PREDICTION_OUTPUT_STORAGE_FORMATS}" ) + gca_bp_job = gca_bp_job_compat gca_io = gca_io_compat gca_machine_resources = gca_machine_resources_compat @@ -584,7 +587,6 @@ def create( # Required Fields gapic_batch_prediction_job.display_name = job_display_name - gapic_batch_prediction_job.model = model_name input_config = gca_bp_job.BatchPredictionJob.InputConfig() output_config = gca_bp_job.BatchPredictionJob.OutputConfig() @@ -657,63 +659,43 @@ def create( metadata=explanation_metadata, parameters=explanation_parameters ) - # TODO (b/174502913): Support private feature once released - - api_client = cls._instantiate_client(location=location, credentials=credentials) + empty_batch_prediction_job = cls._empty_constructor( + project=project, location=location, credentials=credentials, + ) return cls._create( - api_client=api_client, - parent=initializer.global_config.common_location_path( - project=project, location=location - ), - batch_prediction_job=gapic_batch_prediction_job, + empty_batch_prediction_job=empty_batch_prediction_job, + model_or_model_name=model_name, + gca_batch_prediction_job=gapic_batch_prediction_job, 
generate_explanation=generate_explanation, - project=project or initializer.global_config.project, - location=location or initializer.global_config.location, - credentials=credentials or initializer.global_config.credentials, sync=sync, ) @classmethod - @base.optional_sync() + @base.optional_sync(return_input_arg="empty_batch_prediction_job") def _create( cls, - api_client: job_service_client.JobServiceClient, - parent: str, - batch_prediction_job: Union[ + empty_batch_prediction_job: "BatchPredictionJob", + model_or_model_name: Union[str, "aiplatform.Model"], + gca_batch_prediction_job: Union[ gca_bp_job_v1beta1.BatchPredictionJob, gca_bp_job_v1.BatchPredictionJob ], generate_explanation: bool, - project: str, - location: str, - credentials: Optional[auth_credentials.Credentials], sync: bool = True, ) -> "BatchPredictionJob": """Create a batch prediction job. Args: - api_client (dataset_service_client.DatasetServiceClient): - Required. An instance of DatasetServiceClient with the correct api_endpoint - already set based on user's preferences. - batch_prediction_job (gca_bp_job.BatchPredictionJob): + empty_batch_prediction_job (BatchPredictionJob): + Required. BatchPredictionJob without _gca_resource populated. + model_or_model_name (Union[str, aiplatform.Model]): + Required. A fully-qualified model resource name or + an instance of aiplatform.Model. + gca_batch_prediction_job (gca_bp_job.BatchPredictionJob): Required. a batch prediction job proto for creating a batch prediction job on Vertex AI. generate_explanation (bool): Required. Generate explanation along with the batch prediction results. - parent (str): - Required. Also known as common location path, that usually contains the - project and location that the user provided to the upstream method. - Example: "projects/my-prj/locations/us-central1" - project (str): - Required. Project to upload this model to. Overrides project set in - aiplatform.init. - location (str): - Required. 
Location to upload this model to. Overrides location set in - aiplatform.init. - credentials (Optional[auth_credentials.Credentials]): - Custom credentials to use to upload this model. Overrides - credentials set in aiplatform.init. - Returns: (jobs.BatchPredictionJob): Instantiated representation of the created batch prediction job. @@ -725,21 +707,34 @@ def _create( by Vertex AI. """ # select v1beta1 if explain else use default v1 + + parent = initializer.global_config.common_location_path( + project=empty_batch_prediction_job.project, + location=empty_batch_prediction_job.location, + ) + + model_resource_name = ( + model_or_model_name + if isinstance(model_or_model_name, str) + else model_or_model_name.resource_name + ) + + gca_batch_prediction_job.model = model_resource_name + + api_client = empty_batch_prediction_job.api_client + if generate_explanation: api_client = api_client.select_version(compat.V1BETA1) _LOGGER.log_create_with_lro(cls) gca_batch_prediction_job = api_client.create_batch_prediction_job( - parent=parent, batch_prediction_job=batch_prediction_job + parent=parent, batch_prediction_job=gca_batch_prediction_job ) - batch_prediction_job = cls( - batch_prediction_job_name=gca_batch_prediction_job.name, - project=project, - location=location, - credentials=credentials, - ) + empty_batch_prediction_job._gca_resource = gca_batch_prediction_job + + batch_prediction_job = empty_batch_prediction_job _LOGGER.log_create_complete(cls, batch_prediction_job._gca_resource, "bpj") @@ -843,6 +838,10 @@ def iter_outputs( f"on your prediction output:\n{output_info}" ) + def wait_for_resource_creation(self) -> None: + """Waits until resource has been created.""" + self._wait_for_resource_creation() + class _RunnableJob(_Job): """ABC to interface job as a runnable training class.""" diff --git a/google/cloud/aiplatform/models.py b/google/cloud/aiplatform/models.py index 4af337b3e8..ca1aefd9f3 100644 --- a/google/cloud/aiplatform/models.py +++ 
b/google/cloud/aiplatform/models.py @@ -981,7 +981,6 @@ def undeploy( if deployed_model_id in traffic_split and traffic_split[deployed_model_id]: raise ValueError("Model being undeployed should have 0 traffic.") if sum(traffic_split.values()) != 100: - # TODO(b/172678233) verify every referenced deployed model exists raise ValueError( "Sum of all traffic within traffic split needs to be 100." ) @@ -2167,11 +2166,10 @@ def batch_predict( (jobs.BatchPredictionJob): Instantiated representation of the created batch prediction job. """ - self.wait() return jobs.BatchPredictionJob.create( job_display_name=job_display_name, - model_name=self.resource_name, + model_name=self, instances_format=instances_format, predictions_format=predictions_format, gcs_source=gcs_source, diff --git a/tests/system/aiplatform/e2e_base.py b/tests/system/aiplatform/e2e_base.py index c0843133dd..de91c1249a 100644 --- a/tests/system/aiplatform/e2e_base.py +++ b/tests/system/aiplatform/e2e_base.py @@ -43,6 +43,17 @@ def _temp_prefix(cls) -> str: """ pass + @classmethod + def _make_display_name(cls, key: str) -> str: + """Helper method to make unique display_names. + + Args: + key (str): Required. Identifier for the display name. + Returns: + Unique display name. 
+ """ + return f"{cls._temp_prefix}-{key}-{uuid.uuid4()}" + def setup_method(self): importlib.reload(initializer) importlib.reload(aiplatform) diff --git a/tests/system/aiplatform/test_e2e_tabular.py b/tests/system/aiplatform/test_e2e_tabular.py index 9a330f34cf..a55ea237e4 100644 --- a/tests/system/aiplatform/test_e2e_tabular.py +++ b/tests/system/aiplatform/test_e2e_tabular.py @@ -16,12 +16,15 @@ # import os -import uuid from urllib import request import pytest from google.cloud import aiplatform +from google.cloud.aiplatform.compat.types import ( + job_state as gca_job_state, + pipeline_state as gca_pipeline_state, +) from tests.system.aiplatform import e2e_base @@ -64,9 +67,11 @@ def test_end_to_end_tabular(self, shared_state): # Create and import to single managed dataset for both training jobs + dataset_gcs_source = f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}' + ds = aiplatform.TabularDataset.create( - display_name=f"{self._temp_prefix}-dataset-{uuid.uuid4()}", - gcs_source=[f'gs://{shared_state["staging_bucket_name"]}/{_BLOB_PATH}'], + display_name=self._make_display_name("dataset"), + gcs_source=[dataset_gcs_source], sync=False, ) @@ -75,7 +80,7 @@ def test_end_to_end_tabular(self, shared_state): # Define both training jobs custom_job = aiplatform.CustomTrainingJob( - display_name=f"{self._temp_prefix}-train-housing-custom-{uuid.uuid4()}", + display_name=self._make_display_name("train-housing-custom"), script_path=_LOCAL_TRAINING_SCRIPT_PATH, container_uri="gcr.io/cloud-aiplatform/training/tf-cpu.2-2:latest", requirements=["gcsfs==0.7.1"], @@ -83,7 +88,7 @@ def test_end_to_end_tabular(self, shared_state): ) automl_job = aiplatform.AutoMLTabularTrainingJob( - display_name=f"{self._temp_prefix}-train-housing-automl-{uuid.uuid4()}", + display_name=self._make_display_name("train-housing-automl"), optimization_prediction_type="regression", optimization_objective="minimize-rmse", ) @@ -93,14 +98,14 @@ def test_end_to_end_tabular(self, shared_state): 
custom_model = custom_job.run( ds, replica_count=1, - model_display_name=f"{self._temp_prefix}-custom-housing-model-{uuid.uuid4()}", + model_display_name=self._make_display_name("custom-housing-model"), sync=False, ) automl_model = automl_job.run( dataset=ds, target_column="median_house_value", - model_display_name=f"{self._temp_prefix}-automl-housing-model-{uuid.uuid4()}", + model_display_name=self._make_display_name("automl-housing-model"), sync=False, ) @@ -113,6 +118,21 @@ def test_end_to_end_tabular(self, shared_state): automl_endpoint = automl_model.deploy(machine_type="n1-standard-4", sync=False) shared_state["resources"].extend([automl_endpoint, custom_endpoint]) + custom_batch_prediction_job = custom_model.batch_predict( + job_display_name=self._make_display_name("automl-housing-model"), + instances_format="csv", + machine_type="n1-standard-4", + gcs_source=dataset_gcs_source, + gcs_destination_prefix=f'gs://{shared_state["staging_bucket_name"]}/bp_results/', + sync=False, + ) + + shared_state["resources"].append(custom_batch_prediction_job) + + custom_job.wait_for_resource_creation() + automl_job.wait_for_resource_creation() + custom_batch_prediction_job.wait_for_resource_creation() + # Send online prediction with same instance to both deployed models # This sample is taken from an observation where median_house_value = 94600 custom_endpoint.wait() @@ -130,6 +150,9 @@ def test_end_to_end_tabular(self, shared_state): }, ] ) + + custom_batch_prediction_job.wait() + automl_endpoint.wait() automl_prediction = automl_endpoint.predict( [ @@ -146,6 +169,19 @@ def test_end_to_end_tabular(self, shared_state): ] ) + assert ( + custom_job.state + == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + ) + assert ( + automl_job.state + == gca_pipeline_state.PipelineState.PIPELINE_STATE_SUCCEEDED + ) + assert ( + custom_batch_prediction_job.state + == gca_job_state.JobState.JOB_STATE_SUCCEEDED + ) + # Ensure a single prediction was returned assert 
len(custom_prediction.predictions) == 1 assert len(automl_prediction.predictions) == 1 diff --git a/tests/unit/aiplatform/test_jobs.py b/tests/unit/aiplatform/test_jobs.py index d10eb0335d..f14eea99bc 100644 --- a/tests/unit/aiplatform/test_jobs.py +++ b/tests/unit/aiplatform/test_jobs.py @@ -212,6 +212,11 @@ def get_batch_prediction_job_mock(): job_service_client.JobServiceClient, "get_batch_prediction_job" ) as get_batch_prediction_job_mock: get_batch_prediction_job_mock.side_effect = [ + gca_batch_prediction_job.BatchPredictionJob( + name=_TEST_BATCH_PREDICTION_JOB_NAME, + display_name=_TEST_DISPLAY_NAME, + state=_TEST_JOB_STATE_PENDING, + ), gca_batch_prediction_job.BatchPredictionJob( name=_TEST_BATCH_PREDICTION_JOB_NAME, display_name=_TEST_DISPLAY_NAME, @@ -390,7 +395,7 @@ def test_batch_prediction_job_status(self, get_batch_prediction_job_mock): bp_job_state = bp.state assert get_batch_prediction_job_mock.call_count == 2 - assert bp_job_state == _TEST_JOB_STATE_SUCCESS + assert bp_job_state == _TEST_JOB_STATE_RUNNING get_batch_prediction_job_mock.assert_called_with( name=_TEST_BATCH_PREDICTION_JOB_NAME @@ -475,8 +480,9 @@ def test_batch_predict_gcs_source_and_dest( sync=sync, ) - if not sync: - batch_prediction_job.wait() + batch_prediction_job.wait_for_resource_creation() + + batch_prediction_job.wait() # Construct expected request expected_gapic_batch_prediction_job = gca_batch_prediction_job.BatchPredictionJob( @@ -514,8 +520,9 @@ def test_batch_predict_gcs_source_bq_dest( sync=sync, ) - if not sync: - batch_prediction_job.wait() + batch_prediction_job.wait_for_resource_creation() + + batch_prediction_job.wait() assert ( batch_prediction_job.output_info @@ -571,8 +578,9 @@ def test_batch_predict_with_all_args( sync=sync, ) - if not sync: - batch_prediction_job.wait() + batch_prediction_job.wait_for_resource_creation() + + batch_prediction_job.wait() # Construct expected request expected_gapic_batch_prediction_job = 
gca_batch_prediction_job_v1beta1.BatchPredictionJob( diff --git a/tests/unit/aiplatform/test_training_jobs.py b/tests/unit/aiplatform/test_training_jobs.py index 0fd781b380..1a919f1635 100644 --- a/tests/unit/aiplatform/test_training_jobs.py +++ b/tests/unit/aiplatform/test_training_jobs.py @@ -25,6 +25,7 @@ import sys import tarfile import tempfile +import uuid from unittest import mock from unittest.mock import patch @@ -614,11 +615,12 @@ class TestCustomTrainingJob: def setup_method(self): importlib.reload(initializer) importlib.reload(aiplatform) - with open(_TEST_LOCAL_SCRIPT_FILE_NAME, "w") as fp: + self._local_script_file_name = f"{uuid.uuid4()}-{_TEST_LOCAL_SCRIPT_FILE_NAME}" + with open(self._local_script_file_name, "w") as fp: fp.write(_TEST_PYTHON_SOURCE) def teardown_method(self): - pathlib.Path(_TEST_LOCAL_SCRIPT_FILE_NAME).unlink() + pathlib.Path(self._local_script_file_name).unlink() initializer.global_pool.shutdown(wait=True) @pytest.mark.parametrize("sync", [True, False]) From ec6355b75b765516c1448c25ff1d325b7f70bc59 Mon Sep 17 00:00:00 2001 From: "gcf-owl-bot[bot]" <78513119+gcf-owl-bot[bot]@users.noreply.github.com> Date: Mon, 30 Aug 2021 15:32:38 +0000 Subject: [PATCH 27/28] chore(python): disable dependency dashboard (#666) --- .github/.OwlBot.lock.yaml | 2 +- renovate.json | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/.OwlBot.lock.yaml b/.github/.OwlBot.lock.yaml index a9fcd07cc4..b75186cf1b 100644 --- a/.github/.OwlBot.lock.yaml +++ b/.github/.OwlBot.lock.yaml @@ -1,3 +1,3 @@ docker: image: gcr.io/repo-automation-bots/owlbot-python:latest - digest: sha256:9743664022bd63a8084be67f144898314c7ca12f0a03e422ac17c733c129d803 + digest: sha256:d6761eec279244e57fe9d21f8343381a01d3632c034811a72f68b83119e58c69 diff --git a/renovate.json b/renovate.json index c04895563e..9fa8816fe8 100644 --- a/renovate.json +++ b/renovate.json @@ -1,6 +1,8 @@ { "extends": [ - "config:base", ":preserveSemverRanges" + "config:base", + 
":preserveSemverRanges", + ":disableDependencyDashboard" ], "ignorePaths": [".pre-commit-config.yaml"], "pip_requirements": { From 916c1d4240a4ccd81ddc5b1ed67aece21b60fc11 Mon Sep 17 00:00:00 2001 From: "release-please[bot]" <55107282+release-please[bot]@users.noreply.github.com> Date: Mon, 30 Aug 2021 18:25:13 -0400 Subject: [PATCH 28/28] chore: release 1.4.0 (#591) Co-authored-by: release-please[bot] <55107282+release-please[bot]@users.noreply.github.com> --- CHANGELOG.md | 25 +++++++++++++++++++++++++ setup.py | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b843cbaa7e..1730a90ca6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,30 @@ # Changelog +## [1.4.0](https://www.github.com/googleapis/python-aiplatform/compare/v1.3.0...v1.4.0) (2021-08-30) + + +### Features + +* add filter and timestamp splits ([#627](https://www.github.com/googleapis/python-aiplatform/issues/627)) ([1a13577](https://www.github.com/googleapis/python-aiplatform/commit/1a135775966c8a2303ded529eba514dcf9db7205)) +* add labels to all resource creation apis ([#601](https://www.github.com/googleapis/python-aiplatform/issues/601)) ([4e7666a](https://www.github.com/googleapis/python-aiplatform/commit/4e7666a30b4472698ed980d9d746ba85ad4142d8)) +* add PipelineJob.list ([a58ea82](https://www.github.com/googleapis/python-aiplatform/commit/a58ea826c575b9b0c8cb69e47fc2f07a98bb285b)) +* add support for export_evaluated_data_items_config in AutoMLTab… ([#583](https://www.github.com/googleapis/python-aiplatform/issues/583)) ([2a6b0a3](https://www.github.com/googleapis/python-aiplatform/commit/2a6b0a369296698f79d75e93007e4c7319f3523c)) +* add util functions to get URLs for Tensorboard web app. 
([#635](https://www.github.com/googleapis/python-aiplatform/issues/635)) ([8d88c00](https://www.github.com/googleapis/python-aiplatform/commit/8d88c006c5586b28d340448382a9292543448fd6)) +* Add wait_for_resource_creation to BatchPredictionJob and unblock async creation when model is pending creation. ([#660](https://www.github.com/googleapis/python-aiplatform/issues/660)) ([db580ad](https://www.github.com/googleapis/python-aiplatform/commit/db580ad43e97e0d877c29c0e8c077c37dee33ff3)) +* Added the VertexAiResourceNoun.to_dict() method ([#588](https://www.github.com/googleapis/python-aiplatform/issues/588)) ([b478075](https://www.github.com/googleapis/python-aiplatform/commit/b478075efb05553760514256fee9a63126a9916f)) +* expose base_output_dir for custom job ([#586](https://www.github.com/googleapis/python-aiplatform/issues/586)) ([2f138d1](https://www.github.com/googleapis/python-aiplatform/commit/2f138d1dfe4959d1b5f53a9dfef90a18de9908ec)) +* expose boot disk type and size for CustomTrainingJob, CustomPythonPackageTrainingJob, and CustomContainerTrainingJob ([#602](https://www.github.com/googleapis/python-aiplatform/issues/602)) ([355ea24](https://www.github.com/googleapis/python-aiplatform/commit/355ea24c6dd9b061ae0933df4dd07dd5b8c2232b)) +* split GAPIC samples by service ([#599](https://www.github.com/googleapis/python-aiplatform/issues/599)) ([5f15b4f](https://www.github.com/googleapis/python-aiplatform/commit/5f15b4f9a4bad2c9447747a8bdebaa99eab00b75)) + + +### Bug Fixes + +* Fixed bug in TabularDataset.column_names ([#590](https://www.github.com/googleapis/python-aiplatform/issues/590)) ([0fbcd59](https://www.github.com/googleapis/python-aiplatform/commit/0fbcd592cd7e9c4b0a131d777fa84e592a43a21c)) +* pipeline none values ([#649](https://www.github.com/googleapis/python-aiplatform/issues/649)) ([2f89343](https://www.github.com/googleapis/python-aiplatform/commit/2f89343adbd69610fc5cacc7121119fc7279186e)) +* Populate service_account and network in PipelineJob 
instead of pipeline_spec ([#658](https://www.github.com/googleapis/python-aiplatform/issues/658)) ([8fde2ce](https://www.github.com/googleapis/python-aiplatform/commit/8fde2ce4441139784bc0fdd62c88d4b833018765)) +* re-remove extra TB dependencies introduced due to merge conflict ([#593](https://www.github.com/googleapis/python-aiplatform/issues/593)) ([433b94a](https://www.github.com/googleapis/python-aiplatform/commit/433b94a78004de6d3a4726317d8bac32c358ace8)) +* Update BatchPredictionJob.iter_outputs() and BQ docstrings ([#631](https://www.github.com/googleapis/python-aiplatform/issues/631)) ([28f32fd](https://www.github.com/googleapis/python-aiplatform/commit/28f32fd11470ad86d2f103346b3e6be8f1adc2d8)) + ## [1.3.0](https://www.github.com/googleapis/python-aiplatform/compare/v1.2.0...v1.3.0) (2021-07-30) diff --git a/setup.py b/setup.py index f6eeaca9fe..20485d23e5 100644 --- a/setup.py +++ b/setup.py @@ -21,7 +21,7 @@ import setuptools # type: ignore name = "google-cloud-aiplatform" -version = "1.3.0" +version = "1.4.0" description = "Cloud AI Platform API client library" package_root = os.path.abspath(os.path.dirname(__file__))