From a6c342ecf6b4ce61298c3adbc3ad0b5067c7126a Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Tue, 21 Dec 2021 22:09:38 -0300 Subject: [PATCH 01/11] Update builder.py builder de pycaret --- learning_orchestra_client/builder/builder.py | 173 +++++++++++++++++++ 1 file changed, 173 insertions(+) diff --git a/learning_orchestra_client/builder/builder.py b/learning_orchestra_client/builder/builder.py index ae229cc..62ed133 100644 --- a/learning_orchestra_client/builder/builder.py +++ b/learning_orchestra_client/builder/builder.py @@ -5,6 +5,179 @@ from typing import Union +class BuilderPycaret: + __CODE_FIELD = "function" + __PARAMETERS_FIELD = "functionParameters" + __NAME_FIELD = "name" + __DESCRIPTION_FIELD = "description" + __FILE_NAME = "filename" + + def __init__(self, cluster_ip: str): + self.__api_path = "/api/learningOrchestra/v1/builder/pycaret" + self.__api_path_get_file = "/api/learningOrchestra/v1/explore/getlocaldatapycaret" + self.__service_url_get_file = f'{cluster_ip}{self.__api_path_get_file}' + self.__service_url = f'{cluster_ip}{self.__api_path}' + self.__response_treat = ResponseTreat() + self.__cluster_ip = cluster_ip + self.__entity_reader = EntityReader(self.__service_url) + self.__observer = Observer(self.__cluster_ip) + + def run_pycaret_sync(self, + name: str, + parameters: dict, + code: str, + description: str = "", + pretty_response: bool = False) -> Union[dict, str]: + """ + description: This method runs a python 3 code in sync mode, so it + represents a wildcard for the data scientist. It can be used when + train, predict, tune, explore or any other pipe must be customized. The + function is also useful for new pipes. pretty_response: If true it + returns a string, otherwise a dictionary. + + name: Is the name of the object stored in Learning Orchestra storage + system (volume or mongoDB). + url: Url to CSV file. + + return: A JSON object with an error or warning message or the correct + operation result. 
+ """ + request_body = { + self.__NAME_FIELD: name, + self.__PARAMETERS_FIELD: parameters, + self.__CODE_FIELD: code, + self.__DESCRIPTION_FIELD: description} + + request_url = self.__service_url + response = requests.post(url=request_url, json=request_body) + self.__observer.wait(name) + + return self.__response_treat.treatment(response, pretty_response) + + def run_pycaret_async(self, + name: str, + parameters: dict, + code: str, + description: str = "", + pretty_response: bool = False) -> Union[dict, str]: + """ + description: This method runs a python 3 code in async mode, so it + represents a wildcard for the data scientist. It does not lock the + caller, so a wait method must be used. It can be used when train, + predict, tune, explore or any other pipe must be customized. The + function is also useful for new pipes. + + pretty_response: If true it returns a string, otherwise a dictionary. + name: Is the name of the function to be called + code: the Python code + parameters: the parameters of the function being called + + return: A JSON object with an error or warning message or the correct + operation result. + """ + request_body = { + self.__NAME_FIELD: name, + self.__PARAMETERS_FIELD: parameters, + self.__CODE_FIELD: code, + self.__DESCRIPTION_FIELD: description} + + request_url = self.__service_url + + response = requests.post(url=request_url, json=request_body) + return self.__response_treat.treatment(response, pretty_response) + + def search_all_executions(self, pretty_response: bool = False) \ + -> Union[dict, str]: + """ + description: This method retrieves all created functions metadata, + i.e., it does not retrieve the function result content. + + pretty_response: If true it returns a string, otherwise a dictionary. + + return: All function executions metadata stored in Learning Orchestra + or an empty result. 
+ """ + response = self.__entity_reader.read_all_instances_from_entity() + return self.__response_treat.treatment(response, pretty_response) + + def delete_execution(self, name: str, pretty_response=False) \ + -> Union[dict, str]: + """ + description: This method is responsible for deleting the function. + This delete operation is asynchronous, so it does not lock the caller + until the deletion finished. Instead, it returns a JSON object with a + URL for a future use. The caller uses the URL for delete checks. + + pretty_response: If true it returns a string, otherwise a dictionary. + name: Represents the function name. + + return: JSON object with an error message, a warning message or a + correct delete message + """ + + request_url = f'{self.__service_url}/{name}' + + response = requests.delete(request_url) + return self.__response_treat.treatment(response, pretty_response) + + def search_execution_content(self, + name: str, + query: dict = {}, + limit: int = 10, + skip: int = 0, + pretty_response: bool = False) \ + -> Union[dict, str]: + """ + description: This method is responsible for retrieving the function + results, including metadata. A function is executed many times, using + different parameters, + thus many results are stored + in Learning Orchestra. + + pretty_response: If true it returns a string, otherwise a dictionary. + name: Is the name of the function. + query: Query to make in MongoDB(default: empty query) + limit: Number of rows to return in pagination(default: 10) (maximum is + set at 20 rows per request) + skip: Number of rows to skip in pagination(default: 0) + + return: + A page with some function results inside or an error if there + is no such function. The current page is also returned to be used in + future content requests. 
+ """ + + response = self.__entity_reader.read_entity_content( + name, query, limit, skip) + + return self.__response_treat.treatment(response, pretty_response) + + def wait(self, dataset_name: str, timeout: int = None) -> dict: + """ + description: This method is responsible to create a synchronization + barrier for the run_pycaret_async method or delete_execution method. + + dataset_name: Represents the function name. + timeout: Represents the time in seconds to wait for a function to + finish its run. + + return: JSON object with an error message, a warning message or a + correct function result + """ + return self.__observer.wait(dataset_name, timeout) + + def search_report(self, + file_name: str) -> str: + request_body = { + self.__FILE_NAME: file_name + } + + request_url = self.__service_url_get_file + + response = requests.get(url=request_url, json=request_body) + return f'{response.text}' + + class BuilderSparkMl: __TRAIN_FIELD = "trainDatasetName" __TEST_FIELD = "testDatasetName" From 43c2b50b78cab224f40902bcf9cf6dc340f40000 Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Tue, 21 Dec 2021 22:09:49 -0300 Subject: [PATCH 02/11] Create titanic_pycaret_pipe_lo.py --- pipeline/titanic_pycaret_pipe_lo.py | 50 +++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 pipeline/titanic_pycaret_pipe_lo.py diff --git a/pipeline/titanic_pycaret_pipe_lo.py b/pipeline/titanic_pycaret_pipe_lo.py new file mode 100644 index 0000000..8233287 --- /dev/null +++ b/pipeline/titanic_pycaret_pipe_lo.py @@ -0,0 +1,50 @@ +from learning_orchestra_client.builder.builder import BuilderPycaret +from learning_orchestra_client.dataset.csv import DatasetCsv + +CLUSTER_IP = "http://34.125.23.29" + +dataset_csv = DatasetCsv(CLUSTER_IP) +dataset_csv.delete_dataset("train") +dataset_csv.insert_dataset_async( + url="https://raw.githubusercontent.com/JonatasMiguel/PycaretTitanic/main/train.csv", + dataset_name="train", +) +dataset_csv.wait(dataset_name="train", + timeout=1) 
+ +version = 3012 +name = f'titanic{version}' +codigo = """ +from pycaret import classification +from pycaret.classification import pull,tune_model + +clas = classification.setup(data=train, target='Survived', train_size=0.7, silent=True) +best = classification.compare_models() + +best_tuned = tune_model(best) + +report = pull() +report.to_csv('report_tuned', sep='\t', encoding='utf-8') + +final_gbr = classification.finalize_model(best_tuned) +classification.save_model(final_gbr, 'titanic_pycaret') + +response = None +""" +builder = BuilderPycaret(CLUSTER_IP) + +# builder.run_pycaret_async( +# name=name, +# parameters={ +# "train": "$train" +# }, +# code=codigo) +# builder.wait(name, 1) +# +# print(builder.search_execution_content( +# name=name, +# pretty_response=True)) + +print(builder.search_report(file_name='report_tuned')) + +print('fim') From 3a616ea11e6d37d8d583e6da24397db5af1c50f1 Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Wed, 12 Oct 2022 10:53:20 -0300 Subject: [PATCH 03/11] cliente para autokeras --- .../evaluate/autokeras.py | 8 ++ learning_orchestra_client/model/autokeras.py | 8 ++ .../predict/autokeras.py | 8 ++ learning_orchestra_client/train/autokeras.py | 14 ++ pipeline/autokeras_mnist.py | 121 ++++++++++++++++++ 5 files changed, 159 insertions(+) create mode 100644 learning_orchestra_client/evaluate/autokeras.py create mode 100644 learning_orchestra_client/model/autokeras.py create mode 100644 learning_orchestra_client/predict/autokeras.py create mode 100644 learning_orchestra_client/train/autokeras.py create mode 100644 pipeline/autokeras_mnist.py diff --git a/learning_orchestra_client/evaluate/autokeras.py b/learning_orchestra_client/evaluate/autokeras.py new file mode 100644 index 0000000..9e39a5c --- /dev/null +++ b/learning_orchestra_client/evaluate/autokeras.py @@ -0,0 +1,8 @@ +from ._evaluate import Evaluate + + +class EvaluateAutokeras(Evaluate): + def __init__(self, cluster_ip: str): + self.__api_path = 
"/api/learningOrchestra/v1/evaluate/autokeras" + self.__cluster_ip = cluster_ip + super().__init__(cluster_ip, self.__api_path) diff --git a/learning_orchestra_client/model/autokeras.py b/learning_orchestra_client/model/autokeras.py new file mode 100644 index 0000000..628806c --- /dev/null +++ b/learning_orchestra_client/model/autokeras.py @@ -0,0 +1,8 @@ +from ._model import Model + + +class ModelAutoKeras(Model): + def __init__(self, cluster_ip: str): + self.__api_path = "/api/learningOrchestra/v1/model/autokeras" + self.__cluster_ip = cluster_ip + super().__init__(cluster_ip, self.__api_path) diff --git a/learning_orchestra_client/predict/autokeras.py b/learning_orchestra_client/predict/autokeras.py new file mode 100644 index 0000000..f8ddd6a --- /dev/null +++ b/learning_orchestra_client/predict/autokeras.py @@ -0,0 +1,8 @@ +from ._predict import Predict + + +class PredictAutokeras(Predict): + def __init__(self, cluster_ip: str): + self.__api_path = "/api/learningOrchestra/v1/predict/autokeras" + self.__cluster_ip = cluster_ip + super().__init__(cluster_ip, self.__api_path) \ No newline at end of file diff --git a/learning_orchestra_client/train/autokeras.py b/learning_orchestra_client/train/autokeras.py new file mode 100644 index 0000000..564178b --- /dev/null +++ b/learning_orchestra_client/train/autokeras.py @@ -0,0 +1,14 @@ +from ._train import Train + + +class TrainAutokeras(Train): + __PARENT_NAME_FIELD = "parentName" + __METHOD_NAME_FIELD = "method" + __ClASS_PARAMETERS_FIELD = "methodParameters" + __NAME_FIELD = "name" + __DESCRIPTION_FIELD = "description" + + def __init__(self, cluster_ip: str): + self.__api_path = "/api/learningOrchestra/v1/train/autokeras" + self.__cluster_ip = cluster_ip + super().__init__(cluster_ip, self.__api_path) \ No newline at end of file diff --git a/pipeline/autokeras_mnist.py b/pipeline/autokeras_mnist.py new file mode 100644 index 0000000..8759e06 --- /dev/null +++ b/pipeline/autokeras_mnist.py @@ -0,0 +1,121 @@ +from 
learning_orchestra_client.function.python import FunctionPython +from learning_orchestra_client.dataset.generic import DatasetGeneric +from learning_orchestra_client.model.autokeras import ModelAutoKeras +from learning_orchestra_client.train.autokeras import TrainAutokeras +from learning_orchestra_client.predict.autokeras import PredictAutokeras +from learning_orchestra_client.evaluate.autokeras import EvaluateAutokeras + +CLUSTER_IP = "http://34.125.22.143" + +function_python = FunctionPython(CLUSTER_IP) +mnist_load_data = ''' + +from tensorflow.keras.datasets import mnist + +(x_train, y_train), (x_test, y_test) = mnist.load_data() + +response = { + "test_images": x_test, + "test_labels": y_test, + "train_images": x_train, + "train_labels": y_train +} +''' + +function_python.run_function_async( + name=f"mnist_load_data", + parameters={}, + code=mnist_load_data) +function_python.wait(f"mnist_load_data") + +model_autokeras = ModelAutoKeras(CLUSTER_IP) +model_autokeras.create_model_async( + name=f"mnist_model_autokeras", + module_path="autokeras.tasks.image", + class_name="ImageClassifier", + class_parameters={ + "overwrite": True, + "max_trials": 1 + } +) +model_autokeras.wait(f"mnist_model_autokeras") + +train_autokeras = TrainAutokeras(CLUSTER_IP) +train_autokeras.create_training_async( + name=f"mnist_model_trained_autokeras", + model_name=f"mnist_model_autokeras", + parent_name=f"mnist_model_autokeras", + method_name="fit", + parameters={ + "x": f"$mnist_load_data.train_images", + "y": f"$mnist_load_data.train_labels", + "epochs": 1, + } +) +train_autokeras.wait(f"mnist_model_trained_autokeras") + +predict_autokeras = PredictAutokeras(CLUSTER_IP) +predict_autokeras.create_prediction_async( + name=f"mnist_model_predicted_autokeras", + model_name=f"mnist_model_autokeras", + parent_name=f"mnist_model_trained_autokeras", + method_name="predict", + parameters={ + "x": f"$mnist_load_data.test_images" + } +) + +predict_autokeras.wait(f"mnist_model_predicted_autokeras") 
+ +evaluate_autokeras = EvaluateAutokeras(CLUSTER_IP) +evaluate_autokeras.create_evaluate_async( + name=f"mnist_model_evaluated_autokeras", + model_name=f"mnist_model_autokeras", + parent_name=f"mnist_model_trained_autokeras", + method_name="evaluate", + parameters={ + "x": f"$mnist_load_data.test_images", + "y": f"$mnist_load_data.test_labels" + } +) + +evaluate_autokeras.wait(f"mnist_model_evaluated_autokeras") + +show_mnist_predict = ''' +print(mnist_predicted) +response = None +''' +function_python.run_function_async( + name=f"mnist_model_predicted_print", + parameters={ + "mnist_predicted": f"$mnist_model_predicted_autokeras" + }, + code=show_mnist_predict +) + +show_mnist_evaluate = ''' +print(mnist_evaluated) +response = None +''' +function_python.run_function_async( + name=f"mnist_model_evaluated_print", + parameters={ + "mnist_evaluated": f"$mnist_model_evaluated_autokeras" + }, + code=show_mnist_evaluate +) + +function_python.wait(f"mnist_model_evaluated_print") +function_python.wait(f"mnist_model_predicted_print") + +print(function_python.search_execution_content( + name=f"mnist_model_predicted_print", + limit=1, + skip=1, + pretty_response=True)) + +print(function_python.search_execution_content( + name=f"mnist_model_evaluated_print", + limit=1, + skip=1, + pretty_response=True)) \ No newline at end of file From 3f2b20b41d626fbe57c12a9438079832b939c6ba Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Wed, 12 Oct 2022 12:03:34 -0300 Subject: [PATCH 04/11] Update autokeras_mnist.py --- pipeline/autokeras_mnist.py | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/pipeline/autokeras_mnist.py b/pipeline/autokeras_mnist.py index 8759e06..763740a 100644 --- a/pipeline/autokeras_mnist.py +++ b/pipeline/autokeras_mnist.py @@ -7,12 +7,24 @@ CLUSTER_IP = "http://34.125.22.143" +dataset_generic = DatasetGeneric(CLUSTER_IP) +dataset_generic.insert_dataset_async( + dataset_name=f"mnist_dataset_autokeras", + 
url="https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz", +) +dataset_generic.wait(f"mnist_dataset_autokeras") + function_python = FunctionPython(CLUSTER_IP) mnist_load_data = ''' -from tensorflow.keras.datasets import mnist +def load_data(path): + import numpy as np + with np.load(path) as f: + x_train, y_train = f['x_train'], f['y_train'] + x_test, y_test = f['x_test'], f['y_test'] + return (x_train, y_train), (x_test, y_test) -(x_train, y_train), (x_test, y_test) = mnist.load_data() +(x_train, y_train), (x_test, y_test) = load_data(mnist_dataset_autokeras) response = { "test_images": x_test, @@ -24,7 +36,9 @@ function_python.run_function_async( name=f"mnist_load_data", - parameters={}, + parameters={ + "mnist_dataset_autokeras": f"$mnist_dataset_autokeras" + }, code=mnist_load_data) function_python.wait(f"mnist_load_data") From 4cd28208fc348d8182520b3c4dd3abb1fee59552 Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Thu, 20 Oct 2022 20:59:34 -0300 Subject: [PATCH 05/11] Update titanic_pycaret_pipe_lo.py --- pipeline/titanic_pycaret_pipe_lo.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/pipeline/titanic_pycaret_pipe_lo.py b/pipeline/titanic_pycaret_pipe_lo.py index 8233287..691de76 100644 --- a/pipeline/titanic_pycaret_pipe_lo.py +++ b/pipeline/titanic_pycaret_pipe_lo.py @@ -1,7 +1,7 @@ from learning_orchestra_client.builder.builder import BuilderPycaret from learning_orchestra_client.dataset.csv import DatasetCsv -CLUSTER_IP = "http://34.125.23.29" +CLUSTER_IP = "http://34.151.200.12" dataset_csv = DatasetCsv(CLUSTER_IP) dataset_csv.delete_dataset("train") @@ -12,7 +12,7 @@ dataset_csv.wait(dataset_name="train", timeout=1) -version = 3012 +version = 5 name = f'titanic{version}' codigo = """ from pycaret import classification @@ -33,17 +33,16 @@ """ builder = BuilderPycaret(CLUSTER_IP) -# builder.run_pycaret_async( -# name=name, -# parameters={ -# "train": "$train" -# }, -# code=codigo) -# 
builder.wait(name, 1) -# -# print(builder.search_execution_content( -# name=name, -# pretty_response=True)) +builder.run_pycaret_async( + name=name, + parameters={ + "train": "$train" + }, + code=codigo) +builder.wait(name, 1) +print(builder.search_execution_content( + name=name, + pretty_response=True)) print(builder.search_report(file_name='report_tuned')) From 5818957f74c1973b164b32203e6d3847c54994e3 Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Thu, 20 Oct 2022 21:00:32 -0300 Subject: [PATCH 06/11] Update titanic_pycaret_pipe_lo.py --- pipeline/titanic_pycaret_pipe_lo.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pipeline/titanic_pycaret_pipe_lo.py b/pipeline/titanic_pycaret_pipe_lo.py index 691de76..24b1da4 100644 --- a/pipeline/titanic_pycaret_pipe_lo.py +++ b/pipeline/titanic_pycaret_pipe_lo.py @@ -12,8 +12,8 @@ dataset_csv.wait(dataset_name="train", timeout=1) -version = 5 -name = f'titanic{version}' + +name = f'titanic' codigo = """ from pycaret import classification from pycaret.classification import pull,tune_model From cff49ca6271b27ae5280962ae79feb2022e5410b Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Thu, 20 Oct 2022 21:26:21 -0300 Subject: [PATCH 07/11] Create titanicPycaret.py --- pipeline/titanicPycaret.py | 83 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 83 insertions(+) create mode 100644 pipeline/titanicPycaret.py diff --git a/pipeline/titanicPycaret.py b/pipeline/titanicPycaret.py new file mode 100644 index 0000000..a8dc111 --- /dev/null +++ b/pipeline/titanicPycaret.py @@ -0,0 +1,83 @@ +from learning_orchestra_client.builder.builder import BuilderPycaret +from learning_orchestra_client.dataset.csv import DatasetCsv +from learning_orchestra_client.transform.data_type import TransformDataType +import time + +start = time.time() + +CLUSTER_IP = "http://34.151.210.120" + +dataset_csv = DatasetCsv(CLUSTER_IP) + + +dataset_csv.insert_dataset_async( + 
url="https://raw.githubusercontent.com/JonatasMiguel/" + "PycaretTitanic/main/train.csv", + dataset_name="train", +) +dataset_csv.wait(dataset_name="train", + timeout=1) + +transform_data_type = TransformDataType(CLUSTER_IP) +type_fields = { + "Age": "number", + "Fare": "number", + "Parch": "number", + "Pclass": "number", + "SibSp": "number" +} +transform_data_type.update_dataset_type_async( + dataset_name=f"train", + types=type_fields) + +transform_data_type.wait(dataset_name=f"train", + timeout=1) + +name = f'titanicPycaret' +code = """ +from pycaret import classification + +clas = classification.setup( + data=train, + target='Survived', + ignore_features=['Ticket'], + numeric_features=['Age','Fare','Parch','Pclass','SibSp'], + session_id = 1, + silent=True) + +best = classification.compare_models(turbo = False) + +best_tuned = classification.tune_model( + best, + n_iter = 100, + choose_better=True) + +best_tuned = classification.create_model(best_tuned) + +score = classification.pull() +score.to_csv('score', sep='\t', encoding='utf-8') + +final_gbr = classification.finalize_model(best_tuned) + +classification.save_model(final_gbr, 'titanic_pycaret') + +response = None +""" +builder = BuilderPycaret(CLUSTER_IP) + +builder.run_pycaret_async( + name=name, + parameters={ + "train": "$train" + }, + code=code) +builder.wait(name, 1) + +print(builder.search_execution_content( + name=name, + pretty_response=True)) + +print(builder.search_builder_register_predictions(file_name='score')) + +end = time.time() +print(f'Run time: {end - start}') From 3131bcdff1efe0efe8841b8bbad11498abdd9eaa Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Thu, 20 Oct 2022 21:27:20 -0300 Subject: [PATCH 08/11] Update simple_titanic_pycaret.py --- .../{titanic_pycaret_pipe_lo.py => simple_titanic_pycaret.py} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename pipeline/{titanic_pycaret_pipe_lo.py => simple_titanic_pycaret.py} (100%) diff --git a/pipeline/titanic_pycaret_pipe_lo.py 
b/pipeline/simple_titanic_pycaret.py similarity index 100% rename from pipeline/titanic_pycaret_pipe_lo.py rename to pipeline/simple_titanic_pycaret.py From 7aa7d523009dfdbd22562e1b7eb5049030bb14f3 Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Fri, 21 Oct 2022 10:14:33 -0300 Subject: [PATCH 09/11] =?UTF-8?q?titanic=20pipe=20com=20os=20servi=C3=A7os?= =?UTF-8?q?=20divididos?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pipeline/titanic_pycare_2.py | 96 ++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 pipeline/titanic_pycare_2.py diff --git a/pipeline/titanic_pycare_2.py b/pipeline/titanic_pycare_2.py new file mode 100644 index 0000000..062fcf8 --- /dev/null +++ b/pipeline/titanic_pycare_2.py @@ -0,0 +1,96 @@ +from learning_orchestra_client.model.pycaret import ModelPycaret +from learning_orchestra_client.tune.pycaret import TunePycaret +from learning_orchestra_client.dataset.csv import DatasetCsv +from learning_orchestra_client.transform.data_type import TransformDataType +from learning_orchestra_client.predict.pycaret import PredictPycaret +from learning_orchestra_client.evaluate.pycaret import EvaluatePycaret +import time + +start = time.time() + +CLUSTER_IP = "http://34.151.210.120" + +dataset_csv = DatasetCsv(CLUSTER_IP) + +dataset_csv.insert_dataset_async( + url="https://raw.githubusercontent.com/JonatasMiguel/" + "PycaretTitanic/main/train.csv", + dataset_name="train", +) +dataset_csv.wait(dataset_name="train", + timeout=1) + +dataset_csv.insert_dataset_async( + url="https://raw.githubusercontent.com/JonatasMiguel/" + "PycaretTitanic/main/test.csv", + dataset_name="test", +) +dataset_csv.wait(dataset_name="test", + timeout=1) + +transform_data_type = TransformDataType(CLUSTER_IP) +type_fields = { + "Age": "number", + "Fare": "number", + "Parch": "number", + "Pclass": "number", + "SibSp": "number" +} +transform_data_type.update_dataset_type_async( + 
dataset_name=f"train", + types=type_fields) + +transform_data_type.wait(dataset_name=f"train", + timeout=1) + +model_pycaret = ModelPycaret(CLUSTER_IP) +model_pycaret.create_model_async( + name="titanic_model_pycaret", + module_path="pycaret.classification.functional", + class_name="setup", + class_parameters={ + "data": "$train", + "target":"Survived", + "ignore_features": ['Ticket'], + "numeric_features": ['Age','Fare','Parch','Pclass','SibSp'], + "session_id": 1, +) +model_pycaret.wait("titanic_model_pycaret") + + +tune_pycaret = TunePycaret(CLUSTER_IP) +tune_pycaret.create_model_async( + name="titanic_tune_pycaret", + parent_name="titanic_model_pycaret", + model="titanic_model_pycaret" + module_path="pycaret.classification.functional", + class_name="tune_model", + class_parameters={ + "n_iter" = 100, + "choose_better"=True +) +tune_pycaret.wait("titanic_tune_pycaret") + + +predict_pycaret = PredictPycaret(CLUSTER_IP) +predict_pycaret.create_prediction_async( + name="titanic_predicted_pycaret", + model_name="titanic_tune_pycaret", + parent_name="titanic_model_pycaret", + method_name="predict_model", + parameters={ + "data": "$test" + } +) + +predict_pycaret.wait("titanic_predicted_pycaret") + +evaluate_pycaret = EvaluatePycaret(CLUSTER_IP) +evaluate_pycaret.create_evaluate_async( + name="titanic_evaluate_pycaret", + model_name="titanic_tune_pycaret", + parent_name="titanic_model_pycaret", + method_name="pull", +) + +evaluate_pycaret.wait("titanic_evaluate_pycaret") \ No newline at end of file From 5ce962ec89e33132d7426f25ce4243f8a7d25eea Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Fri, 21 Oct 2022 10:15:48 -0300 Subject: [PATCH 10/11] Add files via upload --- pipeline/titanic_pycare_2.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/pipeline/titanic_pycare_2.py b/pipeline/titanic_pycare_2.py index 062fcf8..d8d3b0e 100644 --- a/pipeline/titanic_pycare_2.py +++ b/pipeline/titanic_pycare_2.py @@ -93,4 +93,18 @@ 
method_name="pull", ) -evaluate_pycaret.wait("titanic_evaluate_pycaret") \ No newline at end of file +evaluate_pycaret.wait("titanic_evaluate_pycaret") + +show_mnist_evaluate = ''' +print(titanic_evaluate_pycaret) +response = None +''' +function_python.run_function_async( + name="titanic_evaluate_pycaret_print", + parameters={ + "titanic_evaluate_pycaret": "$titanic_evaluate_pycaret" + }, + code=show_mnist_evaluate +) + +function_python.wait("titanic_evaluate_pycaret_print") \ No newline at end of file From 7e83516ed3c6b1277b0274d3ad8156cf5973a98f Mon Sep 17 00:00:00 2001 From: Jonatas Miguel Date: Wed, 26 Oct 2022 22:38:01 -0300 Subject: [PATCH 11/11] =?UTF-8?q?extens=C3=B5es=20pycaret?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- learning_orchestra_client/model/pycaret.py | 8 + learning_orchestra_client/predict/pycaret.py | 8 + learning_orchestra_client/tune/_tune.py | 168 ++++++++++++++ learning_orchestra_client/tune/pycaret.py | 8 + ...tanic_pycare_2.py => titanic_pycaret_2.py} | 218 +++++++++--------- 5 files changed, 301 insertions(+), 109 deletions(-) create mode 100644 learning_orchestra_client/model/pycaret.py create mode 100644 learning_orchestra_client/predict/pycaret.py create mode 100644 learning_orchestra_client/tune/_tune.py create mode 100644 learning_orchestra_client/tune/pycaret.py rename pipeline/{titanic_pycare_2.py => titanic_pycaret_2.py} (95%) diff --git a/learning_orchestra_client/model/pycaret.py b/learning_orchestra_client/model/pycaret.py new file mode 100644 index 0000000..04a5793 --- /dev/null +++ b/learning_orchestra_client/model/pycaret.py @@ -0,0 +1,8 @@ +from ._model import Model + + +class ModelPycaret(Model): + def __init__(self, cluster_ip: str): + self.__api_path = "/api/learningOrchestra/v1/model/pycaret" + self.__cluster_ip = cluster_ip + super().__init__(cluster_ip, self.__api_path) \ No newline at end of file diff --git a/learning_orchestra_client/predict/pycaret.py 
b/learning_orchestra_client/predict/pycaret.py new file mode 100644 index 0000000..b28aae7 --- /dev/null +++ b/learning_orchestra_client/predict/pycaret.py @@ -0,0 +1,8 @@ +from ._predict import Predict + + +class PredictPycaret(Predict): + def __init__(self, cluster_ip: str): + self.__api_path = "/api/learningOrchestra/v1/predict/pycaret" + self.__cluster_ip = cluster_ip + super().__init__(cluster_ip, self.__api_path) \ No newline at end of file diff --git a/learning_orchestra_client/tune/_tune.py b/learning_orchestra_client/tune/_tune.py new file mode 100644 index 0000000..b82f9a2 --- /dev/null +++ b/learning_orchestra_client/tune/_tune.py @@ -0,0 +1,168 @@ +from learning_orchestra_client.observe.observe import Observer +from learning_orchestra_client._util._response_treat import ResponseTreat +from learning_orchestra_client._util._entity_reader import EntityReader +import requests +from typing import Union + + +class Tune: + __PARENT_NAME_FIELD = "parentName" + __MODEL_NAME_FIELD = "modelName" + __METHOD_NAME_FIELD = "method" + __ClASS_PARAMETERS_FIELD = "methodParameters" + __NAME_FIELD = "name" + __DESCRIPTION_FIELD = "description" + + def __init__(self, cluster_ip: str, api_path: str): + self.__service_url = f'{cluster_ip}{api_path}' + self.__response_treat = ResponseTreat() + self.__cluster_ip = cluster_ip + self.__entity_reader = EntityReader(self.__service_url) + self.__observer = Observer(self.__cluster_ip) + + def create_tune_sync(self, + name: str, + model_name: str, + parent_name: str, + method_name: str, + parameters: dict, + description: str = "", + pretty_response: bool = False) -> \ + Union[dict, str]: + """ + description: This method is responsible to tune models in sync mode + + pretty_response: If true it returns a string, otherwise a dictionary. + name: Is the name of the tune output object that will be created. 
+ parent_name: Is the name of the previous ML step of the pipeline + method_name: is the name of the method to be executed (the ML tool way + to tune models) + parameters: Is the set of parameters used by the method + + return: A JSON object with an error or warning message or a URL + indicating the correct operation. + """ + request_body = { + self.__NAME_FIELD: name, + self.__MODEL_NAME_FIELD: model_name, + self.__PARENT_NAME_FIELD: parent_name, + self.__METHOD_NAME_FIELD: method_name, + self.__ClASS_PARAMETERS_FIELD: parameters, + self.__DESCRIPTION_FIELD: description} + + request_url = self.__service_url + + response = requests.post(url=request_url, json=request_body) + self.__observer.wait(name) + + return self.__response_treat.treatment(response, pretty_response) + + def create_tune_async(self, + name: str, + model_name: str, + parent_name: str, + method_name: str, + parameters: dict, + description: str = "", + pretty_response: bool = False) -> \ + Union[dict, str]: + """ + description: This method is responsible to tune models in async mode. + A wait method call is mandatory due to the asynchronous aspect. + + pretty_response: If true it returns a string, otherwise a dictionary. + name: Is the name of the tune output object that will be created. + parent_name: Is the name of the previous ML step of the pipeline + method_name: is the name of the method to be executed (the ML tool way + to tune models) + parameters: Is the set of parameters used by the method + + return: A JSON object with an error or warning message or a URL + indicating the correct operation. 
+ """ + request_body = { + self.__NAME_FIELD: name, + self.__MODEL_NAME_FIELD: model_name, + self.__PARENT_NAME_FIELD: parent_name, + self.__METHOD_NAME_FIELD: method_name, + self.__ClASS_PARAMETERS_FIELD: parameters, + self.__DESCRIPTION_FIELD: description} + + request_url = self.__service_url + + response = requests.post(url=request_url, json=request_body) + return self.__response_treat.treatment(response, pretty_response) + + def search_all_tunes(self, pretty_response: bool = False) \ + -> Union[dict, str]: + """ + description: This method retrieves all tune metadata, i.e., it does + not retrieve the tune content. + + pretty_response: If true it returns a string, otherwise a dictionary. + + return: All tune metadata stored in Learning Orchestra or an empty + result. + """ + response = self.__entity_reader.read_all_instances_from_entity() + return self.__response_treat.treatment(response, pretty_response) + + def delete_tune(self, name: str, pretty_response=False) \ + -> Union[dict, str]: + """ + description: This method is responsible for deleting the tune step. + This delete operation is asynchronous, so it does not lock the caller + until the deletion finished. Instead, it returns a JSON object with a + URL for a future use. The caller uses the URL for delete checks. + + pretty_response: If true it returns a string, otherwise a dictionary. + name: Represents the tune name. 
+ + return: JSON object with an error message, a warning message or a + correct delete message + """ + request_url = f'{self.__service_url}/{name}' + + response = requests.delete(request_url) + return self.__response_treat.treatment(response, pretty_response) + + def search_tune_content(self, + name: str, + query: dict = {}, + limit: int = 10, + skip: int = 0, + pretty_response: bool = False) \ + -> Union[dict, str]: + """ + description: This method is responsible for retrieving all the tune + tuples or registers, as well as the metadata content + + pretty_response: If true it returns a string, otherwise a dictionary. + name: Is the name of the tune object + query: Query to make in MongoDB(default: empty query) + limit: Number of rows to return in pagination(default: 10) (maximum is + set at 20 rows per request) + skip: Number of rows to skip in pagination(default: 0) + + return: A page with some tunes inside or an error if there + is no such tune object. The current page is also returned to be used in + future content requests. + """ + response = self.__entity_reader.read_entity_content( + name, query, limit, skip) + + return self.__response_treat.treatment(response, pretty_response) + + def wait(self, name: str, timeout: int = None) -> dict: + """ + description: This method is responsible to create a synchronization + barrier for the create_tune_async method, delete_tune method. + + name: Represents the tune name. + timeout: Represents the time in seconds to wait for a tune to + finish its run. 
+ + return: JSON object with an error message, a warning message or a + correct tune result + """ + return self.__observer.wait(name, timeout) diff --git a/learning_orchestra_client/tune/pycaret.py b/learning_orchestra_client/tune/pycaret.py new file mode 100644 index 0000000..809d8e2 --- /dev/null +++ b/learning_orchestra_client/tune/pycaret.py @@ -0,0 +1,8 @@ +from ._tune import Tune + + +class TunePycaret(Tune): + def __init__(self, cluster_ip: str): + self.__api_path = "/api/learningOrchestra/v1/tune/pycaret" + self.__cluster_ip = cluster_ip + super().__init__(cluster_ip, self.__api_path) \ No newline at end of file diff --git a/pipeline/titanic_pycare_2.py b/pipeline/titanic_pycaret_2.py similarity index 95% rename from pipeline/titanic_pycare_2.py rename to pipeline/titanic_pycaret_2.py index d8d3b0e..3c59876 100644 --- a/pipeline/titanic_pycare_2.py +++ b/pipeline/titanic_pycaret_2.py @@ -1,110 +1,110 @@ -from learning_orchestra_client.model.pycaret import ModelPycaret -from learning_orchestra_client.tune.pycaret import TunePycaret -from learning_orchestra_client.dataset.csv import DatasetCsv -from learning_orchestra_client.transform.data_type import TransformDataType -from learning_orchestra_client.predict.pycaret import PredictPycaret -from learning_orchestra_client.evaluate.pycaret import EvaluatePycaret -import time - -start = time.time() - -CLUSTER_IP = "http://34.151.210.120" - -dataset_csv = DatasetCsv(CLUSTER_IP) - -dataset_csv.insert_dataset_async( - url="https://raw.githubusercontent.com/JonatasMiguel/" - "PycaretTitanic/main/train.csv", - dataset_name="train", -) -dataset_csv.wait(dataset_name="train", - timeout=1) - -dataset_csv.insert_dataset_async( - url="https://raw.githubusercontent.com/JonatasMiguel/" - "PycaretTitanic/main/test.csv", - dataset_name="test", -) -dataset_csv.wait(dataset_name="test", - timeout=1) - -transform_data_type = TransformDataType(CLUSTER_IP) -type_fields = { - "Age": "number", - "Fare": "number", - "Parch": "number", 
- "Pclass": "number", - "SibSp": "number" -} -transform_data_type.update_dataset_type_async( - dataset_name=f"train", - types=type_fields) - -transform_data_type.wait(dataset_name=f"train", - timeout=1) - -model_pycaret = ModelPycaret(CLUSTER_IP) -model_pycaret.create_model_async( - name="titanic_model_pycaret", - module_path="pycaret.classification.functional", - class_name="setup", - class_parameters={ - "data": "$train", - "target":"Survived", - "ignore_features": ['Ticket'], - "numeric_features": ['Age','Fare','Parch','Pclass','SibSp'], - "session_id": 1, -) -model_pycaret.wait("titanic_model_pycaret") - - -tune_pycaret = TunePycaret(CLUSTER_IP) -tune_pycaret.create_model_async( - name="titanic_tune_pycaret", - parent_name="titanic_model_pycaret", - model="titanic_model_pycaret" - module_path="pycaret.classification.functional", - class_name="tune_model", - class_parameters={ - "n_iter" = 100, - "choose_better"=True -) -tune_pycaret.wait("titanic_tune_pycaret") - - -predict_pycaret = PredictPycaret(CLUSTER_IP) -predict_pycaret.create_prediction_async( - name="titanic_predicted_pycaret", - model_name="titanic_tune_pycaret", - parent_name="titanic_model_pycaret", - method_name="predict_model", - parameters={ - "data": "$test" - } -) - -predict_pycaret.wait("titanic_predicted_pycaret") - -evaluate_pycaret = EvaluatePycaret(CLUSTER_IP) -evaluate_pycaret.create_evaluate_async( - name="titanic_evaluate_pycaret", - model_name="titanic_tune_pycaret", - parent_name="titanic_model_pycaret", - method_name="pull", -) - -evaluate_pycaret.wait("titanic_evaluate_pycaret") - -show_mnist_evaluate = ''' -print(titanic_evaluate_pycaret) -response = None -''' -function_python.run_function_async( - name="titanic_evaluate_pycaret_print", - parameters={ - "titanic_evaluate_pycaret": "$titanic_evaluate_pycaret" - }, - code=show_mnist_evaluate -) - +from learning_orchestra_client.model.pycaret import ModelPycaret +from learning_orchestra_client.tune.pycaret import TunePycaret +from 
learning_orchestra_client.dataset.csv import DatasetCsv +from learning_orchestra_client.transform.data_type import TransformDataType +from learning_orchestra_client.predict.pycaret import PredictPycaret +from learning_orchestra_client.evaluate.pycaret import EvaluatePycaret +import time + +start = time.time() + +CLUSTER_IP = "http://34.151.210.120" + +dataset_csv = DatasetCsv(CLUSTER_IP) + +dataset_csv.insert_dataset_async( + url="https://raw.githubusercontent.com/JonatasMiguel/" + "PycaretTitanic/main/train.csv", + dataset_name="train", +) +dataset_csv.wait(dataset_name="train", + timeout=1) + +dataset_csv.insert_dataset_async( + url="https://raw.githubusercontent.com/JonatasMiguel/" + "PycaretTitanic/main/test.csv", + dataset_name="test", +) +dataset_csv.wait(dataset_name="test", + timeout=1) + +transform_data_type = TransformDataType(CLUSTER_IP) +type_fields = { + "Age": "number", + "Fare": "number", + "Parch": "number", + "Pclass": "number", + "SibSp": "number" +} +transform_data_type.update_dataset_type_async( + dataset_name=f"train", + types=type_fields) + +transform_data_type.wait(dataset_name=f"train", + timeout=1) + +model_pycaret = ModelPycaret(CLUSTER_IP) +model_pycaret.create_model_async( + name="titanic_model_pycaret", + module_path="pycaret.classification.functional", + class_name="setup", + class_parameters={ + "data": "$train", + "target":"Survived", + "ignore_features": ['Ticket'], + "numeric_features": ['Age','Fare','Parch','Pclass','SibSp'], + "session_id": 1, +) +model_pycaret.wait("titanic_model_pycaret") + + +tune_pycaret = TunePycaret(CLUSTER_IP) +tune_pycaret.create_tune_async( + name="titanic_tune_pycaret", + parent_name="titanic_model_pycaret", + model="titanic_model_pycaret" + module_path="pycaret.classification.functional", + class_name="tune_model", + class_parameters={ + "n_iter" = 100, + "choose_better"=True +) +tune_pycaret.wait("titanic_tune_pycaret") + + +predict_pycaret = PredictPycaret(CLUSTER_IP) 
+predict_pycaret.create_prediction_async( + name="titanic_predicted_pycaret", + model_name="titanic_tune_pycaret", + parent_name="titanic_model_pycaret", + method_name="predict_model", + parameters={ + "data": "$test" + } +) + +predict_pycaret.wait("titanic_predicted_pycaret") + +evaluate_pycaret = EvaluatePycaret(CLUSTER_IP) +evaluate_pycaret.create_evaluate_async( + name="titanic_evaluate_pycaret", + model_name="titanic_tune_pycaret", + parent_name="titanic_model_pycaret", + method_name="pull", +) + +evaluate_pycaret.wait("titanic_evaluate_pycaret") + +show_mnist_evaluate = ''' +print(titanic_evaluate_pycaret) +response = None +''' +function_python.run_function_async( + name="titanic_evaluate_pycaret_print", + parameters={ + "titanic_evaluate_pycaret": "$titanic_evaluate_pycaret" + }, + code=show_mnist_evaluate +) + function_python.wait("titanic_evaluate_pycaret_print") \ No newline at end of file