From c34581359b293aaf1af8da87355d7240824f64f0 Mon Sep 17 00:00:00 2001 From: bert2code <68850622+bert2code@users.noreply.github.com> Date: Sat, 26 Sep 2020 01:24:46 +0600 Subject: [PATCH 1/6] Update download_dataset.py --- script/download_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/download_dataset.py b/script/download_dataset.py index b6f290dc..e6308706 100644 --- a/script/download_dataset.py +++ b/script/download_dataset.py @@ -21,7 +21,7 @@ os.makedirs(destination_dir) os.chdir(destination_dir) - for language in ('python', 'javascript', 'java', 'ruby', 'php', 'go'): + for language in ('java'): call(['wget', 'https://s3.amazonaws.com/code-search-net/CodeSearchNet/v2/{}.zip'.format(language), '-P', destination_dir, '-O', '{}.zip'.format(language)]) call(['unzip', '{}.zip'.format(language)]) call(['rm', '{}.zip'.format(language)]) From 5e9c4ca867d2b9c628674d006331322b243472b9 Mon Sep 17 00:00:00 2001 From: bert2code <68850622+bert2code@users.noreply.github.com> Date: Sat, 26 Sep 2020 01:25:15 +0600 Subject: [PATCH 2/6] Update data_dirs_test.txt --- src/data_dirs_test.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/data_dirs_test.txt b/src/data_dirs_test.txt index d5a4f530..71a2246f 100644 --- a/src/data_dirs_test.txt +++ b/src/data_dirs_test.txt @@ -1,6 +1 @@ -../resources/data/python/final/jsonl/test -../resources/data/javascript/final/jsonl/test ../resources/data/java/final/jsonl/test -../resources/data/php/final/jsonl/test -../resources/data/ruby/final/jsonl/test -../resources/data/go/final/jsonl/test \ No newline at end of file From 716aa989bf1fb542e74719b8ec8512857fe76af2 Mon Sep 17 00:00:00 2001 From: bert2code <68850622+bert2code@users.noreply.github.com> Date: Sat, 26 Sep 2020 01:25:29 +0600 Subject: [PATCH 3/6] Update data_dirs_train.txt --- src/data_dirs_train.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/data_dirs_train.txt b/src/data_dirs_train.txt index 28827543..2709b415 100644 --- a/src/data_dirs_train.txt +++ b/src/data_dirs_train.txt @@ -1,6 +1 @@ -../resources/data/python/final/jsonl/train -../resources/data/javascript/final/jsonl/train ../resources/data/java/final/jsonl/train -../resources/data/php/final/jsonl/train -../resources/data/ruby/final/jsonl/train -../resources/data/go/final/jsonl/train \ No newline at end of file From 9fc22b3727051bad0009bdf4f9bed0426dea2750 Mon Sep 17 00:00:00 2001 From: bert2code <68850622+bert2code@users.noreply.github.com> Date: Sat, 26 Sep 2020 01:25:46 +0600 Subject: [PATCH 4/6] Update data_dirs_valid.txt --- src/data_dirs_valid.txt | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/data_dirs_valid.txt b/src/data_dirs_valid.txt index 949e70a3..721ed021 100644 --- a/src/data_dirs_valid.txt +++ b/src/data_dirs_valid.txt @@ -1,6 +1 @@ -../resources/data/python/final/jsonl/valid -../resources/data/javascript/final/jsonl/valid ../resources/data/java/final/jsonl/valid -../resources/data/php/final/jsonl/valid -../resources/data/ruby/final/jsonl/valid -../resources/data/go/final/jsonl/valid \ No newline at end of file From 88cc8667204542794d341ef3653b1d1d8cfdb987 Mon Sep 17 00:00:00 2001 From: bert2code <68850622+bert2code@users.noreply.github.com> Date: Sat, 26 Sep 2020 01:27:20 +0600 Subject: [PATCH 5/6] Update predict.py --- src/predict.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/predict.py b/src/predict.py index 08690f46..60e7cfbe 100755 --- a/src/predict.py +++ b/src/predict.py @@ -113,7 +113,7 @@ def query_model(query, model, indices, language, topk=100): hyper_overrides={}) predictions = [] - for language in ('python', 'go', 'javascript', 'java', 'php', 'ruby'): + for language in ['java']: print("Evaluating language: %s" % language) definitions = pickle.load(open('../resources/data/{}_dedupe_definitions_v2.pkl'.format(language), 'rb')) indexes = [{'code_tokens': d['function_tokens'], 'language': d['language']} for d in tqdm(definitions)] From a4135f95d9cccd0c87fc50ec5f4404843fefff2d Mon Sep 17 00:00:00 2001 From: bert2code <68850622+bert2code@users.noreply.github.com> Date: Sat, 26 Sep 2020 01:27:47 +0600 Subject: [PATCH 6/6] Update download_dataset.py --- script/download_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/script/download_dataset.py b/script/download_dataset.py index e6308706..5e12d59a 100644 --- a/script/download_dataset.py +++ b/script/download_dataset.py @@ -21,7 +21,7 @@ os.makedirs(destination_dir) os.chdir(destination_dir) - for language in ('java'): + for language in ['java']: call(['wget', 'https://s3.amazonaws.com/code-search-net/CodeSearchNet/v2/{}.zip'.format(language), '-P', destination_dir, '-O', '{}.zip'.format(language)]) call(['unzip', '{}.zip'.format(language)]) call(['rm', '{}.zip'.format(language)])