Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
N
NASA Battery Dataset
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
GC TICc
16TICc19 HES-SO - Predict
NASA Battery Dataset
Commits
7707720e
Verified
Commit
7707720e
authored
6 years ago
by
Etienne Frank
Browse files
Options
Downloads
Patches
Plain Diff
WIP: Explore the automl
parent
5e5a3646
Branches
auto-scikit-learn
No related tags found
No related merge requests found
Pipeline
#1452
failed
6 years ago
Changes
5
Pipelines
1
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
.gitignore
+3
-0
3 additions, 0 deletions
.gitignore
.gitlab-ci.yml
+11
-8
11 additions, 8 deletions
.gitlab-ci.yml
Pipfile
+6
-0
6 additions, 0 deletions
Pipfile
Pipfile.lock
+507
-92
507 additions, 92 deletions
Pipfile.lock
explore_auto_scikit.py
+421
-0
421 additions, 0 deletions
explore_auto_scikit.py
with
948 additions
and
100 deletions
.gitignore
+
3
−
0
View file @
7707720e
output_graphs/
output_graphs/
batteries_data_temp/
results/
current_results/
# Byte-compiled / optimized / DLL files
# Byte-compiled / optimized / DLL files
...
...
This diff is collapsed.
Click to expand it.
.gitlab-ci.yml
+
11
−
8
View file @
7707720e
before_script
:
# before_script:
-
pipenv install
# - pkill -f apt
# - apt install -y python-dev
lint
:
# - apt install -y python3-dev
tags
:
# - pipenv install
-
ubuntu-docker
#
script
:
# lint:
-
pipenv run flake8
# tags:
# - ubuntu-docker
# script:
# - pipenv run flake8
This diff is collapsed.
Click to expand it.
Pipfile
+
6
−
0
View file @
7707720e
...
@@ -10,6 +10,12 @@ matplotlib = "*"
...
@@ -10,6 +10,12 @@ matplotlib = "*"
pillow
=
"*"
pillow
=
"*"
"flake8"
=
"*"
"flake8"
=
"*"
pylint
=
"*"
pylint
=
"*"
cython
=
"*"
pandas
=
"*"
pytest
=
"*"
openpyxl
=
"*"
"auto-sklearn2"
=
{
ref
=
"8bdcba15caa28cb4336d9cb6ee4108078ab6d8a2"
,
git
=
"git://github.com/automl/auto-sklearn.git"
}
auto-sklearn
=
"*"
[dev-packages]
[dev-packages]
...
...
This diff is collapsed.
Click to expand it.
Pipfile.lock
+
507
−
92
View file @
7707720e
This diff is collapsed.
Click to expand it.
explore_auto_scikit.py
0 → 100644
+
421
−
0
View file @
7707720e
import
shutil
import
glob
import
pickle
import
os
import
traceback
import
time
import
autosklearn.classification
import
sys
from
extraction.retrieve_battery_data
import
extract_charge_discharge_impedance
from
functools
import
reduce
import
itertools
import
numpy
as
np
import
sklearn.model_selection
import
sklearn.datasets
import
sklearn.metrics
import
multiprocessing
from
concurrent.futures
import
ThreadPoolExecutor
from
autosklearn.constants
import
*
from
autosklearn.classification
import
AutoSklearnClassifier
from
autosklearn.metrics
import
accuracy
from
sklearn.metrics
import
precision_recall_fscore_support
from
sklearn.metrics
import
f1_score
from
sklearn.externals
import
joblib
from
sklearn.metrics
import
confusion_matrix
import
pandas
as
pd
# import concurrent.futures
# np.set_printoptions(threshold=np.nan)
# Divider line written before and after every entry of the result/failure logs.
SEPARATOR = "-----------------------------------"
# Output directory for models, spreadsheets and logs.
# init_folder() deletes and recreates it at the start of every run.
RESULTS_FOLDER = "current_results"
# Delimiter between the fields encoded in a saved model's file name.
MODEL_FILE_SEP = "___"
def chunk_it(seq, num):
    """Return one chunk label per element of ``seq``.

    ``seq`` is cut into ``num`` consecutive chunks of (almost) equal size and
    every element is replaced by the index of the chunk it falls in, so the
    result has ``len(seq)`` entries with values in ``range(num)``.

    Chunk boundaries are computed with integer arithmetic. The previous
    float-based boundaries could round down and assign the last element the
    out-of-range label ``num``.

    Args:
        seq: Any sized sequence; only its length is used.
        num: Number of chunks (classes); must be a positive integer.

    Returns:
        A list of ``len(seq)`` integer labels in non-decreasing order.

    Raises:
        ValueError: If ``num`` is not positive.
    """
    if num <= 0:
        raise ValueError("num must be a positive integer, got {!r}".format(num))
    total = len(seq)
    labels = []
    for index in range(num):
        start = index * total // num
        stop = (index + 1) * total // num
        labels.extend([index] * (stop - start))
    return labels
# DISCRETISATION_TYPES = ["mean", "median", "standard-deviation"]
# Statistic names passed to discretize() through the "types" entry of a
# discretisation dict.
# NOTE(review): discretize() tests for "standard deviation" (with a space), so
# the hyphenated "standard-deviation" entry below matches none of its branches.
DISCRETISATION_TYPES = [
    "mean",
    "gradiant_mean",
    "median",
    "gradiant_median",
    "standard-deviation",
    "gradiant_std",
]
# Directory prefix of the battery .mat files.
BATTERY_BASE = "./data/BatteryAgingARC_25-44/"
# Battery used by test_model_with_test_battery(); it is commented out of
# ALL_BATTERIES below.
TEST_BATTERIES = [BATTERY_BASE + "B0025.mat"]
# Batteries loaded by auto_ML() to build the training data.
ALL_BATTERIES = [
    # BATTERY_BASE + "B0025.mat",
    BATTERY_BASE + "B0026.mat",
    BATTERY_BASE + "B0027.mat",
    BATTERY_BASE + "B0028.mat",
    # BATTERY_BASE + "B0033.mat",
    # BATTERY_BASE + "B0034.mat",
]
# Per-cycle field names used as features for charge cycles.
charges_params = [
    "voltage_measured",
    "current_measured",
    "temperature_measured",
    "current_charge",
    "voltage_charge",
]
# Discharge cycles carry the same fields plus "capacity".
discharges_params = charges_params + ["capacity"]  # capacity does not have the same length
# Per-cycle field names available for impedance cycles.
impedance_params = [
    # "re",  # float_
    # "rct",  # float_
    "sense_current",  # complex values
    "battery_current",  # complex values
    "current_ratio",  # complex values
    "battery_impedance",  # complex values, stored as multiple arrays of one complex each
    # "rectified_impedance",  # complex values, multiple arrays of one complex each; not the same length
]
def discretize(array, discretisation):
    """Summarise ``array`` as per-segment statistics.

    ``array`` is cut into ``discretisation["split"]`` consecutive segments
    with ``np.array_split`` and one list of per-segment values is produced for
    each statistic named in ``discretisation["types"]``.

    Recognised names, in output order: ``"mean"``, ``"median"``,
    ``"standard deviation"`` (computed on the raw values), then
    ``"gradiant_mean"``, ``"gradiant_median"``, ``"gradiant_std"`` (computed on
    ``np.gradient(array)``). Unknown names are ignored.

    The gradient is only computed when a gradient statistic is requested, so
    arrays too short for ``np.gradient`` can still be summarised with the raw
    statistics.

    Args:
        array: One-dimensional numeric array.
        discretisation: Dict with keys ``"types"`` (container of statistic
            names) and ``"split"`` (number of segments).

    Returns:
        A list with one inner list per requested statistic; each inner list
        has one value per segment.
    """
    requested = discretisation["types"]
    split_count = discretisation["split"]

    # NOTE: when split_count exceeds len(array), array_split yields empty
    # segments, whose statistics are NaN.
    raw_segments = np.array_split(array, split_count)

    out = []
    if "mean" in requested:
        out.append([np.mean(segment) for segment in raw_segments])
    if "median" in requested:
        out.append([np.median(segment) for segment in raw_segments])
    # NOTE(review): the key uses a space, whereas DISCRETISATION_TYPES spells it
    # "standard-deviation"; left unchanged because models and cached data
    # already depend on the resulting feature count.
    if "standard deviation" in requested:
        out.append([np.std(segment) for segment in raw_segments])

    gradient_statistics = (
        ("gradiant_mean", np.mean),
        ("gradiant_median", np.median),
        ("gradiant_std", np.std),
    )
    wanted = [stat for name, stat in gradient_statistics if name in requested]
    if wanted:
        # Computed lazily: np.gradient raises on arrays with fewer than 2 items.
        gradient_segments = np.array_split(np.gradient(array), split_count)
        for stat in wanted:
            out.append([stat(segment) for segment in gradient_segments])
    return out
# out = np.array(out)
# return out.T
def append_params(result_cycles, cycles, used_parameters, discretisation):
    """Append the discretised feature rows of every cycle to ``result_cycles``.

    For each cycle, every parameter in ``used_parameters`` is discretised with
    ``discretize`` and the resulting statistic lists are stacked and
    transposed, so that each appended row holds one segment ("moment") of every
    feature.

    Side effects: ``result_cycles`` is extended in place, and NaN/inf values in
    ``cycle[param]`` are replaced in place by ``np.nan_to_num(copy=False)``.

    Args:
        result_cycles: List that receives the feature rows.
        cycles: Iterable of mappings from parameter name to a numeric array.
        used_parameters: Parameter names to use as features; the first one
            determines the cycle length used for validation.
        discretisation: Dict with ``"types"`` and ``"split"``, forwarded to
            ``discretize``.

    Returns:
        The total number of rows appended.

    Raises:
        ValueError: If a cycle is shorter than ``discretisation["split"]``.
    """
    total_cycles_length = 0
    for cycle in cycles:
        cycle_length = len(cycle[used_parameters[0]])
        if cycle_length < discretisation["split"]:
            # Raising a plain string is itself a TypeError in Python 3.
            raise ValueError(
                "Discretisation split is too big in comparison with cycle length"
            )

        discretized = []
        for param in used_parameters:
            np.nan_to_num(cycle[param], copy=False)
            discretized += discretize(cycle[param], discretisation)

        # Rows of `discretized` are statistics; transpose to get one row per segment.
        transposed_discretized = np.array(discretized).T
        result_cycles.extend(transposed_discretized)
        total_cycles_length += len(transposed_discretized)
    return total_cycles_length
def combinations(array):
    """Return every non-empty combination of the items of ``array``.

    Combinations are listed by increasing size (all 1-tuples, then all
    2-tuples, ...), each size in ``itertools.combinations`` order.
    """
    collected = []
    for index, _ in enumerate(array):
        collected.extend(itertools.combinations(array, index + 1))
    return collected
def get_charges(filepath):
    """Return only the charge cycles extracted from the battery file ``filepath``.

    Relies on ``extract_charge_discharge_impedance`` returning exactly three
    values, of which the first is kept.
    """
    charges, _discharges, _impedances = extract_charge_discharge_impedance(filepath)
    return charges
def get_discharges(filepath):
    """Return only the discharge cycles extracted from the battery file ``filepath``.

    Relies on ``extract_charge_discharge_impedance`` returning exactly three
    values, of which the second is kept.
    """
    _charges, discharges, _impedances = extract_charge_discharge_impedance(filepath)
    return discharges
def get_impedance(filepath):
    """Return only the impedance cycles extracted from the battery file ``filepath``.

    Relies on ``extract_charge_discharge_impedance`` returning exactly three
    values, of which the third is kept.
    """
    _charges, _discharges, impedances = extract_charge_discharge_impedance(filepath)
    return impedances
def load_batteries(extract_function, params, all_batteries=ALL_BATTERIES, class_count=-3, discretisation=None):
    """Build (or load from cache) the feature matrix and labels for some batteries.

    The result is cached as a pickle under ``batteries_data_temp/``; the file
    name encodes the parameters, battery names, class count and discretisation
    settings, so every distinct configuration gets its own cache entry.

    Args:
        extract_function: Callable taking a battery file path and returning
            the cycles to use (e.g. ``get_charges``).
        params: Names of the per-cycle fields used as features.
        all_batteries: Paths of the battery files to load.
        class_count: Number of label classes each battery is divided into.
            NOTE(review): the default of -3 is not a usable value; every caller
            in this file passes it explicitly.
        discretisation: Dict with ``"types"`` and ``"split"``. Required in
            practice: the ``None`` default fails when the cache name is built.

    Returns:
        Tuple ``(X, y)`` of numpy arrays: feature rows and their class labels.
    """
    all_batteries_number = [bat.split(os.sep)[-1].split(".")[0] for bat in all_batteries]
    cycles_filename = "__".join([
        "batteries_data_temp/data",
        "-".join(params),
        "-".join(all_batteries_number),
        str(class_count),
        "-".join(discretisation["types"]),
        str(discretisation["split"]),
    ]) + ".p"

    if os.path.exists(cycles_filename):
        # The cache is a locally produced pickle; never point this at untrusted files.
        with open(cycles_filename, "rb") as cache_file:
            loaded = pickle.load(cache_file)
        return loaded["x"], loaded["y"]

    X_cycles = []
    y_cycles = []
    for filepath in all_batteries:
        cycles = extract_function(filepath)
        total_cycles_length = append_params(X_cycles, cycles, params, discretisation)
        # Labels are assigned per battery: its rows are split, in order, into
        # class_count consecutive classes.
        y_cycles += chunk_it([-1] * total_cycles_length, class_count)

    X_cycles = np.array(X_cycles)
    y_cycles = np.array(y_cycles)

    # Create the cache directory on first use; it is git-ignored and therefore
    # absent from a fresh checkout.
    os.makedirs(os.path.dirname(cycles_filename), exist_ok=True)
    with open(cycles_filename, "wb") as cache_file:
        pickle.dump({"x": X_cycles, "y": y_cycles}, cache_file)
    return X_cycles, y_cycles
def save_dataframe(cv_results, model_file_name):
    """Write ``cv_results`` to ``<model_file_name>.xlsx``.

    The data is stored in a single sheet named ``automl_dataframe``.

    Args:
        cv_results: Anything accepted by the ``pd.DataFrame`` constructor.
        model_file_name: Output path without the ``.xlsx`` extension.
    """
    df = pd.DataFrame(cv_results)
    # The context manager saves and closes the workbook; ExcelWriter.save()
    # no longer exists in pandas 2.x.
    with pd.ExcelWriter(model_file_name + ".xlsx") as writer:
        df.to_excel(writer, sheet_name="automl_dataframe")
def auto_ML(options):
    """Train, save and evaluate one auto-sklearn classifier.

    Args:
        options: 6-tuple ``(title, params, extract_function, class_count,
            discretisation, folds)`` as produced by ``get_options``.

    Returns:
        A list of strings/values describing the run: the configuration, the
        hold-out scores, the test-battery scores and the auto-sklearn
        statistics and model description.

    Side effects: prints progress, writes ``<model>.xlsx`` and
    ``<model>.joblib`` into ``RESULTS_FOLDER``.
    """
    title, params, extract_function, class_count, discretisation, folds = options
    print("--------------------------------------------------------------------------------")
    print(title)
    print(params)
    print("--------------------------------------------------------------------------------")
    X_cycles, y_cycles = load_batteries(extract_function, params, ALL_BATTERIES, class_count, discretisation)
    # y_cycles = chunk_it(X_cycles, class_count) can be removed
    # y_cycles = np.array(y_cycles) can be removed
    print(X_cycles.shape)
    print(y_cycles.shape)
    # before split and are OK
    # 40'000 inputs and 6 classes CV !not certain!
    # 930'000 inputs and 6 classes holdout !not certain!
    # 25'860 inputs and 3 classes CV
    # Upper bound on the number of training rows kept after the split.
    MAX_SIZE = 258600000
    X_train, X_test, y_train, y_test = \
        sklearn.model_selection.train_test_split(X_cycles, y_cycles, random_state=1)
    X_train = X_train[:MAX_SIZE]
    y_train = y_train[:MAX_SIZE]
    # 0.3 is quite good
    # k = 0.3
    # k scales both time budgets below (3600 s total, 360 s per run at k = 1).
    k = 0.5
    automl = autosklearn.classification.AutoSklearnClassifier(
        # time_left_for_this_task=69,
        # per_run_time_limit=35,
        time_left_for_this_task=int(3600 * k),
        per_run_time_limit=int(360 * k),
        ensemble_size=int(50),
        ensemble_nbest=int(200),
        # ml_memory_limit=1024,
        # shared_mode=True,
        # ensemble_size=50,
        # ensemble_nbest=200,
        # tmp_folder=tmp_folder,
        # TODO Use CV instead of HOLDOUT
        # resampling_strategy='holdout',
        resampling_strategy='cv',
        resampling_strategy_arguments={'folds': folds},
        # output_folder=output_folder,
        # initial_configurations_via_metalearning=0,
        # seed=SEED,
        # time_left_for_this_task=3600*18,
        # per_run_time_limit=360*18,
        ml_memory_limit=28024,
        # delete_tmp_folder_after_terminate=False,
        # delete_output_folder_after_terminate=False,
    )
    automl.fit(X_train.copy(), y_train.copy())
    # With cross-validation the models are refitted on the training data
    # before being used for prediction.
    if automl.resampling_strategy == "cv":
        automl.refit(X_train.copy(), y_train.copy())
    y_pred = automl.predict(X_test, n_jobs=-1)
    # The file name encodes the whole configuration;
    # test_model_with_test_battery() parses it back by splitting on MODEL_FILE_SEP.
    model_file_name = os.path.join(RESULTS_FOLDER, MODEL_FILE_SEP.join([
        title,
        "-".join(params),
        str(class_count),
        "-".join(discretisation["types"]),
        str(discretisation["split"]),
        "folds" + str(folds),
    ]) + ".joblib")
    print(model_file_name)
    # [:-7] strips the 7-character ".joblib" suffix; save_dataframe appends ".xlsx".
    save_dataframe(automl.cv_results_, model_file_name[:-7])
    joblib.dump(automl, model_file_name)
    # Evaluate the saved model on TEST_BATTERIES.
    test_battery_result = test_model_with_test_battery(model_file_name)
    results = [
        title,
        "Params:" + str(params),
        "Discretisation:" + str(discretisation),
        "Class count:" + str(class_count),
        "Folds:" + str(folds),
        "Accuracy score:" + str(sklearn.metrics.accuracy_score(y_test, y_pred)),
        # NOTE: despite the label, this entry is the full weighted
        # (precision, recall, fscore, support) tuple.
        "F1 score:" + str(precision_recall_fscore_support(y_test, y_pred, average="weighted")),
        str(confusion_matrix(y_test, y_pred)),
    ] + test_battery_result + [
        str(automl.sprint_statistics()),
        str(automl.show_models()),
        # str(automl.cv_results_),
    ]
    print(results)
    return results
def get_options(min_length=0):
    """Return the list of experiment configurations to run.

    Each entry is a 6-tuple ``(title, params, extract_function, class_count,
    discretisation, folds)`` as unpacked by ``auto_ML``. Currently only charge
    cycles are explored: all charge parameters, 3 classes, every discretisation
    type with a split of 6, and 5, 10, 15 and 20 cross-validation folds.

    Args:
        min_length: Currently unused; kept for interface compatibility.
    """
    class_count = 3  # 10, 50, 100, 200
    fold_counts = (5, 10, 15, 20)

    # No discharge or impedance experiments are enabled at the moment.
    discharge_options = []
    impedance_options = []
    charge_options = [
        (
            "charge",
            charges_params,
            get_charges,
            class_count,
            {"types": DISCRETISATION_TYPES, "split": 6},
            folds,
        )
        for folds in fold_counts
    ]
    return discharge_options + impedance_options + charge_options
def writeln(file, text):
    """Write ``text`` followed by a newline to the writable object ``file``."""
    line = text + "\n"
    file.write(line)
def init_folder():
    """Reset the results folder and create empty log files in it.

    Any existing ``RESULTS_FOLDER`` is deleted together with its contents,
    then recreated with empty ``result.txt`` and ``failed.txt`` files.

    Returns:
        Tuple ``(results_file, failed_file)`` with the paths of the two logs.
    """
    if os.path.exists(RESULTS_FOLDER):
        shutil.rmtree(RESULTS_FOLDER)
    if not os.path.exists(RESULTS_FOLDER):
        os.makedirs(RESULTS_FOLDER)

    log_paths = tuple(
        os.path.join(RESULTS_FOLDER, log_name)
        for log_name in ("result.txt", "failed.txt")
    )
    # Opening in 'w' mode creates (or truncates) each log file.
    for log_path in log_paths:
        with open(log_path, 'w'):
            pass

    results_file, failed_file = log_paths
    return results_file, failed_file
def execute_auto_ML():
    """Run ``auto_ML`` for every configuration and log the outcomes.

    The results folder is reset first. The result lines of each successful
    configuration are appended to the results log; a failing configuration is
    recorded with its traceback in the failure log and the loop continues with
    the next one. The total execution time is printed at the end.

    Only ``Exception`` subclasses are treated as a failed configuration, so
    ``KeyboardInterrupt`` and ``SystemExit`` still stop the whole run.
    """
    start_time = time.time()
    print(start_time)
    all_options = get_options(min_length=0)
    results_file, failed_file = init_folder()

    for option in all_options:
        try:
            result = auto_ML(option)
            with open(results_file, 'a') as the_file:
                writeln(the_file, SEPARATOR)
                for line in result:
                    writeln(the_file, str(line))
                writeln(the_file, SEPARATOR)
        except Exception:
            # Best effort: record the failure and carry on with the next option.
            with open(failed_file, 'a') as the_file:
                writeln(the_file, SEPARATOR)
                writeln(the_file, str(option))
                writeln(the_file, str(traceback.format_exc()))
                writeln(the_file, SEPARATOR)

    seconds = time.time() - start_time
    print("execution time", int(seconds / 60 / 60), "h", int(seconds / 60) % 60, "m", int(seconds) % 60, "s")
def test_model_with_test_battery(model_filepath):
    """Evaluate a saved model on ``TEST_BATTERIES``.

    The configuration is recovered from the model's file name, which
    ``auto_ML`` builds as
    ``<type>___<params>___<class_count>___<types>___<split>___folds<n>.joblib``
    with list fields joined by ``-``.

    Args:
        model_filepath: Path of the ``.joblib`` model file.

    Returns:
        A list of strings: the batteries used, the accuracy, the weighted
        precision/recall/fscore/support tuple and the confusion matrix.

    Raises:
        ValueError: If the file name does not start with a known cycle type.
        Exception: Whatever ``joblib.load`` raises when the model cannot be
            loaded (re-raised after printing the path).
    """
    batteries = TEST_BATTERIES
    clean_model_filepath = model_filepath.split(os.sep)[-1].split(".")[0].split(MODEL_FILE_SEP)
    cycle_type = clean_model_filepath[0]
    params = clean_model_filepath[1].split("-")
    class_count = int(clean_model_filepath[2])
    # NOTE(review): splitting on "-" also breaks a hyphenated type name such as
    # "standard-deviation" into two entries; kept as is so the test features
    # stay consistent with the ones the models were trained on.
    discretisation = {
        "types": clean_model_filepath[3].split("-"),
        "split": int(clean_model_filepath[4]),
    }

    extractors = {
        "charge": get_charges,
        "discharge": get_discharges,
        "impedance": get_impedance,
    }
    if cycle_type not in extractors:
        raise ValueError("Unknown cycle type in model file name: " + cycle_type)
    extract_function = extractors[cycle_type]

    X_cycles, y_true = load_batteries(extract_function, params, batteries, class_count, discretisation)

    try:
        loaded_automl = joblib.load(model_filepath)
    except Exception:
        print("Error in model loading->" + model_filepath)
        # Without a model nothing below can run; propagate the real error.
        raise
    print(loaded_automl)

    y_pred = loaded_automl.predict(X_cycles, n_jobs=-1)
    result = [
        "Batteries:" + str(batteries),
        "Accuracy score for battery:" + str(sklearn.metrics.accuracy_score(y_true, y_pred)),
        str(precision_recall_fscore_support(y_true, y_pred, average="weighted")),
        str(confusion_matrix(y_true, y_pred)),
    ]
    return result
def test_models_with_test_battery(folder_to_test=RESULTS_FOLDER):
    """Evaluate every ``.joblib`` model found directly in ``folder_to_test``.

    Each model is passed to ``test_model_with_test_battery`` and the returned
    result entries are printed one per line.
    """
    pattern = folder_to_test + os.sep + "*.joblib"
    for model_filepath in glob.glob(pattern):
        for entry in test_model_with_test_battery(model_filepath):
            print(entry)
# Script entry point: run the full auto-ML exploration. The commented-out lines
# are alternative manual checks kept for reference.
if __name__ == "__main__":
    # print(test_model_with_test_battery("results/004_fixed_labels_1h_charge/charge___voltage_measured-current_measured-temperature_measured-current_charge-voltage_charge___3.joblib"))
    execute_auto_ML()
    # print(discretize(np.array([1,2,3,4,5,6,7,8,9,10,11,12]), {"types": ["mean", "median", "standard deviation"], "split": 4}))
    # test_models_with_test_battery("results/004_fixed_labels_1h_charge/")
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment