Skip to content
Projects
Groups
Snippets
Help
This project
Loading...
Sign in / Register
Toggle navigation
W
wafer_failure
Overview
Overview
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
lvzhengyang
wafer_failure
Commits
bfe16e7a
Commit
bfe16e7a
authored
Sep 06, 2022
by
lvzhengyang
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix dataset
parent
ee8f53f5
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
71 additions
and
0 deletions
+71
-0
dataset/LSWMD.pkl.zip
+0
-0
dataset/reorganize_dataset.py
+71
-0
No files found.
dataset/LSWMD.pkl.zip
deleted
100644 → 0
View file @
ee8f53f5
File deleted
dataset/reorganize_dataset.py
0 → 100644
View file @
bfe16e7a
"""
@brief: reorganize the dataset for few-shot learning and self-supervised learning
@author: Zhengyang Lyu
@date: 2022.8.26
@note: run this script under 'dataset' dir
"""
import
os
import
sys
sys
.
path
.
append
(
'..'
)
from
utils
import
read_pkl
,
get_df_of_label
import
pandas
as
pd
import
random
import
numpy
as
np
import
pdb
"""
Categories Training Set Testing Set
# Percent (%) # Percent (%)
Center 2576 2.48 1718 2.48
Donut 333 0.32 222 0.32
Edge-Loc 3113 2.99 2076 3.00
Edge-Ring 5808 5.60 3872 5.60
Location 2155 2.08 1438 2.08
Near-Full 89 0.09 60 0.09
Random 519 0.50 347 0.50
Scratch 715 0.69 478 0.69
None 88459 85.25 58972 85.25
Total 103767 100 69183 100
"""
def reorganize_dataset():
    """Split the raw wafer dataset into unlabeled, test and training parts.

    Reads the module-level names ``dataset_path``, ``nonlabel_path`` and
    ``withlabel_test_path`` (set in the ``__main__`` block), so they must be
    defined before this function is called.

    Steps:
      1. Load the dataset with ``read_pkl`` and pickle the unlabeled frame to
         ``nonlabel_path``.
      2. Break the labeled frame into one frame per class with
         ``get_df_of_label``.
      3. Draw 40% of every class (fixed seed 60) as the held-out split,
         concatenate the draws and pickle them to ``withlabel_test_path``.
      4. Keep the remaining 60% of every class as the training frames.

    Returns:
        list: one re-indexed training DataFrame per class, in the order
        produced by ``get_df_of_label``.
    """
    # read_pkl yields four frames; only the first two are used here.
    df_withlabel, df_nonlabel, _, _ = read_pkl(path=dataset_path)
    df_nonlabel.to_pickle(nonlabel_path)

    # --- following code is for df_withlabel ---
    df_list = get_df_of_label(df_withlabel)
    # per-class sizes recorded by the original author:
    # [4294, 555, 5189, 9680, 3593, 866, 1193, 149, 147431]

    # divide training / validation set (0.6 : 0.4); the fixed random_state
    # makes the split reproducible between runs.
    val_list = [df.sample(frac=0.4, random_state=60) for df in df_list]
    df_val = pd.concat(val_list).reset_index(drop=True)
    df_val.to_pickle(withlabel_test_path)

    # The training part of each class is whatever was not drawn above. Rows
    # are dropped by index label, so this relies on each per-class frame
    # having unique index labels.
    train_list = [
        df.drop(val.index).reset_index(drop=True)
        for df, val in zip(df_list, val_list)
    ]
    # per-class training sizes recorded by the original author:
    # [2576, 333, 3113, 5808, 2156, 520, 716, 89, 88459]

    # TODO(review): the original continued with an unfinished few-shot class
    # sampling loop. It crashed immediately (np.nonzero on a 1-D input returns
    # a 1-tuple, so unpacking it into two names raises ValueError), it never
    # consumed any samples and therefore could not terminate, and it ended in
    # a pdb breakpoint. That draft is dropped; the training frames are
    # returned so the sampling step can be built on top of them.
    return train_list
if __name__ == '__main__':
    # Script entry point: define the input/output locations read by
    # reorganize_dataset() as module-level names, make sure the output
    # directories exist, then run the reorganization.
    # Relative paths below are resolved against the current working directory.

    # NOTE(review): absolute, machine-specific path to the raw dataset pickle.
    dataset_path = '/lustre/S/lvzhengyang/wafer_failure/dataset/LSWMD.pkl'

    # Output location for the unlabeled samples.
    nonlabel_dir = os.path.join('.', 'nonlabel')
    # exist_ok=True replaces the check-then-create pattern, which could fail
    # if the directory appeared between the check and the mkdir call.
    os.makedirs(nonlabel_dir, exist_ok=True)
    nonlabel_path = os.path.join(nonlabel_dir, 'nonlabel.pkl')

    # Output locations for the labeled samples (train / test sub-directories).
    withlabel_dir = os.path.join('.', 'withlabel')
    withlabel_dir_train = os.path.join(withlabel_dir, 'train')
    withlabel_dir_test = os.path.join(withlabel_dir, 'test')
    for out_dir in (withlabel_dir, withlabel_dir_train, withlabel_dir_test):
        os.makedirs(out_dir, exist_ok=True)
    withlabel_test_path = os.path.join(withlabel_dir_test, 'withlabel_test.pkl')

    reorganize_dataset()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment