# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory
import os
# Walk the read-only Kaggle input directory and print the full path of every file found.
for root, _, names in os.walk('/kaggle/input'):
    for full_path in (os.path.join(root, name) for name in names):
        print(full_path)
# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
/kaggle/input/lego-sets-and-themes-database/lego_sets_and_themes.csv
# 安装依赖和导入库
!pip install tensorflow-addons
!pip install tfa-nightly
import os
import tensorflow as tf
# Configure GPUs: enable on-demand memory growth on every physical GPU so
# TensorFlow does not reserve all device memory up front. If that fails, try
# to hide the GPUs and continue on CPU.
try:
    gpus = tf.config.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
        print("Using GPU:", gpu)
    logical_gpus = tf.config.list_logical_devices('GPU')
    print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
except Exception as e:  # best-effort setup: report the problem and keep going
    print("GPU 初始化失败,使用 CPU。错误:", e)
    # Hide the GPUs so later code runs on CPU. The visible-device list can no
    # longer be changed once the TF runtime has been initialised, in which case
    # this call raises too; that must not abort the notebook.
    try:
        tf.config.set_visible_devices([], 'GPU')
    except (RuntimeError, ValueError) as hide_err:
        print("Could not hide GPUs; keeping the current device configuration:", hide_err)
# The rest of the notebook runs normally whichever path was taken.
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
Requirement already satisfied: tensorflow-addons in /usr/local/lib/python3.11/dist-packages (0.23.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (24.2)
Requirement already satisfied: typeguard<3.0.0,>=2.7 in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (2.13.3)
Requirement already satisfied: tfa-nightly in /usr/local/lib/python3.11/dist-packages (0.23.0.dev20240415222534)
Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tfa-nightly) (24.2)
Requirement already satisfied: typeguard<3.0.0,>=2.7 in /usr/local/lib/python3.11/dist-packages (from tfa-nightly) (2.13.3)
Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
Using GPU: PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')
2 Physical GPUs, 0 Logical GPUs
# 读取并清洗数据
# Load the LEGO sets table and show its columns.
df = pd.read_csv('/kaggle/input/lego-sets-and-themes-database/lego_sets_and_themes.csv')
print(df.columns.tolist())

# Keep only rows that have both an image URL and a set number: the URL is what
# gets downloaded and the set number is what the class labels are built from.
# The CSV already names these columns 'image_url' and 'set_number', so no
# renaming is needed.
df = df.dropna(subset=['image_url', 'set_number'])

# Map every distinct set_number to a contiguous integer class id
# (ids follow order of first appearance in the table).
set_list = df['set_number'].unique().tolist()
label_map = {name: idx for idx, name in enumerate(set_list)}
df['label_multi'] = df['set_number'].map(label_map)
df['label_binary'] = 1  # every row is a positive example; negatives can be added later
print(f"共 {len(set_list)} 种 LEGO 套装")
['set_number', 'set_name', 'year_released', 'number_of_parts', 'image_url', 'theme_name']
共 21496 种 LEGO 套装
# 构建支持 HTTPS 下载的 tf.data.Dataset
IMG_SIZE = (224, 224)  # size passed to tf.image.resize; also the model's input height/width
BATCH_SIZE = 32  # number of samples per batch in the tf.data pipeline
AUTOTUNE = tf.data.AUTOTUNE  # lets tf.data choose map parallelism and prefetch depth
def parse_row(url, lb_bin, lb_multi):
    """Turn one (url, binary label, class id) row into a training example.

    The image behind ``url`` is downloaded (or taken from the Keras file
    cache), decoded as a 3-channel JPEG, resized to ``IMG_SIZE`` and scaled
    to the [0, 1] range.

    Returns:
        A tuple ``(image, labels)`` where ``image`` is a float32 tensor of
        shape ``(*IMG_SIZE, 3)`` and ``labels`` is a dict with the keys
        ``'binary'`` and ``'multi'`` matching the model's output names.
    """
    def _fetch_image(url_tensor):
        # Runs eagerly inside tf.py_function, so .numpy() is available here.
        address = url_tensor.numpy().decode()
        # get_file downloads the URL and stores it in the Keras cache, keyed by file name.
        local_path = tf.keras.utils.get_file(
            fname=os.path.basename(address),
            origin=address,
        )
        raw_bytes = tf.io.read_file(local_path)
        decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
        return tf.image.resize(decoded, IMG_SIZE)

    # py_function lets the Python-side download logic run inside the tf.data graph.
    pixels = tf.py_function(func=_fetch_image, inp=[url], Tout=tf.float32)
    # py_function loses static shape information; restore it for batching and the model.
    pixels.set_shape((*IMG_SIZE, 3))
    labels = {'binary': lb_bin, 'multi': lb_multi}
    return pixels / 255.0, labels
# Build the dataset: one (image url, binary label, class id) tuple per table row.
ds = tf.data.Dataset.from_tensor_slices((
    df['image_url'].values,
    df['label_binary'].values,
    df['label_multi'].values
))

debug_count = 100
# debug_count = len(df)

# Shuffle ONCE with a frozen order. With the default reshuffle_each_iteration=True
# every new iterator would pick a different subset, so the train and validation
# splits below would overlap and change from epoch to epoch.
ds = ds.shuffle(10000, seed=42, reshuffle_each_iteration=False)
ds = ds.take(debug_count)

# Split train/validation at the SAMPLE level, before batching. take()/skip()
# count dataset elements, so splitting a batched dataset would count batches.
val_size = int(0.2 * debug_count)
train_size = debug_count - val_size
ds_val = ds.take(val_size)
ds_train = ds.skip(val_size)


def _to_batches(samples):
    """Download/decode each sample, drop the ones that fail, and batch the rest."""
    samples = samples.map(parse_row, num_parallel_calls=AUTOTUNE)
    # Skip rows whose image cannot be downloaded or decoded instead of aborting.
    samples = samples.apply(tf.data.experimental.ignore_errors())
    return samples.batch(BATCH_SIZE).prefetch(AUTOTUNE)


# Only the training split is reshuffled every epoch; the split itself stays fixed.
ds_train = _to_batches(ds_train.shuffle(max(1, train_size), seed=42))
ds_val = _to_batches(ds_val)

# ignore_errors() drops an unknown number of samples, so the number of batches
# cannot be known in advance. None tells Keras to run each epoch (and each
# validation pass) until the dataset is exhausted.
steps_per_epoch = None
validation_steps = None
print(f"total = {debug_count}, steps_per_epoch = {steps_per_epoch}, validation_steps = {validation_steps}.")
print("构建完毕。")
total = 100, steps_per_epoch = 80, validation_steps = 20.
构建完毕。
# 定义多任务模型
# Backbone: EfficientNetB0 without its classification head, global-average pooled.
base_model = keras.applications.EfficientNetB0(
    include_top=False, input_shape=(*IMG_SIZE, 3), pooling='avg'
)
base_model.trainable = False  # freeze the pretrained weights for the first training stage

inputs = keras.Input(shape=(*IMG_SIZE, 3))
# parse_row and predict_from_url feed pixels scaled to [0, 1], but the Keras
# EfficientNet models normalise internally and expect raw [0, 255] pixels.
# Undo the scaling here so the pretrained backbone sees the range it was trained on.
x = layers.Rescaling(255.0)(inputs)
x = base_model(x, training=False)  # training=False keeps BatchNorm in inference mode
x = layers.Dropout(0.2)(x)

# Binary head: probability that the image shows a LEGO set.
bin_output = layers.Dense(1, activation='sigmoid', name='binary')(x)
# Multi-class head: one class per distinct set number.
multi_output = layers.Dense(len(set_list), activation='softmax', name='multi')(x)

model = keras.Model(inputs=inputs, outputs=[bin_output, multi_output])
# Losses and metrics are keyed by output layer name; the dataset's label dict
# uses the same keys.
model.compile(
    optimizer='adam',
    loss={
        'binary': 'binary_crossentropy',
        'multi': 'sparse_categorical_crossentropy'
    },
    metrics={
        'binary': ['accuracy'],
        'multi': ['accuracy']
    }
)
model.summary()
Loading...
# 训练模型
# Keep the weights with the best validation accuracy of the multi-class head on disk.
checkpoint_cb = keras.callbacks.ModelCheckpoint(
    'best_model.keras',
    monitor='val_multi_accuracy',
    save_best_only=True,
    mode='max',
)
# Stop after 5 epochs without improvement and roll back to the best weights.
early_stop_cb = keras.callbacks.EarlyStopping(
    monitor='val_multi_accuracy',
    patience=5,
    restore_best_weights=True,
    mode='max',
)
callbacks = [checkpoint_cb, early_stop_cb]

history = model.fit(
    ds_train,
    validation_data=ds_val,
    epochs=20,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=callbacks,
)
print("训练完毕")
Fetching long content....
# 在验证集上评估
# Evaluate the trained model on the validation set.
model.evaluate(ds_val)
# 推理示例:
def predict_from_url(url):
    """Classify the image at ``url`` with the trained two-head model.

    The image is downloaded (or read from the Keras file cache), decoded as a
    3-channel JPEG, resized to ``IMG_SIZE`` and scaled to [0, 1], the same
    preprocessing the training pipeline applies.

    Returns:
        A tuple ``(is_lego, set_number)``. ``is_lego`` is True when the binary
        head's score exceeds 0.5; ``set_number`` is the highest-scoring entry
        of ``set_list`` in that case and ``None`` otherwise.
    """
    local_path = tf.keras.utils.get_file(fname=os.path.basename(url), origin=url)
    pixels = tf.image.decode_jpeg(tf.io.read_file(local_path), channels=3)
    # Add a leading batch axis: the model expects input of shape (1, H, W, 3).
    batch = tf.image.resize(pixels, IMG_SIZE)[tf.newaxis, ...] / 255.0
    bin_pred, multi_pred = model.predict(batch)
    is_lego = (bin_pred[0][0] > 0.5)
    if not is_lego:
        return is_lego, None
    best_class = np.argmax(multi_pred[0])
    return is_lego, set_list[best_class]
# Try the inference helper on a single set image URL.
print(predict_from_url('https://cdn.rebrickable.com/media/sets/21034-1.jpg'))
# from kaggle.api.kaggle_api_extended import KaggleApi
# api = KaggleApi();
# api.authenticate()
# api.dataset_create_version(dataset='username/your-model-dataset',
# files=['/kaggle/working/model.keras'],
# version_notes='epoch20 checkpoint')
Downloading data from https://cdn.rebrickable.com/media/sets/6056-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/75160-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/8668-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/7599-1.jpg
43813/43813 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/30030-1.jpg
36482/36482 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
31914/31914 ━━━━━━━━━━━━━━━━━━━━ 0s 3us/step
150855/150855 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/4906-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/75044-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/5880-1.jpg
462749/462749 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/850939-1.jpg
33233/33233 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/30072-1.jpg
39691/39691 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
422897/422897 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/70727-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/3745-1.jpg
32762/32762 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
24576/408410 ━━━━━━━━━━━━━━━━━━━━ 1s 4us/stepDownloading data from https://cdn.rebrickable.com/media/sets/952111-1.jpg
408410/408410 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/5158-1.jpg
468958/468958 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
32945/32945 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/5169-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/5006-1.jpg
34113/34113 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/stepe
305494/305494 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/3347-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/75980-1.jpg
33422/33422 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/kabrace-1.jpg
44234/44234 ━━━━━━━━━━━━━━━━━━━━ 0s 2us/step
Downloading data from https://cdn.rebrickable.com/media/sets/7893-1.jpg
31986/31986 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/60306-1.jpg
33757/33757 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
102710/102710 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
202863/202863 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/5200-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/3629-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/llca30-1.jpg
25304/25304 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/stepe
174723/174723 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
0/26252 ━━━━━━━━━━━━━━━━━━━━ 0s 0s/stepDownloading data from https://cdn.rebrickable.com/media/sets/75291-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/75078-1.jpg
26252/26252 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/40403-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/8263-1.jpg
126116/126116 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
63437/63437 ━━━━━━━━━━━━━━━━━━━━ 0s 2us/step
239826/239826 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/6395-1.jpg
243856/243856 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/75081-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/3683-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/7073-1.jpg
41183/41183 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/fllcclasspack2022-1.jpg
35415/35415 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/4191-1.jpg
212253/212253 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/911725-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/1857-1.jpg
601744/601744 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
942663/942663 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
111112/111112 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
301883/301883 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/952301-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/3872-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/311-4.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/3758-1.jpg
39859/39859 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
0/24416 ━━━━━━━━━━━━━━━━━━━━ 0s 0s/stepepDownloading data from https://cdn.rebrickable.com/media/sets/flldclassguide2022-1.jpg
24416/24416 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/9674-1.jpg
346726/346726 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/70748-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/7148-1.jpg
276951/276951 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/1220-1.jpg
130987/130987 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
98304/362451 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/stepDownloading data from https://cdn.rebrickable.com/media/sets/60036-1.jpg
362451/362451 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
182940/182940 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
293904/293904 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://cdn.rebrickable.com/media/sets/6705-1.jpg
519648/519648 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/377-2.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/3567-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/40404-1.jpg
26160/26160 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/stepe
32687/32687 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
394109/394109 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/30579-1.jpg
210730/210730 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/71720-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/60119-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/k8588-1.jpg
127072/127072 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/8527-1.jpg
15142/15142 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/stepe
Downloading data from https://cdn.rebrickable.com/media/sets/5391-1.jpg
243266/243266 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
197808/197808 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/76422-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/10020-2.jpg
393602/393602 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/42125-1.jpg
18695/18695 ━━━━━━━━━━━━━━━━━━━━ 0s 5us/step
Downloading data from https://cdn.rebrickable.com/media/sets/5975-1.jpg
28780/28780 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/stepste
3797096/3797096 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
157199/157199 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
181726/181726 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/8591-2.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/8457-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/5415-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/hpg03-1.jpg
34011/34011 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://cdn.rebrickable.com/media/sets/2259-1.jpg
30013/30013 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/2032-1.jpg
149065/149065 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/75352-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/66389-1.jpg
2610957/2610957 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
287132/287132 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
30313/30313 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
24576/4305382 ━━━━━━━━━━━━━━━━━━━━ 15s 4us/stepDownloading data from https://cdn.rebrickable.com/media/sets/8253-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/7099-1.jpg
36229/36229 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/stepste
45765/45765 ━━━━━━━━━━━━━━━━━━━━ 0s 2us/steps
4305382/4305382 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step
Downloading data from https://cdn.rebrickable.com/media/sets/7575-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/b003-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/k8924-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/1143-1.jpg
12684/12684 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
0/210883 ━━━━━━━━━━━━━━━━━━━━ 0s 0s/stepDownloading data from https://cdn.rebrickable.com/media/sets/k8927-1.jpg
123499/123499 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
38644/38644 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
210883/210883 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
0/204433 ━━━━━━━━━━━━━━━━━━━━ 0s 0s/stepDownloading data from https://cdn.rebrickable.com/media/sets/70596-1.jpg
204433/204433 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
444968/444968 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://cdn.rebrickable.com/media/sets/10278-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/8581-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/631-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/503-1.jpg
7037/7037 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step
Downloading data from https://cdn.rebrickable.com/media/sets/1310-1.jpg
29185/29185 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
82563/82563 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
32865/32865 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/stepe
230561/230561 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/70412-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/1382-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/1226-2.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/1241-2.jpg
32193/32193 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/952108-1.jpg
39328/39328 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
177260/177260 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
Downloading data from https://cdn.rebrickable.com/media/sets/7700-1.jpg
477278/477278 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
136271/136271 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
352581/352581 ━━━━━━━━━━━━━━━━━━━━ 0s 1us/step
3/3 ━━━━━━━━━━━━━━━━━━━━ 26s 7s/step - binary_accuracy: 0.0000e+00 - binary_loss: 0.7013 - loss: 10.6773 - multi_accuracy: 0.0000e+00 - multi_loss: 9.9759
/usr/lib/python3.11/contextlib.py:158: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.
self.gen.throw(typ, value, traceback)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 264ms/step
(False, None)