
Environment setup and imports

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
/kaggle/input/lego-sets-and-themes-database/lego_sets_and_themes.csv

Install dependencies and import libraries

!pip install tensorflow-addons
!pip install tfa-nightly

import os

import tensorflow as tf

try:
    # Make all physical GPUs visible to TF and enable memory growth on each
    gpus = tf.config.list_physical_devices('GPU')
    for gpu in gpus:
        tf.config.experimental.set_memory_growth(gpu, True)
        print("Using GPU:", gpu)

    logical_gpus = tf.config.list_logical_devices('GPU')
    print(f"{len(gpus)} Physical GPUs, {len(logical_gpus)} Logical GPUs")
except Exception as e:
    print("GPU initialization failed, falling back to CPU. Error:", e)
    # Hide the GPUs so subsequent ops run on the CPU
    tf.config.set_visible_devices([], 'GPU')

# The rest of the notebook runs the same either way


from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
Requirement already satisfied: tensorflow-addons in /usr/local/lib/python3.11/dist-packages (0.23.0)
Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (24.2)
Requirement already satisfied: typeguard<3.0.0,>=2.7 in /usr/local/lib/python3.11/dist-packages (from tensorflow-addons) (2.13.3)
Requirement already satisfied: tfa-nightly in /usr/local/lib/python3.11/dist-packages (0.23.0.dev20240415222534)
Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from tfa-nightly) (24.2)
Requirement already satisfied: typeguard<3.0.0,>=2.7 in /usr/local/lib/python3.11/dist-packages (from tfa-nightly) (2.13.3)
Using GPU: PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')
Using GPU: PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')
2 Physical GPUs, 0 Logical GPUs
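
The log above shows two physical GPUs. If both should actually be used during training, one option is `tf.distribute.MirroredStrategy`; a minimal sketch (the `build_model()` helper is hypothetical shorthand for the model-construction code later in this notebook, which would need to move inside the strategy scope):

# Hedged sketch: replicate the model across all visible GPUs with MirroredStrategy
strategy = tf.distribute.MirroredStrategy()
print("Number of replicas:", strategy.num_replicas_in_sync)

# with strategy.scope():
#     model = build_model()   # hypothetical helper wrapping the model definition below
#     model.compile(...)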

Load and clean the data

df = pd.read_csv('/kaggle/input/lego-sets-and-themes-database/lego_sets_and_themes.csv')

print(df.columns.tolist())

# Keep only rows that have both an image_url and a set number
# (the CSV already uses the column names 'image_url' and 'set_number', so no rename is needed)
df = df.dropna(subset=['image_url', 'set_number'])

# Map each set_number to a contiguous label ID
set_list = df['set_number'].unique().tolist()
label_map = {name: idx for idx, name in enumerate(set_list)}
df['label_multi'] = df['set_number'].map(label_map)
df['label_binary'] = 1  # every row is a positive example; negatives can be added later (see the sketch below)

print(f"{len(set_list)} distinct LEGO sets")
['set_number', 'set_name', 'year_released', 'number_of_parts', 'image_url', 'theme_name']
21496 distinct LEGO sets
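
Because `label_binary` is 1 for every row, the binary head has no negatives to learn from yet. A minimal sketch of how negatives could be mixed in, assuming a hypothetical folder of non-LEGO photos at `/kaggle/input/non-lego-images/` (the path is a placeholder, and `parse_row` below would need a local-file branch to read these paths):

import glob

# Hedged sketch: append non-LEGO images as negative examples for the binary head
neg_paths = glob.glob('/kaggle/input/non-lego-images/*.jpg')  # placeholder dataset path

neg_df = pd.DataFrame({
    'image_url': neg_paths,   # local paths, unlike the HTTPS URLs in the main DataFrame
    'label_binary': 0,        # not a LEGO set
    'label_multi': 0,         # dummy class ID; in practice it should be masked out of the multi-class loss
})

df = pd.concat([df[['image_url', 'label_binary', 'label_multi']], neg_df], ignore_index=True)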

Build a tf.data.Dataset that downloads images over HTTPS

import math

IMG_SIZE = (224, 224)
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE

def parse_row(url, lb_bin, lb_multi):
    # Use tf.py_function + keras.utils.get_file to download (and cache) each image, then decode it
    def _download_and_decode(u):
        path = tf.keras.utils.get_file(
            fname=os.path.basename(u.numpy().decode()),
            origin=u.numpy().decode()
        )                                                  # cached under ~/.keras/datasets
        img = tf.io.read_file(path)
        img = tf.image.decode_jpeg(img, channels=3)
        img = tf.image.resize(img, IMG_SIZE)
        return img

    image = tf.py_function(
        func=_download_and_decode,
        inp=[url],
        Tout=tf.float32                                  # py_function wraps the Python download logic
    )
    image.set_shape((*IMG_SIZE, 3))
    # EfficientNetB0 includes its own rescaling layer, so pass raw pixel values in [0, 255]
    # (dividing by 255 here would normalize twice)

    return image, {'binary': lb_bin, 'multi': lb_multi}

# Build the Dataset
ds = tf.data.Dataset.from_tensor_slices((
    df['image_url'].values,
    df['label_binary'].values,
    df['label_multi'].values
))
debug_count = 100
# debug_count = len(df)
ds = ds.shuffle(10000, seed=42, reshuffle_each_iteration=False)  # fixed order so the split below stays stable
ds = ds.take(debug_count)
ds = ds.map(parse_row, num_parallel_calls=AUTOTUNE)
ds = ds.apply(tf.data.experimental.ignore_errors())              # drop images that fail to download or decode

# Train/validation split (take/skip count samples here because batching happens afterwards)
val_size = int(0.2 * debug_count)
ds_val   = ds.take(val_size).batch(BATCH_SIZE).prefetch(AUTOTUNE)
ds_train = ds.skip(val_size).batch(BATCH_SIZE).prefetch(AUTOTUNE)   # efficient input pipeline

# Steps are counted in batches, not samples
steps_per_epoch  = math.ceil((debug_count - val_size) / BATCH_SIZE)
validation_steps = math.ceil(val_size / BATCH_SIZE)

print(f"total = {debug_count}, steps_per_epoch = {steps_per_epoch}, validation_steps = {validation_steps}.")

print("Dataset construction complete.")
total = 100, steps_per_epoch = 3, validation_steps = 1.
Dataset construction complete.
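
Before defining the model, it is worth pulling one batch through the pipeline to confirm that the shapes and the label dictionary look right; a quick check (this downloads a handful of images):

# Sanity check: fetch a single batch and inspect its shapes
for images, labels in ds_train.take(1):
    print("images:", images.shape)                  # (batch, 224, 224, 3)
    print("binary labels:", labels['binary'].shape)
    print("multi labels:", labels['multi'].shape)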

Define the multi-task model

# Backbone: EfficientNetB0
base_model = keras.applications.EfficientNetB0(
    include_top=False, input_shape=(*IMG_SIZE, 3), pooling='avg'
)
base_model.trainable = False  # freeze the pretrained weights for now (see the fine-tuning sketch below)

inputs = keras.Input(shape=(*IMG_SIZE, 3))
x = base_model(inputs, training=False)
x = layers.Dropout(0.2)(x)

# Binary classification head
bin_output = layers.Dense(1, activation='sigmoid', name='binary')(x)
# Multi-class classification head
multi_output = layers.Dense(len(set_list), activation='softmax', name='multi')(x)

model = keras.Model(inputs=inputs, outputs=[bin_output, multi_output])

model.compile(
    optimizer='adam',
    loss= {
        'binary': 'binary_crossentropy',
        'multi':  'sparse_categorical_crossentropy'
    },
    metrics={
        'binary': ['accuracy'],
        'multi':  ['accuracy']
    }
)

model.summary()
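Once the new heads have converged with the backbone frozen, a common next step is to unfreeze the top of EfficientNetB0 and continue training at a much lower learning rate. A minimal fine-tuning sketch (the number of unfrozen layers and the learning rate are illustrative assumptions, not tuned values):

# Hedged fine-tuning sketch: unfreeze the top of the backbone, then recompile with a small LR
base_model.trainable = True
for layer in base_model.layers[:-20]:   # keep all but the last ~20 layers frozen (arbitrary choice)
    layer.trainable = False

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),  # low LR to avoid destroying pretrained features
    loss={'binary': 'binary_crossentropy', 'multi': 'sparse_categorical_crossentropy'},
    metrics={'binary': ['accuracy'], 'multi': ['accuracy']}
)
# model.fit(ds_train, validation_data=ds_val, epochs=5, callbacks=callbacks)  # second training phase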

Train the model

callbacks = [
    keras.callbacks.ModelCheckpoint('best_model.keras',
                                    monitor='val_multi_accuracy',
                                    save_best_only=True,
                                    mode='max'),
    keras.callbacks.EarlyStopping(monitor='val_multi_accuracy',
                                  patience=5,
                                  restore_best_weights=True,
                                  mode='max')
]

history = model.fit(
    ds_train,
    validation_data=ds_val,
    epochs=20,
    steps_per_epoch=steps_per_epoch,
    validation_steps=validation_steps,
    callbacks=callbacks
)

print("Training complete")
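To see how training progressed, the accuracy curves stored in `history` can be plotted; a small sketch with matplotlib (the metric names follow the 'binary'/'multi' output names used above):

import matplotlib.pyplot as plt

# Plot training vs. validation accuracy for the multi-class head
plt.plot(history.history['multi_accuracy'], label='train multi accuracy')
plt.plot(history.history['val_multi_accuracy'], label='val multi accuracy')
plt.xlabel('epoch')
plt.ylabel('accuracy')
plt.legend()
plt.show()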

Evaluate on the validation set

# Evaluate on the validation set
model.evaluate(ds_val)

# Inference example:
def predict_from_url(url):
    img = tf.keras.utils.get_file(fname=os.path.basename(url), origin=url)
    img = tf.io.read_file(img)
    img = tf.image.decode_jpeg(img, channels=3)
    # EfficientNetB0 expects raw [0, 255] pixels, so no /255 rescaling here either
    img = tf.image.resize(img, IMG_SIZE)[tf.newaxis, ...]
    bin_pred, multi_pred = model.predict(img)
    is_lego = (bin_pred[0][0] > 0.5)
    set_idx = np.argmax(multi_pred[0])
    return is_lego, (set_list[set_idx] if is_lego else None)

print(predict_from_url('https://cdn.rebrickable.com/media/sets/21034-1.jpg'))

# from kaggle.api.kaggle_api_extended import KaggleApi
# api = KaggleApi(); 
# api.authenticate()
# api.dataset_create_version(dataset='username/your-model-dataset',
#                            files=['/kaggle/working/model.keras'],
#                            version_notes='epoch20 checkpoint')

Downloading data from https://cdn.rebrickable.com/media/sets/6056-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/75160-1.jpg
Downloading data from https://cdn.rebrickable.com/media/sets/8668-1.jpg
... (per-image download progress output truncated) ...
3/3 ━━━━━━━━━━━━━━━━━━━━ 26s 7s/step - binary_accuracy: 0.0000e+00 - binary_loss: 0.7013 - loss: 10.6773 - multi_accuracy: 0.0000e+00 - multi_loss: 9.9759
/usr/lib/python3.11/contextlib.py:158: UserWarning: Your input ran out of data; interrupting training. Make sure that your dataset or generator can generate at least `steps_per_epoch * epochs` batches. You may need to use the `.repeat()` function when building your dataset.
  self.gen.throw(typ, value, traceback)
1/1 ━━━━━━━━━━━━━━━━━━━━ 0s 264ms/step
(False, None)
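
Since the ModelCheckpoint callback wrote the best weights to `best_model.keras`, a later session can reload that file instead of retraining; a brief sketch, assuming the checkpoint exists in the working directory:

# Reload the checkpoint written by ModelCheckpoint and reuse it for inference
model = keras.models.load_model('best_model.keras')

# predict_from_url() above reads the global `model`, so it now uses the reloaded weights
print(predict_from_url('https://cdn.rebrickable.com/media/sets/21034-1.jpg'))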