From a6b9531df51dc30003b08f63cbeadeaf8666a791 Mon Sep 17 00:00:00 2001
From: flt6 <1404262047@qq.com>
Date: Fri, 16 May 2025 15:46:34 +0800
Subject: [PATCH] hsv_1

Former-commit-id: 57a8fb7799ea7543b4af1ea49626346d50546f82
---
 .gitattributes           |   3 +
 Auto_Ctrl/.gitattributes |   3 -
 Picture_Train/main.py    |  72 ++++++++++++++++++++----
 Picture_Train/model.pkl  |   3 +
 Picture_Train/show.py    |  29 ++++++++++
 Picture_Train/train_2.py | 118 +++++++++++++++++++++++++++++++++++++++
 6 files changed, 215 insertions(+), 13 deletions(-)
 delete mode 100644 Auto_Ctrl/.gitattributes
 create mode 100644 Picture_Train/model.pkl
 create mode 100644 Picture_Train/show.py
 create mode 100644 Picture_Train/train_2.py

diff --git a/.gitattributes b/.gitattributes
index dfe0770..40304a7 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,2 +1,5 @@
 # Auto detect text files and perform LF normalization
 * text=auto
+*.exe filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
diff --git a/Auto_Ctrl/.gitattributes b/Auto_Ctrl/.gitattributes
deleted file mode 100644
index 02329e6..0000000
--- a/Auto_Ctrl/.gitattributes
+++ /dev/null
@@ -1,3 +0,0 @@
-*.exe filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
diff --git a/Picture_Train/main.py b/Picture_Train/main.py
index eb389a0..82f7da2 100644
--- a/Picture_Train/main.py
+++ b/Picture_Train/main.py
@@ -1,16 +1,68 @@
-# 这是一个示例 Python 脚本。
+import cv2
+import numpy as np
+from pathlib import Path
+from train_2 import train_model,prepare_data,predict
+import joblib
 
-# 按 Shift+F10 执行或将其替换为您的代码。
-# 按 双击 Shift 在所有地方搜索类、文件、工具窗口、操作和设置。
+inp = Path("data/train")
+val = Path("data/val")
+proc = Path("data/proc")
+proc.mkdir(exist_ok=True)
 
 
-def print_hi(name):
-    # 在下面的代码行中使用断点来调试脚本。
-    print(f'Hi, {name}')  # 按 Ctrl+F8 切换断点。
+def preproc(dir:Path):
+    d={}
+    for file in dir.glob("*/*.jpg"):
+        im = cv2.imread(file)
+        if im is None:
+            print(f"Error reading image: {file}")
+            continue
+        cl = file.parents[0].name
+        if cl not in d:
+            d[cl] = []
+        hsv = cv2.cvtColor(im,cv2.COLOR_BGR2HSV)
+        mask = hsv[:,:,1] > 150
+        # cor = np.argwhere(mask)
+        # y_min,x_min = cor.min(axis=0)
+        # y_max,x_max = cor.max(axis=0)
+        mask = mask[:,:,np.newaxis]
+        # cv2.findCoun
+        # im = im[y_min:y_max,x_min:x_max]
+        cnt = np.count_nonzero(mask)
+        hsv*=mask
+        h = round(np.sum(hsv[:,:,0])/cnt)
+        s = round(np.sum(hsv[:,:,1])/cnt)
+        v = round(np.sum(hsv[:,:,2])/cnt)
+        name = f"{h}_{s}_{v}.jpg"
+        d[cl].append((h,s,v))
+        # (proc/cl).mkdir(exist_ok=True)
+        # cv2.imwrite(proc/cl/name,cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR))
+    return d
 
+d:dict[str,list[tuple[int,int,int]]] = preproc(inp)
+val:dict[str,list[tuple[int,int,int]]] = preproc(val)
 
-# 按装订区域中的绿色按钮以运行脚本。
-if __name__ == '__main__':
-    print_hi('PyCharm')
+print("数据预处理完成")
 
-# 访问 https://www.jetbrains.com/help/pycharm/ 获取 PyCharm 帮助
+model, label_map = train_model(d)
+print("训练完成")
+joblib.dump(model, "model.pkl")
+
+# model = joblib.load("model.pkl")
+X_train, y_train = prepare_data(d)
+print(predict(model, label_map, d))
+print(predict(model, label_map, val))
+# print(f"\n训练集准确率: {model.score(X_train, y_train):.5f}")
+# X_train, y_train = prepare_data(val)
+# print(f"\n训练集准确率: {model.score(X_train, y_train):.5f}")
+# model.predict()
+from src_predict import predictor
+suc = cnt = 0
+for file in Path("data/train").glob("*/*.jpg"):
+    cnt+=1
+    pcl,_=predictor(file)
+    acl = file.parents[0].name
+    if acl == pcl:
+        suc+=1
+
+print(f"预测准确率: {suc/cnt:.4f}")
diff --git a/Picture_Train/model.pkl b/Picture_Train/model.pkl
new file mode 100644
index 0000000..06a50a5
--- /dev/null
+++ b/Picture_Train/model.pkl
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:409653310924dc977ea2ae5dd46042ef144f4c8500c460ba5ca5b0f5ce68bed8
+size 535673
diff --git a/Picture_Train/show.py b/Picture_Train/show.py
new file mode 100644
index 0000000..94538aa
--- /dev/null
+++ b/Picture_Train/show.py
@@ -0,0 +1,29 @@
+
+import matplotlib.pyplot as plt
+import random
+
+
+# 创建一个 3D 图形
+fig = plt.figure()
+ax = fig.add_subplot(111, projection='3d')
+
+# 为每个 key 分配一个随机颜色
+colors = {"orange":(1,0,0),"yellow":(0,1,0)}
+# for key in d.keys():
+#     colors[key] = (random.random(), random.random(), random.random())  # 随机 RGB 颜色
+
+# 绘制每个 key 的点
+for key, points in d.items():
+    x_vals = [point[0] for point in points]
+    y_vals = [point[1] for point in points]
+    z_vals = [point[2] for point in points]
+    ax.scatter(x_vals, y_vals, z_vals, label=key, color=colors[key])
+
+# 添加图例和标签
+ax.set_xlabel('X轴')
+ax.set_ylabel('Y轴')
+ax.set_zlabel('Z轴')
+ax.legend()
+
+# 显示图形
+plt.show()
diff --git a/Picture_Train/train_2.py b/Picture_Train/train_2.py
new file mode 100644
index 0000000..8b841ed
--- /dev/null
+++ b/Picture_Train/train_2.py
@@ -0,0 +1,118 @@
+
+import numpy as np
+from sklearn.ensemble import RandomForestClassifier
+from typing import Dict, List, Tuple
+import joblib
+
+def prepare_data(data: Dict[str, List[np.ndarray]]) -> Tuple[np.ndarray, np.ndarray]:
+    """
+    将dict[str, list[ndarray]]格式的数据转换为模型可用的特征矩阵和标签向量
+    
+    参数:
+    data: 格式为 dict[str, list[ndarray]] 的数据，其中键为类别名，值为对应类别的特征数组列表
+    
+    返回:
+    X: 特征矩阵
+    y: 标签向量
+    """
+    features = []
+    labels = []
+    
+    # 为每个类别分配一个数字标签
+    label_map = {class_name: i for i, class_name in enumerate(data.keys())}
+    
+    for class_name, arrays_list in data.items():
+        label = label_map[class_name]
+        for arr in arrays_list:
+            # 处理每个数组中的每个样本
+            features.append(np.array(arr))
+            labels.append(label)
+            # if len(arr.shape) > 1:
+            #     for sample in arr:
+            #         features.append(sample)
+            #         labels.append(label)
+            # else:
+            #     # 处理单个样本的情况
+            #     features.append(arr)
+            #     labels.append(label)
+    
+    return np.array(features), np.array(labels)
+
+def train_model(data: Dict[str, List[np.ndarray]]):
+    """
+    训练分类模型
+    
+    参数:
+    data: 训练数据，格式为 dict[str, list[ndarray]]
+    
+    返回:
+    训练好的模型和标签映射字典
+    """
+    X, y = prepare_data(data)
+    
+    # 创建并训练模型
+    model = RandomForestClassifier(n_estimators=100, random_state=42)
+    model.fit(X, y)
+    
+    # 创建逆向映射，用于将数字标签转回类别名
+    label_map = {i: class_name for i, class_name in enumerate(data.keys())}
+    
+    return model, label_map
+
+def predict(model, label_map: Dict[int, str], val_data: Dict[str, List[np.ndarray]]) -> Dict[str, List[List[str]]]:
+    """
+    使用训练好的模型对验证数据进行预测
+    
+    参数:
+    model: 训练好的模型
+    label_map: 标签映射字典，用于将数字标签转换回类别名
+    val_data: 验证数据，格式为 dict[str, list[ndarray]]
+    
+    返回:
+    预测结果字典，格式为 dict[str, list[list[str]]]，表示每个输入数组中样本的预测类别
+    """
+    results = {}
+    
+    suc = 0
+    cnt = 0
+    for class_name, arrays_list in val_data.items():
+        class_predictions = []
+        for arr in arrays_list:
+            # 确保数据格式正确
+            arr = np.array(arr)
+            cnt+=1
+            if len(arr.shape) == 1:
+                arr = arr.reshape(1, -1)
+            
+            # 进行预测并转换为类别名
+            pred_labels = model.predict(arr)
+            pred_classes = [label_map[label] for label in pred_labels]
+            if len(pred_classes) > 1:continue
+            if class_name==pred_classes[0]:
+                suc+=1
+            # class_predictions.append(pred_classes)
+        
+        results[class_name] = class_predictions
+    
+    return suc/cnt
+
+if __name__ == "__main__":
+    exit()
+    # 训练模型
+    model, label_map = train_model(d)
+    print("训练完成")
+    joblib.dump(model, "model.pkl")
+    
+    # 在验证数据上进行预测
+    # predictions = predict(model, label_map, val)
+    
+    # 输出预测结果
+    # print("预测结果:")
+    # for class_name, class_preds in predictions.items():
+    #     print(f"{class_name}:")
+    #     for i, arr_preds in enumerate(class_preds):
+    #         print(f"  数组 {i}: {arr_preds}")
+    
+    # 输出模型性能评估
+    # X_train, y_train = prepare_data(val)
+    # print(f"\n训练集准确率: {model.score(X_train, y_train):.4f}")
\ No newline at end of file