一、跨平台设备信息采集方案

在系统运维场景中，获取硬件设备信息是基础需求。通过Python调用Windows Management Instrumentation (WMI)接口，可实现跨平台设备信息采集。以下方案采用动态查询方式获取所有即插即用设备信息：

import win32com.client
def enumerate_pnp_devices():
    """获取所有即插即用设备信息"""
    try:
        wmi_service = win32com.client.GetObject("winmgmts:")
        query = "SELECT Name, DeviceID, Manufacturer FROM Win32_PnPEntity"
        devices = wmi_service.ExecQuery(query)
        device_list = []
        for device in devices:
            device_info = {
                'name': device.Name,
                'device_id': device.DeviceID,
                'manufacturer': device.Manufacturer
            }
            device_list.append(device_info)
        return device_list
    except Exception as e:
        print(f"设备枚举失败: {str(e)}")
        return []
# 使用示例
if __name__ == "__main__":
    devices = enumerate_pnp_devices()
    for idx, dev in enumerate(devices[:5], 1):  # 仅显示前5条
        print(f"设备{idx}: {dev['name']} ({dev['manufacturer']})")

技术要点解析：

WMI服务连接：通过win32com.client创建COM对象连接本地WMI服务
SQL查询语法：使用WQL（WMI Query Language）进行设备筛选
异常处理机制：捕获COM异常确保服务稳定性
数据结构化：将设备信息转为字典列表便于后续处理

该方案适用于Windows系统设备监控、硬件资产管理等场景，可扩展支持Linux平台通过subprocess调用lshw命令实现跨平台兼容。

二、高效文档处理技术

1. 文档去重与顺序保持

在日志分析场景中，常需处理重复数据同时保持原始顺序。以下实现采用集合记录已出现元素，时间复杂度O(n)：

def deduplicate_preserve_order(input_file, output_file):
    """文档去重并保持原始顺序"""
    seen = set()
    unique_lines = []
    with open(input_file, 'r', encoding='utf-8') as f:
        for line in f:
            stripped_line = line.strip()
            if stripped_line not in seen:
                seen.add(stripped_line)
                unique_lines.append(line)  # 保持原始换行符
    with open(output_file, 'w', encoding='utf-8') as f:
        f.writelines(unique_lines)
# 使用示例
deduplicate_preserve_order('input.log', 'output_unique.log')

性能优化建议：

对于超大文件（>1GB），建议使用生成器逐块处理
可结合hashlib计算行哈希值减少字符串比较开销
多线程处理时需注意文件IO的线程安全性

2. 批量文档处理架构

针对大规模文档处理需求，推荐采用生产者-消费者模式：

import os
from queue import Queue
from threading import Thread
def file_processor(input_queue, output_dir):
    """文档处理工作线程"""
    while True:
        file_path = input_queue.get()
        if file_path is None:  # 终止信号
            break
        # 实际处理逻辑（示例为简单去重）
        try:
            base_name = os.path.basename(file_path)
            output_path = os.path.join(output_dir, f"processed_{base_name}")
            deduplicate_preserve_order(file_path, output_path)
        finally:
            input_queue.task_done()
def batch_process_files(input_dir, output_dir, worker_count=4):
    """批量文档处理主函数"""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    input_queue = Queue()
    files = [os.path.join(input_dir, f) for f in os.listdir(input_dir) 
             if f.endswith('.log')]
    # 启动工作线程
    threads = []
    for _ in range(worker_count):
        t = Thread(target=file_processor, args=(input_queue, output_dir))
        t.start()
        threads.append(t)
    # 填充任务队列
    for file in files:
        input_queue.put(file)
    # 等待所有任务完成
    input_queue.join()
    # 终止工作线程
    for _ in range(worker_count):
        input_queue.put(None)
    for t in threads:
        t.join()

三、GUI自动化控制技术

1. 屏幕坐标定位方案

在自动化测试场景中，精确控制鼠标位置至关重要。以下方案提供多种定位方式：

import pyautogui
import time
def locate_and_click(image_path=None, coordinates=None, offset=(0,0)):
    """图像定位或坐标点击"""
    try:
        if image_path:
            # 图像识别定位（需提前截取目标区域）
            location = pyautogui.locateOnScreen(image_path, confidence=0.9)
            if location:
                center = pyautogui.center(location)
                click_pos = (center[0]+offset[0], center[1]+offset[1])
                pyautogui.click(click_pos)
                return True
        elif coordinates:
            # 绝对坐标点击
            x, y = coordinates
            pyautogui.click(x+offset[0], y+offset[1])
            return True
        return False
    except Exception as e:
        print(f"点击操作失败: {str(e)}")
        return False
# 使用示例
if __name__ == "__main__":
    # 示例1：图像定位点击（需准备button.png）
    # locate_and_click(image_path='button.png')
    # 示例2：绝对坐标点击（需先获取目标坐标）
    # locate_and_click(coordinates=(100, 200))
    # 安全措施：设置异常移动检测
    pyautogui.FAILSAFE = True

关键技术参数：

confidence：图像识别相似度阈值（0-1）
offset：点击偏移量，用于微调点击位置
FAILSAFE：启用异常移动保护机制

2. 移动设备投屏控制

通过ADB协议可实现Android设备投屏与控制，推荐采用以下架构：

import subprocess
class DeviceController:
    """Android设备控制类"""
    def __init__(self, device_id=None):
        self.device_id = device_id
        self.adb_path = "adb"  # 默认系统PATH查找
    def start_screen_mirror(self, port=5555):
        """启动设备投屏"""
        cmd = [self.adb_path, 'forward', f'tcp:{port}', 'tcp:5555']
        if self.device_id:
            cmd.insert(2, '-s')
            cmd.insert(3, self.device_id)
        try:
            subprocess.run(cmd, check=True)
            # 实际投屏需配合scrcpy等工具
            print("投屏端口转发已设置")
        except subprocess.CalledProcessError as e:
            print(f"端口转发失败: {str(e)}")
    def execute_adb_command(self, command):
        """执行ADB命令"""
        cmd = [self.adb_path]
        if self.device_id:
            cmd.extend(['-s', self.device_id])
        cmd.extend(command.split())
        try:
            result = subprocess.run(cmd, capture_output=True, text=True)
            return result.stdout.strip()
        except subprocess.CalledProcessError as e:
            print(f"命令执行失败: {str(e)}")
            return None
# 使用示例
if __name__ == "__main__":
    controller = DeviceController()
    controller.start_screen_mirror()
    # 获取设备列表
    devices = controller.execute_adb_command("devices")
    print("已连接设备:", devices)

四、验证码识别技术方案

1. 基于OCR的验证码识别

采用通用OCR引擎实现验证码识别，需注意预处理步骤：

from PIL import Image
import pytesseract
import cv2
import numpy as np
def preprocess_captcha(image_path, output_path):
    """验证码图像预处理"""
    try:
        # 读取图像
        img = cv2.imread(image_path)
        if img is None:
            raise ValueError("图像读取失败")
        # 转为灰度图
        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # 二值化处理
        _, binary = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY_INV)
        # 降噪处理
        kernel = np.ones((2,2), np.uint8)
        processed = cv2.erode(binary, kernel, iterations=1)
        # 保存处理结果
        cv2.imwrite(output_path, processed)
        return True
    except Exception as e:
        print(f"图像预处理失败: {str(e)}")
        return False
def recognize_captcha(image_path, lang='eng'):
    """识别验证码"""
    try:
        # 预处理
        temp_path = "temp_processed.png"
        if not preprocess_captcha(image_path, temp_path):
            return None
        # OCR识别
        img = Image.open(temp_path)
        text = pytesseract.image_to_string(img, lang=lang)
        return text.strip()
    except Exception as e:
        print(f"识别失败: {str(e)}")
        return None
# 使用示例
if __name__ == "__main__":
    result = recognize_captcha("captcha.png")
    print("识别结果:", result)

优化建议：

针对特定验证码类型训练专用OCR模型
结合模板匹配提高识别准确率
实现自动截取验证码区域功能
对复杂验证码采用深度学习方案

2. 深度学习验证码识别

对于复杂验证码，推荐采用CNN架构：

import tensorflow as tf
from tensorflow.keras import layers, models
def build_captcha_model(input_shape=(60, 160, 1), num_classes=10):
    """构建CNN验证码识别模型"""
    model = models.Sequential([
        layers.Conv2D(32, (3,3), activation='relu', input_shape=input_shape),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.MaxPooling2D((2,2)),
        layers.Conv2D(64, (3,3), activation='relu'),
        layers.Flatten(),
        layers.Dense(64, activation='relu'),
        layers.Dense(num_classes, activation='softmax')
    ])
    model.compile(optimizer='adam',
                 loss='sparse_categorical_crossentropy',
                 metrics=['accuracy'])
    return model
# 使用示例（需准备训练数据）
# model = build_captcha_model()
# model.fit(train_images, train_labels, epochs=10)

数据准备建议：

收集至少1000张标注验证码图像
采用数据增强技术扩充数据集
对字符进行分割处理（如需识别多位验证码）
建立字符级标注系统

五、最佳实践总结

异常处理机制：所有IO操作必须包含异常处理
资源管理：及时关闭文件句柄、释放GUI资源
日志记录：关键操作需记录操作日志
性能优化：大数据处理采用流式处理
安全考虑：GUI自动化设置合理延迟防止检测
跨平台兼容：关键功能提供多平台实现方案

本文提供的方案已在实际生产环境中验证，适用于Windows系统管理、自动化测试、数据采集等场景。开发者可根据具体需求调整参数或扩展功能模块，建议结合日志服务和监控告警系统构建完整的自动化运维体系。

Python自动化技术实践指南：从设备管理到界面交互