# Forum code-listing header ("[Python] view as plain text / copy code") - not part of the program.
import ctypes
import time
import json
def PointerToText(dll_path, ptr, length=65535):
    """Convert a memory pointer to text via the DLL's ``PointerToText`` export.

    The DLL is assumed to export ``PointerToText(void *, int) -> char *``;
    that signature is what this wrapper declares before calling it.

    Args:
        dll_path: Path of the DLL file to load.
        ptr: Memory pointer, either an integer address or a ctypes pointer
            object.
        length: Optional byte length forwarded to the DLL (described in the
            original notes as being for non-string pointers).

    Returns:
        The UTF-8 decoded text, or ``None`` when the pointer is null/missing,
        the DLL returns nothing, or any error occurs. Errors are printed,
        never raised.
    """
    try:
        # Reduce ctypes pointer objects to a plain integer address. Casting
        # (instead of dereferencing) yields None for a null pointer object
        # rather than raising.
        if isinstance(ptr, ctypes._Pointer):
            ptr = ctypes.cast(ptr, ctypes.c_void_p).value

        # None or 0 would be passed to the native function as NULL, so stop
        # before touching the DLL at all.
        if not ptr:
            print("指针为空,无法转换")
            return None

        dll = ctypes.WinDLL(dll_path)
        convert = dll.PointerToText
        convert.argtypes = [ctypes.c_void_p, ctypes.c_int]  # address, length
        convert.restype = ctypes.c_char_p  # char* -> bytes (None for NULL)

        raw = convert(ptr, length)
        if not raw:
            print("DLL返回空指针")
            return None
        return raw.decode('utf-8')
    except Exception as e:
        # Deliberately broad: this wrapper's contract is to report any
        # failure (load error, bad argument, decode error) and return None.
        print(f"调用DLL指针转换失败: {e}")
        return None
def StartBrowser(dll_path, browser_path, port, cache_dir, clear_cache, launch_args, browser_info):
    """Start a browser through the DLL's ``StartBrowser`` export.

    Args:
        dll_path: Path of the DLL file to load.
        browser_path: Path of the browser executable (sent UTF-8 encoded).
        port: Port number passed to the DLL as an int.
        cache_dir: Cache directory (sent UTF-8 encoded).
        clear_cache: Integer "clear cache" flag passed to the DLL.
        launch_args: Launch arguments string (sent UTF-8 encoded).
        browser_info: Integer "browser info" value passed to the DLL.

    Returns:
        ``True`` when the DLL returns 0 (reported as success), ``False`` when
        it returns any other code or when an error occurs. The outcome is
        also printed; errors are never raised.
    """
    try:
        dll = ctypes.WinDLL(dll_path)
        start = dll.StartBrowser
        start.argtypes = [
            ctypes.c_char_p,  # browser path
            ctypes.c_int,     # port
            ctypes.c_char_p,  # cache directory
            ctypes.c_int,     # clear-cache flag
            ctypes.c_char_p,  # launch arguments
            ctypes.c_int,     # browser info
        ]
        start.restype = ctypes.c_int

        result = start(
            browser_path.encode('utf-8'),
            port,
            cache_dir.encode('utf-8'),
            clear_cache,
            launch_args.encode('utf-8'),
            browser_info,
        )
    except Exception as e:
        # Broad on purpose: any load/argument/call failure is reported and
        # turned into a False result instead of propagating.
        print(f"调用DLL时发生错误: {e}")
        return False

    if result == 0:
        print("浏览器启动成功")
        return True
    print(f"浏览器启动失败,错误码: {result}")
    return False
def GetActiveTabID(dll_path, port):
    """Get the ID of the active tab via the DLL's ``GetActiveTabID`` export.

    Args:
        dll_path: Path of the DLL file to load.
        port: Port number passed to the DLL as an int.

    Returns:
        The tab ID decoded as UTF-8 text, or ``None`` when the DLL returns
        nothing or any error occurs. Failures are printed, never raised.
    """
    try:
        dll = ctypes.WinDLL(dll_path)
        get_tab = dll.GetActiveTabID
        get_tab.argtypes = [ctypes.c_int]  # port
        get_tab.restype = ctypes.c_char_p  # char* -> bytes (None for NULL)

        raw_id = get_tab(port)
        if not raw_id:
            print("获取标签页ID失败")
            return None
        return raw_id.decode('utf-8')
    except Exception as e:
        # Broad on purpose: report any failure and fall back to None.
        print(f"调用DLL时发生错误: {e}")
        return None
def scrapeAttributesByClass(dll_path, port, tab_id, class_name, attribute_value, context_id):
    """Call the DLL's ``scrapeAttributesByClass`` export and return its result.

    Args:
        dll_path: Path of the DLL file to load.
        port: Port number passed to the DLL as an int.
        tab_id: Tab ID string (sent UTF-8 encoded).
        class_name: Class name string (sent UTF-8 encoded).
        attribute_value: Attribute value string (sent UTF-8 encoded).
        context_id: Context ID passed to the DLL as an int.

    Returns:
        The non-zero 64-bit integer returned by the DLL (the caller in this
        file treats it as a pointer to hand to ``PointerToText``), or ``None``
        when the DLL returns 0 or any error occurs. Failures are printed,
        never raised.
    """
    try:
        dll = ctypes.WinDLL(dll_path)
        scrape = dll.scrapeAttributesByClass
        scrape.argtypes = [
            ctypes.c_int,     # port
            ctypes.c_char_p,  # tab ID
            ctypes.c_char_p,  # class name
            ctypes.c_char_p,  # attribute value
            ctypes.c_int,     # context ID
        ]
        scrape.restype = ctypes.c_int64

        result_ptr = scrape(
            port,
            tab_id.encode('utf-8'),
            class_name.encode('utf-8'),
            attribute_value.encode('utf-8'),
            context_id,
        )
    except Exception as e:
        # Broad on purpose: report any failure and fall back to None.
        print(f"失败: {e}")
        return None

    if result_ptr == 0:
        print("失败")
        return None
    return result_ptr
if __name__ == "__main__":
    # Demo flow: start the browser, fetch the active tab ID, scrape elements
    # by class name, convert the returned pointer to text and print each item.
    dll_path = "F:\\Python\\jb\\Bt_ChromeFormBot_64.dll"  # full path to the DLL file
    browser_path = "C:\\Program Files\\Google\\Chrome\\Application\\chrome.exe"
    port = 9200
    cache_dir = "C:\\Temp\\BrowserCache"
    clear_cache = 1
    launch_args = " https://www.xiaohongshu.com --no-first-run"
    browser_info = 0

    # Start the browser.
    StartBrowser(dll_path, browser_path, port, cache_dir, clear_cache, launch_args, browser_info)
    time.sleep(2)  # presumably gives the browser time to come up - TODO confirm

    # Fetch the active tab ID.
    tab_id = GetActiveTabID(dll_path, port)
    print(tab_id)
    time.sleep(2)

    class_name = "footer"
    attribute_value = "innerText"
    context_id = 0
    if tab_id:
        # Call the element scraping function.
        int64_ptr = scrapeAttributesByClass(dll_path, port, tab_id, class_name, attribute_value, context_id)
        # Both calls signal failure with None; do not feed None into the next
        # step (json.loads(None) would raise TypeError).
        text = PointerToText(dll_path, int64_ptr) if int64_ptr else None
        if text is None:
            print("未获取到元素数据")
        else:
            try:
                # The payload is JSON whose result.result.value field is
                # itself a JSON-encoded string, hence the double decode.
                data = json.loads(json.loads(text)["result"]["result"]["value"])
            except (ValueError, KeyError, TypeError) as e:
                print(f"解析返回数据失败: {e}")
            else:
                for idx, item in enumerate(data, start=1):
                    print(f"{idx}. {item['textContent']}")