# [Python] 纯文本查看 复制代码
import struct
import os
def _read_exact(stream, offset, size):
    """Return exactly ``size`` bytes read at ``offset``, or None on a short read."""
    stream.seek(offset)
    chunk = stream.read(size)
    return chunk if len(chunk) == size else None


def get_pdb_info(file_path):
    """Extract the CodeView (RSDS) PDB reference stored in a PE image.

    Walks the DOS header, NT headers, data directories and section table to
    locate the debug directory, then looks for a CodeView entry whose payload
    starts with the ``RSDS`` signature.

    Args:
        file_path: Path of the PE file (EXE/DLL/SYS) to inspect.

    Returns:
        A dict with the keys ``pdb_name`` (file name only), ``guid`` (32
        upper-case hex digits), ``age`` (int) and ``download_url`` (symbol
        server URL built from the other three), or ``None`` when the file is
        not a PE32/PE32+ image, is truncated, or carries no RSDS record.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    # PDB paths are Windows paths; ntpath splits on backslashes on any host OS.
    import ntpath

    with open(file_path, 'rb') as f:
        # 1. DOS header: check the signature and fetch the NT header offset.
        dos_header = f.read(64)
        if len(dos_header) < 64 or dos_header[:2] != b'MZ':
            return None
        e_lfanew = struct.unpack_from('<I', dos_header, 60)[0]

        # 2. NT headers: signature (4) + IMAGE_FILE_HEADER (20) + magic (2).
        #    These live at e_lfanew in the FILE, usually beyond the 64-byte
        #    DOS header, so they must be read from the stream.
        nt_head = _read_exact(f, e_lfanew, 26)
        if nt_head is None or nt_head[:4] != b'PE\x00\x00':
            return None
        num_sections = struct.unpack_from('<H', nt_head, 6)[0]
        opt_header_size = struct.unpack_from('<H', nt_head, 20)[0]
        magic = struct.unpack_from('<H', nt_head, 24)[0]

        # The data directories start 96 bytes into a PE32 optional header
        # and 112 bytes into a PE32+ one.
        if magic == 0x20b:
            dir_base = 112
        elif magic == 0x10b:
            dir_base = 96
        else:
            return None
        opt_offset = e_lfanew + 4 + 20

        # The debug directory is the 7th data directory (index 6); make sure
        # the optional header is large enough and declares that many entries.
        if opt_header_size < dir_base + 7 * 8:
            return None
        raw = _read_exact(f, opt_offset + dir_base - 4, 4)
        if raw is None or struct.unpack('<I', raw)[0] <= 6:
            return None
        raw = _read_exact(f, opt_offset + dir_base + 6 * 8, 8)
        if raw is None:
            return None
        debug_vaddr, debug_size = struct.unpack('<II', raw)
        if debug_vaddr == 0 or debug_size == 0:
            return None

        # 3. Translate the debug directory RVA into a file offset by finding
        #    the section that contains it. The section table follows the
        #    optional header.
        section_table_offset = opt_offset + opt_header_size
        debug_file_offset = None
        for index in range(num_sections):
            sect = _read_exact(f, section_table_offset + index * 40, 40)
            if sect is None:
                return None
            s_vsize, s_vaddr, s_rawsize, s_rawptr = struct.unpack_from(
                '<IIII', sect, 8
            )
            # VirtualSize may be 0 in some images; fall back to the raw size.
            if s_vaddr <= debug_vaddr < s_vaddr + max(s_vsize, s_rawsize):
                debug_file_offset = s_rawptr + (debug_vaddr - s_vaddr)
                break
        if debug_file_offset is None:
            return None

        # 4. Walk the IMAGE_DEBUG_DIRECTORY array (28 bytes per entry). Each
        #    entry is read at an explicit offset because following an entry's
        #    raw-data pointer moves the file cursor.
        for index in range(debug_size // 28):
            entry = _read_exact(f, debug_file_offset + index * 28, 28)
            if entry is None:
                break
            dtype, data_size, _, raw_ptr = struct.unpack_from('<IIII', entry, 12)
            if dtype != 2:  # IMAGE_DEBUG_TYPE_CODEVIEW
                continue

            # RSDS record: signature (4) + GUID (16) + age (4) + path (NUL-terminated).
            record = _read_exact(f, raw_ptr, 24)
            if record is None or record[:4] != b'RSDS':
                continue
            guid = record[4:20]
            age = struct.unpack_from('<I', record, 20)[0]

            # Prefer the declared payload size for the path; keep the read bounded.
            name_len = data_size - 24 if data_size > 24 else 256
            raw_name = f.read(min(name_len, 4096)).split(b'\x00', 1)[0]
            pdb_name = ntpath.basename(raw_name.decode('utf-8', errors='replace'))

            # Format the GUID the way the symbol server expects it.
            g = struct.unpack('<IHH8B', guid)
            guid_str = (
                f"{g[0]:08X}{g[1]:04X}{g[2]:04X}"
                f"{''.join(f'{x:02X}' for x in g[3:])}"
            )
            return {
                "pdb_name": pdb_name,
                "guid": guid_str,
                "age": age,
                "download_url": f"https://msdl.microsoft.com/download/symbols/{pdb_name}/{guid_str}{age:X}/{pdb_name}",
            }
    return None
# Smoke test: resolve the PDB reference of the Windows kernel image and print it.
path = r"C:\Windows\System32\ntoskrnl.exe"
info = get_pdb_info(path)
if info:
    # One print call with a newline separator emits the same four lines.
    print(
        f"PDB Name: {info['pdb_name']}",
        f"GUID: {info['guid']}",
        f"Age: {info['age']:X}",
        f"URL: {info['download_url']}",
        sep="\n",
    )
照着写就行,本质上就是对文件二进制数据的读取、解析和转换。
补充内容 (2026-3-28 11:02):
在 64 位系统上用 32 位进程提取 System32 目录里的文件时,需要先调用 kernel32.dll 的 Wow64DisableWow64FsRedirection 关闭文件系统重定向,或者改用 C:\Windows\Sysnative 代替 C:\Windows\System32。