2025年度企业参与研发项目科技人员汇总表_姓名重复已标注中有无姓名重复？如果还有，请予以标注。

Creato il: 22 gennaio 2026

Risposto usando GPT-5.2 Thinking da Chat01

Domanda

2025年度企业参与研发项目科技人员汇总表_姓名重复已标注.xlsx

Python

python
import openpyxl, os, re, pandas as pd, numpy as np, collections, math, datetime, statistics
# Load the previously annotated workbook and list the worksheets it contains.
path = "/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复已标注.xlsx"
wb = openpyxl.load_workbook(filename=path)
wb.sheetnames


['Sheet1', '重复姓名汇总']

Python

python
ws = wb["Sheet1"]
# Collect the rows among the first ten that contain a text cell mentioning
# "姓名"; these are the candidate header rows.
header_candidates = []
for r in range(1, 11):
    row = [ws.cell(row=r, column=c).value for c in range(1, ws.max_column + 1)]
    # any() over an empty row is already False, so no separate emptiness test.
    mentions_name = any("姓名" in v for v in row if isinstance(v, str))
    if mentions_name:
        header_candidates.append(r)
header_candidates[:5], ws.max_row, ws.max_column


([2], 159, 14)

Python

python
header_row = 2
# Map every column index of the header row to the caption stored there.
headers = {
    c: ws.cell(row=header_row, column=c).value
    for c in range(1, ws.max_column + 1)
}
headers


{1: '序号',
 2: '单位',
 3: '部门、车间',
 4: '姓名',
 5: '性别',
 6: '身份证号',
 7: '职务（工种）',
 8: '参加工作时间',
 9: '毕业院校',
 10: '专业',
 11: '学历\n（文化程度）',
 12: '职称\n（技术等级）',
 13: '人员分类\n（干/工）',
 14: '参与研发项目'}

Python

python
name_col = 4
# Build (row number, raw value, normalized value) for every non-empty name
# cell below the header row.
names = []
for r in range(header_row + 1, ws.max_row + 1):
    v = ws.cell(row=r, column=name_col).value
    if v is not None:
        # Strings lose all whitespace (so "路  严" matches "路严"); any other
        # cell type is stringified and trimmed.
        norm = re.sub(r"\s+", "", v) if isinstance(v, str) else str(v).strip()
        names.append((r, v, norm))
len(names), names[:5], names[-5:]


(157,
 [(3, '路  严', '路严'),
  (4, '崔恒庆', '崔恒庆'),
  (5, '杨兴坤', '杨兴坤'),
  (6, '王  瑶', '王瑶'),
  (7, '张永堂', '张永堂')],
 [(155, '张濮显', '张濮显'),
  (156, '董  旭', '董旭'),
  (157, '庄添隆', '庄添隆'),
  (158, '夏长林', '夏长林'),
  (159, '岳增光', '岳增光')])

Python

python
from collections import defaultdict, Counter
# Tally how often each non-empty normalized name occurs.
counter = Counter(entry[2] for entry in names if entry[2])
# A name counts as duplicated when it is present on two or more rows.
dupes = [name for name, hits in counter.items() if hits >= 2]
len(dupes), dupes[:20]


(2, ['路严', '王瑶'])

Python

python
# For every duplicated name, list the worksheet rows on which it occurs.
dup_info = {}
for dup_name in dupes:
    dup_info[dup_name] = [entry[0] for entry in names if entry[2] == dup_name]
dup_info


{'路严': [3, 110], '王瑶': [6, 112]}

Python

python
from openpyxl.styles.colors import Color
def cell_style_sig(cell):
    """Summarise the fill and font settings of ``cell`` as a plain dict.

    The result holds the fill pattern type, the RGB values of the fill's
    foreground/background colours and of the font colour (``None`` when the
    colour object is missing or falsy), and the bold/italic/underline flags.
    """

    def _rgb(color):
        # A missing colour is reported as None instead of raising.
        return color.rgb if color else None

    fill, font = cell.fill, cell.font
    signature = {"fill_type": fill.patternType}
    signature["fill_fg"] = _rgb(fill.fgColor)
    signature["fill_bg"] = _rgb(fill.bgColor)
    signature["font_color"] = _rgb(font.color)
    signature["font_bold"] = font.bold
    signature["font_italic"] = font.italic
    signature["font_underline"] = font.underline
    return signature
# Print the current styling of every cell that holds a duplicated name.
for name, rows in dup_info.items():
    for r in rows:
        cell = ws.cell(row=r, column=name_col)
        signature = cell_style_sig(cell)
        print(name, r, cell.value, signature)

Python

python
# Open the existing duplicate-summary sheet and report its used size.
ws2 = wb["重复姓名汇总"]
(ws2.max_row, ws2.max_column)


(64, 5)

Python

python
# Show the first five rows (columns A-E) of the summary sheet to find its header.
preview = ws2.iter_rows(min_row=1, max_row=5, min_col=1, max_col=5, values_only=True)
for r, values in enumerate(preview, start=1):
    print(r, list(values))

Python

python
# Collect the integer serial numbers found in column A from row 3 downward and
# report their max, min, count, and the first/last ten values.
serials = []
for r in range(3, ws.max_row + 1):
    v = ws.cell(r, 1).value
    if v is None:
        continue
    try:
        serials.append(int(str(v).strip()))
    except ValueError:
        # Non-numeric serial cells are skipped on purpose. Only ValueError is
        # swallowed so that interrupts and genuine bugs still surface.
        continue
max(serials), min(serials), len(serials), serials[:10], serials[-10:]


(157,
 1,
 157,
 [1, 2, 3, 4, 5, 6, 7, 8, 9, 10],
 [148, 149, 150, 151, 152, 153, 154, 155, 156, 157])

Python

python
def row_values(r):
    """Return the values of every column in worksheet row ``r`` as a list."""
    last_col = ws.max_column
    return [ws.cell(row=r, column=c).value for c in range(1, last_col + 1)]


# Print the full records of the four rows flagged as duplicates, pair by pair.
for r in (3, 110, 6, 112):
    print(r, row_values(r))

Python

python
# Search every cell of Sheet1 for one specific name (noted in the original as
# taken from the first row of ws2).
target = "于洪涛"
all_cells = (
    (r, c, ws.cell(row=r, column=c).value)
    for r in range(1, ws.max_row + 1)
    for c in range(1, ws.max_column + 1)
)
# Keep only text cells whose content contains the target.
found = [hit for hit in all_cells if isinstance(hit[2], str) and target in hit[2]]
found[:10], len(found)


([(46, 4, '于洪涛')], 1)

Python

python
# Count the merged ranges on the sheet and show up to ten of them.
merged = ws.merged_cells.ranges
len(merged), list(merged)[:10]


(1, [<MergedCellRange A1:M1>])

Python

python
# Check whether the sheet carries any conditional-formatting rules.
cf = ws.conditional_formatting
rule_count = len(cf)
# cf_rules is only read when the attribute exists on this object.
rules_preview = list(cf.cf_rules)[:5] if hasattr(cf, "cf_rules") else None
rule_count, rules_preview


(0, None)

Python

python
# Scan the name column for any cell that already carries visible marking:
# a real pattern fill, an explicit font colour, or bold text.
nondefault = []
for r in range(3, ws.max_row + 1):
    cell = ws.cell(row=r, column=name_col)
    fill, font = cell.fill, cell.font
    is_filled = fill.patternType not in (None, "none")
    if not (is_filled or font.color is not None or font.bold):
        continue
    nondefault.append(
        (
            r,
            cell.value,
            fill.patternType,
            fill.fgColor.rgb if fill.fgColor else None,
            font.color.rgb if font.color else None,
            font.bold,
        )
    )
nondefault[:10], len(nondefault)


([], 0)

Python

python
# Rows (from row 3 down) whose name cell is empty or whitespace-only.
blank_rows = []
for r in range(3, ws.max_row + 1):
    v = ws.cell(row=r, column=name_col).value
    is_blank = v is None or (isinstance(v, str) and not v.strip())
    if is_blank:
        blank_rows.append(r)
len(blank_rows), blank_rows[:20]


(0, [])

Python

python
import pathlib, textwrap, json, os, re, pandas as pd
skill_path = "/home/oai/skills/spreadsheets/skill.md"
# Print the first 2000 characters of the skill notes; the context manager
# closes the file handle, which the bare open(...).read() never did.
with open(skill_path, "r", encoding="utf-8") as fh:
    print(fh.read()[:2000])

Python

python
# Print characters 2000-3999 of the skill notes, closing the file afterwards.
with open(skill_path, "r", encoding="utf-8") as fh:
    print(fh.read()[2000:4000])

Python

python
import openpyxl
from openpyxl.styles import PatternFill, Font
from openpyxl.comments import Comment
# Reload the workbook from disk and recompute the duplicate names from scratch.
path = "/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复已标注.xlsx"
wb = openpyxl.load_workbook(filename=path)
ws = wb["Sheet1"]
header_row = 2
name_col = 4
# (row number, raw value, normalized value) for every non-empty name cell.
names = []
for r in range(header_row + 1, ws.max_row + 1):
    v = ws.cell(row=r, column=name_col).value
    if v is not None:
        # Strings lose normal and full-width spaces; other types are
        # stringified and trimmed.
        norm = re.sub(r"[\s\u3000]+", "", v) if isinstance(v, str) else str(v).strip()
        names.append((r, v, norm))
counter = Counter(entry[2] for entry in names if entry[2])
dupes = [name for name, hits in counter.items() if hits >= 2]
# Worksheet rows on which each duplicated name occurs.
dup_rows = {}
for dup_name in dupes:
    dup_rows[dup_name] = [entry[0] for entry in names if entry[2] == dup_name]
dup_rows


{'路严': [3, 110], '王瑶': [6, 112]}

Python

python
from copy import deepcopy
# Flag every duplicated name cell with a light-red fill, bold dark-red text
# and a comment that points at the other occurrences.
dup_fill=PatternFill(patternType="solid", fgColor="FFFFC7CE")  # light red
dup_font_color="FF9C0006"  # dark red
for name, rows in dup_rows.items():
    for r in rows:
        cell=ws.cell(r,name_col)
        # apply fill
        cell.fill=dup_fill
        # Rebuild the font: the attributes listed below are carried over from
        # the existing font, while bold and the warning colour are forced.
        f=cell.font
        cell.font=Font(name=f.name, sz=f.sz, b=True, i=f.i, u=f.u, strike=f.strike, color=dup_font_color,
                       vertAlign=f.vertAlign, outline=f.outline, shadow=f.shadow, charset=f.charset, family=f.family)
        # Rows and serial numbers (column A) of the other occurrences.
        other=[rr for rr in rows if rr!=r]
        other_serial=[ws.cell(rr,1).value for rr in other]
        text=f"姓名重复（去空格后）：{name}\n本行：序号{ws.cell(r,1).value}，行号{r}\n其他位置：序号{ '、'.join(str(s) for s in other_serial)}，行号{ '、'.join(str(x) for x in other)}"
        # Avoid overwriting an existing comment: append after a blank line.
        if cell.comment is None:
            cell.comment=Comment(text, "自动检查")
        else:
            cell.comment.text = cell.comment.text + "\n\n" + text
# Create a fresh summary sheet, replacing one with the same title if present.
summary_title = "重复姓名汇总_自动检查"
if summary_title in wb.sheetnames:
    del wb[summary_title]
ws_sum = wb.create_sheet(title=summary_title)
headers = ["姓名(去空格)", "出现次数", "序号列表", "行号列表", "姓名原样(示例)"]
for col_idx, caption in enumerate(headers, start=1):
    ws_sum.cell(row=1, column=col_idx, value=caption)
# One row per duplicated name, in sorted order, starting below the header.
rowi = 2
for name in sorted(dupes):
    rows = dup_rows[name]
    serials = [ws.cell(row=r, column=1).value for r in rows]
    # The raw spelling from the first occurrence serves as the example.
    example = next((ws.cell(row=r, column=name_col).value for r in rows), name)
    record = (
        name,
        len(rows),
        "、".join(map(str, serials)),
        "、".join(map(str, rows)),
        example,
    )
    for col_idx, value in enumerate(record, start=1):
        ws_sum.cell(row=rowi, column=col_idx, value=value)
    rowi += 1
# Make the header row bold and centred, and freeze it above the data.
from openpyxl.styles import Alignment
for c in range(1,len(headers)+1):
    cell=ws_sum.cell(1,c)
    cell.font=Font(b=True)
    cell.alignment=Alignment(horizontal="center", vertical="center")
ws_sum.freeze_panes="A2"
# Auto width. CJK characters are about twice as wide as Latin ones, so a plain
# len() would leave columns of Chinese text too narrow.
import unicodedata


def _display_width(value):
    """Approximate the width of ``value``, counting wide/full-width characters as 2."""
    return sum(2 if unicodedata.east_asian_width(ch) in ("W", "F") else 1 for ch in str(value))


for col in range(1, len(headers)+1):
    maxlen=0
    for r in range(1, rowi):
        v=ws_sum.cell(r,col).value
        if v is None:
            continue
        maxlen=max(maxlen, _display_width(v))
    # Clamp to 10-40 so short columns stay readable and long ones stay bounded.
    ws_sum.column_dimensions[openpyxl.utils.get_column_letter(col)].width=min(max(10, maxlen+2), 40)

# Write the annotated workbook under a new file name; the input file at
# ``path`` is left as it was.
out_path = "/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx"
wb.save(filename=out_path)
out_path


'/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx'

Python

python
from artifact_tool import SpreadsheetArtifact
# First rendering attempt: load the saved workbook into the artifact tool.
artifact = SpreadsheetArtifact.load(out_path)
# Recalculate
artifact.recalculate()
# NOTE(review): no result is recorded for this cell, and the later cells call
# render() with ``output=`` rather than ``output_dir=`` — presumably this
# keyword was rejected; confirm against the artifact_tool API.
render_paths = artifact.render(output_dir="/mnt/data/rendered_dupes_check")
render_paths

Python

python
api_path = "/home/oai/skills/spreadsheets/artifact_tool_spreadsheets_api.md"
# Print the first 2000 characters of the API notes; the context manager closes
# the file handle, which the bare open(...).read() never did.
with open(api_path, "r", encoding="utf-8") as fh:
    print(fh.read()[:2000])

Python

python
# Read the whole API notes file, closing the handle once the text is in memory.
with open(api_path, "r", encoding="utf-8") as fh:
    text = fh.read()
import re
# Character offset of the first mention of "render", or None when absent.
m = re.search(r"render", text)
m.start() if m else None


181

Python

python
# Zero-based indices of the lines in the API notes that mention "render".
idx = []
for i, line in enumerate(text.splitlines()):
    if "render" in line:
        idx.append(i)
idx[:20], len(idx)


([3, 322, 526, 701, 1005], 5)

Python

python
# Split once up front instead of re-splitting the whole document on every
# iteration, then show each matching line truncated to 120 characters.
api_lines = text.splitlines()
for i in idx:
    print(i, api_lines[i][:120])

Python

python
# Print lines 300-369 (zero-based, end exclusive) of the API notes.
start = 300
end = 370
print("\n".join(text.splitlines()[start:end]))

Python

python
from artifact_tool import SpreadsheetArtifact
# Second rendering attempt: reload, recalculate, and render Sheet1 using the
# ``output=`` keyword together with sheet and format options.
artifact=SpreadsheetArtifact.load(out_path)
artifact.recalculate()
# NOTE(review): no result is recorded for this cell, and a later cell creates
# the target directory with os.makedirs before rendering — presumably the
# directory did not exist yet; confirm.
paths=artifact.render(output="/mnt/data/rendered_dupes_check", sheet_name="Sheet1", include_headers=False, scale=1.2)
paths

Python

python
# Third rendering attempt: render only A1:N20 of Sheet1, passing a .png file
# path as ``output``.
artifact=SpreadsheetArtifact.load(out_path)
artifact.recalculate()
# NOTE(review): no result is recorded for this cell; the next cell passes an
# existing directory instead — presumably a file path is not accepted; confirm.
paths1=artifact.render(output="/mnt/data/render_check_sheet1_top.png", sheet_name="Sheet1", cell_range="A1:N20", include_headers=False, scale=1.5)
paths1

Python

python
import os, pathlib
# Make sure the output directory exists, then render the top of Sheet1 (which
# includes the first flagged rows) into it.
outdir = "/mnt/data/rendered_dupes_check"
os.makedirs(outdir, exist_ok=True)
artifact = SpreadsheetArtifact.load(out_path)
artifact.recalculate()
paths1 = artifact.render(
    output=outdir,
    sheet_name="Sheet1",
    cell_range="A1:N20",
    include_headers=False,
    scale=1.5,
)
paths1


(PosixPath('/mnt/data/rendered_dupes_check/Sheet1__range-A1-N20-no-header.png'),)

Python

python
# Render rows 104-116 of Sheet1, the range holding the second occurrences
# (rows 110 and 112).
paths2 = artifact.render(
    output=outdir,
    sheet_name="Sheet1",
    cell_range="A104:N116",
    include_headers=False,
    scale=1.5,
)
paths2


(PosixPath('/mnt/data/rendered_dupes_check/Sheet1__range-A104-N116-no-header.png'),)

Python

python
# Render the top of the newly created summary sheet for a visual check.
paths3 = artifact.render(
    output=outdir,
    sheet_name="重复姓名汇总_自动检查",
    cell_range="A1:E10",
    include_headers=False,
    scale=1.6,
)
paths3


(PosixPath('/mnt/data/rendered_dupes_check/_____range-A1-E10-no-header.png'),)

Python

python
# Reload the saved workbook through the artifact tool, recalculate, and export
# it back onto the same path, then report the resulting file size.
artifact=SpreadsheetArtifact.load(out_path)
artifact.recalculate()
# NOTE(review): overwrite=True replaces the file that openpyxl wrote; confirm
# that the fills and cell comments added earlier survive this round trip.
exported=artifact.export(filename=out_path, overwrite=True)
exported, os.path.getsize(out_path)


(PosixPath('/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx'), 28368)

Pensiero

artifact=SpreadsheetArtifact.load(out_path)
artifact.recalculate()
exported=artifact.export(filename=out_path, overwrite=True)
exported, os.path.getsize(out_path)

(PosixPath('/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx'), 28368)

Thought for 8m 0s

Risposta

经核查（按“姓名”列去空格后比对），仍存在姓名重复，共 2 个姓名各重复 2 次：

路严：序号 1、108（行号 3、110）
王瑶：序号 4、110（行号 6、112）

我已在 Sheet1 的“姓名”单元格中对以上重复姓名进行了红底标注，并为对应单元格添加了备注说明；同时新增了工作表 “重复姓名汇总_自动检查” 汇总重复情况与位置。

下载已复核并标注的文件

Domanda

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Python

Pensiero

Risposta

Condividi questa Q&A