2025年度企业参与研发项目科技人员汇总表_姓名重复已标注中有无姓名重复?如果还有,请予以标注。

Creato il: 22 gennaio 2026

Risposto usando GPT-5.2 Thinking da Chat01

Domanda

2025年度企业参与研发项目科技人员汇总表_姓名重复已标注.xlsx

2025年度企业参与研发项目科技人员汇总表_姓名重复已标注中有无姓名重复?如果还有,请予以标注。

Python

python
import openpyxl, os, re, pandas as pd, numpy as np, collections, math, datetime, statistics path="/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复已标注.xlsx" wb=openpyxl.load_workbook(path) wb.sheetnames ['Sheet1', '重复姓名汇总']

Python

python
ws=wb['Sheet1'] # find header row and name column indices containing "姓名" header_candidates=[] for r in range(1,11): row=[ws.cell(r,c).value for c in range(1,ws.max_column+1)] if row and any(isinstance(v,str) and "姓名" in v for v in row): header_candidates.append(r) header_candidates[:5], ws.max_row, ws.max_column ([2], 159, 14)

Python

python
header_row=2 headers={} for c in range(1,ws.max_column+1): v=ws.cell(header_row,c).value headers[c]=v headers {1: '序号', 2: '单位', 3: '部门、车间', 4: '姓名', 5: '性别', 6: '身份证号', 7: '职务(工种)', 8: '参加工作时间', 9: '毕业院校', 10: '专业', 11: '学历\n(文化程度)', 12: '职称\n(技术等级)', 13: '人员分类\n(干/工)', 14: '参与研发项目'}

Python

python
name_col=4 names=[] for r in range(header_row+1, ws.max_row+1): v=ws.cell(r,name_col).value if v is None: continue if isinstance(v,str): norm=re.sub(r"\s+","",v) # remove whitespace else: norm=str(v).strip() names.append((r,v,norm)) len(names), names[:5], names[-5:] (157, [(3, '路 严', '路严'), (4, '崔恒庆', '崔恒庆'), (5, '杨兴坤', '杨兴坤'), (6, '王 瑶', '王瑶'), (7, '张永堂', '张永堂')], [(155, '张濮显', '张濮显'), (156, '董 旭', '董旭'), (157, '庄添隆', '庄添隆'), (158, '夏长林', '夏长林'), (159, '岳增光', '岳增光')])

Python

python
from collections import defaultdict, Counter counter=Counter(norm for _,_,norm in names if norm) dupes=[n for n,c in counter.items() if c>1] len(dupes), dupes[:20] (2, ['路严', '王瑶'])

Python

python
dup_info={n:[r for r,_,norm in names if norm==n] for n in dupes} dup_info {'路严': [3, 110], '王瑶': [6, 112]}

Python

python
from openpyxl.styles.colors import Color


def cell_style_sig(cell):
    """Summarise the fill and font styling of ``cell`` as a plain dict.

    The colour entries hold the ``rgb`` attribute of the corresponding
    colour object, or ``None`` when that colour object is falsy.
    """
    def rgb_or_none(color):
        # Guard against a missing colour object before reading ``rgb``.
        return color.rgb if color else None

    pattern = cell.fill
    typeface = cell.font
    return dict(
        fill_type=pattern.patternType,
        fill_fg=rgb_or_none(pattern.fgColor),
        fill_bg=rgb_or_none(pattern.bgColor),
        font_color=rgb_or_none(typeface.color),
        font_bold=typeface.bold,
        font_italic=typeface.italic,
        font_underline=typeface.underline,
    )


# Print the style signature of the name cell on every row recorded in
# dup_info, to see whether any marking is already applied there.
for name, rows in dup_info.items():
    for r in rows:
        cell = ws.cell(r, name_col)
        print(name, r, cell.value, cell_style_sig(cell))

Python

python
ws2=wb['重复姓名汇总'] ws2.max_row, ws2.max_column (64, 5)

Python

python
# view header row for r in range(1,6): print(r,[ws2.cell(r,c).value for c in range(1,6)])

Python

python
# list max of serial number values serials=[] for r in range(3, ws.max_row+1): v=ws.cell(r,1).value if v is not None: try: serials.append(int(str(v).strip())) except: pass max(serials), min(serials), len(serials), serials[:10], serials[-10:] (157, 1, 157, [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], [148, 149, 150, 151, 152, 153, 154, 155, 156, 157])

Python

python
def row_values(r):
    """Return the values of every column of row ``r`` in ``ws`` as a list."""
    values = []
    for col in range(1, ws.max_column + 1):
        values.append(ws.cell(r, col).value)
    return values


# Print the full rows so the two records of each duplicated name can be
# compared side by side.
for r in (3, 110, 6, 112):
    print(r, row_values(r))

Python

python
# search for a specific from ws2 first row target="于洪涛" found=[] for r in range(1,ws.max_row+1): for c in range(1,ws.max_column+1): v=ws.cell(r,c).value if isinstance(v,str) and target in v: found.append((r,c,v)) found[:10], len(found) ([(46, 4, '于洪涛')], 1)

Python

python
len(ws.merged_cells.ranges), list(ws.merged_cells.ranges)[:10] (1, [<MergedCellRange A1:M1>])

Python

python
cf = ws.conditional_formatting len(cf), list(cf.cf_rules)[:5] if hasattr(cf,'cf_rules') else None (0, None)

Python

python
# scan name column for non-default style (fill patternType not None or font color not None or bold) nondefault=[] for r in range(3, ws.max_row+1): cell=ws.cell(r,name_col) if (cell.fill.patternType is not None and cell.fill.patternType!='none') or (cell.font.color is not None) or (cell.font.bold): nondefault.append((r, cell.value, cell.fill.patternType, cell.fill.fgColor.rgb if cell.fill.fgColor else None, cell.font.color.rgb if cell.font.color else None, cell.font.bold)) nondefault[:10], len(nondefault) ([], 0)

Python

python
blank_rows=[] for r in range(3, ws.max_row+1): v=ws.cell(r,name_col).value if v is None or (isinstance(v,str) and v.strip()== ""): blank_rows.append(r) len(blank_rows), blank_rows[:20] (0, [])

Python

python
import pathlib, textwrap, json, os, re, pandas as pd skill_path="/home/oai/skills/spreadsheets/skill.md" print(open(skill_path,'r',encoding='utf-8').read()[:2000])

Python

python
print(open(skill_path,'r',encoding='utf-8').read()[2000:4000])

Python

python
import openpyxl from openpyxl.styles import PatternFill, Font from openpyxl.comments import Comment path="/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复已标注.xlsx" wb=openpyxl.load_workbook(path) ws=wb['Sheet1'] header_row=2 name_col=4 # Build list names=[] for r in range(header_row+1, ws.max_row+1): v=ws.cell(r,name_col).value if v is None: continue if isinstance(v,str): norm=re.sub(r"[\s\u3000]+","",v) # remove normal and full-width spaces else: norm=str(v).strip() names.append((r,v,norm)) counter=Counter(norm for _,_,norm in names if norm) dupes=[n for n,c in counter.items() if c>1] dup_rows={n:[r for r,_,norm in names if norm==n] for n in dupes} dup_rows {'路严': [3, 110], '王瑶': [6, 112]}

Python

python
from copy import deepcopy dup_fill=PatternFill(patternType="solid", fgColor="FFFFC7CE") # light red dup_font_color="FF9C0006" # dark red for name, rows in dup_rows.items(): # serial numbers serials=[ws.cell(r,1).value for r in rows] for r in rows: cell=ws.cell(r,name_col) # apply fill cell.fill=dup_fill # apply font color while retaining other settings f=cell.font # copy font with color and bold new_font=Font(name=f.name, sz=f.sz, b=True, i=f.i, u=f.u, strike=f.strike, color=dup_font_color, vertAlign=f.vertAlign, outline=f.outline, shadow=f.shadow, charset=f.charset, family=f.family) cell.font=new_font # add comment other=[rr for rr in rows if rr!=r] other_serial=[ws.cell(rr,1).value for rr in other] text=f"姓名重复(去空格后):{name}\n本行:序号{ws.cell(r,1).value},行号{r}\n其他位置:序号{ '、'.join(str(s) for s in other_serial)},行号{ '、'.join(str(x) for x in other)}" # Avoid overwriting existing comment if cell.comment is None: cell.comment=Comment(text, "自动检查") else: cell.comment.text = cell.comment.text + "\n\n" + text # create new summary sheet summary_title="重复姓名汇总_自动检查" if summary_title in wb.sheetnames: del wb[summary_title] ws_sum=wb.create_sheet(summary_title) headers=["姓名(去空格)", "出现次数", "序号列表", "行号列表", "姓名原样(示例)"] for c,h in enumerate(headers,1): ws_sum.cell(1,c,value=h) # add rows rowi=2 for name in sorted(dupes): rows=dup_rows[name] serials=[ws.cell(r,1).value for r in rows] example=next((ws.cell(r,name_col).value for r in rows), name) ws_sum.cell(rowi,1,value=name) ws_sum.cell(rowi,2,value=len(rows)) ws_sum.cell(rowi,3,value="、".join(str(s) for s in serials)) ws_sum.cell(rowi,4,value="、".join(str(r) for r in rows)) ws_sum.cell(rowi,5,value=example) rowi+=1 # style header bold and freeze from openpyxl.styles import Alignment for c in range(1,len(headers)+1): cell=ws_sum.cell(1,c) cell.font=Font(b=True) cell.alignment=Alignment(horizontal="center", vertical="center") ws_sum.freeze_panes="A2" # auto width for col in range(1, len(headers)+1): maxlen=0 for r in range(1, rowi): 
v=ws_sum.cell(r,col).value if v is None: continue l=len(str(v)) if l>maxlen: maxlen=l ws_sum.column_dimensions[openpyxl.utils.get_column_letter(col)].width=min(max(10, maxlen+2), 40) # Save out_path="/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx" wb.save(out_path) out_path '/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx'

Python

python
from artifact_tool import SpreadsheetArtifact artifact = SpreadsheetArtifact.load(out_path) # Recalculate artifact.recalculate() render_paths = artifact.render(output_dir="/mnt/data/rendered_dupes_check") render_paths

Python

python
api_path="/home/oai/skills/spreadsheets/artifact_tool_spreadsheets_api.md" print(open(api_path,'r',encoding='utf-8').read()[:2000])

Python

python
text=open(api_path,'r',encoding='utf-8').read() import re m=re.search(r"render", text) m.start() if m else None 181

Python

python
idx=[i for i,line in enumerate(text.splitlines()) if "render" in line] idx[:20], len(idx) ([3, 322, 526, 701, 1005], 5)

Python

python
for i in idx: print(i, text.splitlines()[i][:120])

Python

python
start=300; end=370 print("\n".join(text.splitlines()[start:end]))

Python

python
from artifact_tool import SpreadsheetArtifact artifact=SpreadsheetArtifact.load(out_path) artifact.recalculate() paths=artifact.render(output="/mnt/data/rendered_dupes_check", sheet_name="Sheet1", include_headers=False, scale=1.2) paths

Python

python
artifact=SpreadsheetArtifact.load(out_path) artifact.recalculate() paths1=artifact.render(output="/mnt/data/render_check_sheet1_top.png", sheet_name="Sheet1", cell_range="A1:N20", include_headers=False, scale=1.5) paths1

Python

python
import os, pathlib outdir="/mnt/data/rendered_dupes_check" os.makedirs(outdir, exist_ok=True) artifact=SpreadsheetArtifact.load(out_path) artifact.recalculate() paths1=artifact.render(output=outdir, sheet_name="Sheet1", cell_range="A1:N20", include_headers=False, scale=1.5) paths1 (PosixPath('/mnt/data/rendered_dupes_check/Sheet1__range-A1-N20-no-header.png'),)

Python

python
paths2=artifact.render(output=outdir, sheet_name="Sheet1", cell_range="A104:N116", include_headers=False, scale=1.5) paths2 (PosixPath('/mnt/data/rendered_dupes_check/Sheet1__range-A104-N116-no-header.png'),)

Python

python
paths3=artifact.render(output=outdir, sheet_name="重复姓名汇总_自动检查", cell_range="A1:E10", include_headers=False, scale=1.6) paths3 (PosixPath('/mnt/data/rendered_dupes_check/_____range-A1-E10-no-header.png'),)

Python

python
artifact=SpreadsheetArtifact.load(out_path) artifact.recalculate() exported=artifact.export(filename=out_path, overwrite=True) exported, os.path.getsize(out_path) (PosixPath('/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx'), 28368)

Pensiero

artifact=SpreadsheetArtifact.load(out_path) artifact.recalculate() exported=artifact.export(filename=out_path, overwrite=True) exported, os.path.getsize(out_path)

(PosixPath('/mnt/data/2025年度企业参与研发项目科技人员汇总表_姓名重复复核标注.xlsx'), 28368)

Thought for 8m 0s

Risposta

经核查(按“姓名”列去空格后比对),仍存在姓名重复,共 2 个姓名各重复 2 次:

  • 路严:序号 1、108(行号 3、110)
  • 王瑶:序号 4、110(行号 6、112)

我已在 Sheet1 的“姓名”单元格中对以上重复姓名进行了红底标注,并为对应单元格添加了备注说明;同时新增了工作表 “重复姓名汇总_自动检查” 汇总重复情况与位置。

下载已复核并标注的文件

Condividi questa Q&A