Распознать текст и выдать в ворд

創建於:2026年3月24日

使用 GPT-5.4 Thinking 回答 Chat01

問題

IMG_0962.jpeg
IMG_0961.jpeg
IMG_0960.jpeg

Распознать текст и выдать в ворд

Python

python
from PIL import Image

# Print the pixel dimensions of each uploaded photo.
for fn in [
    '/mnt/data/IMG_0960.jpeg',
    '/mnt/data/IMG_0961.jpeg',
    '/mnt/data/IMG_0962.jpeg',
]:
    # The context manager releases the file handle as soon as the size is read.
    with Image.open(fn) as img:
        print(fn, img.size)

Python

python
import math
import os

from PIL import Image, ImageEnhance, ImageFilter

# For each photo: write a sharpened 2x copy, then cut that copy into three
# equal-height bands (top, middle, bottom) saved as separate PNG files.
for fn in [
    '/mnt/data/IMG_0960.jpeg',
    '/mnt/data/IMG_0961.jpeg',
    '/mnt/data/IMG_0962.jpeg',
]:
    # Close the source file as soon as the enlarged copy has been produced.
    with Image.open(fn) as img:
        # enlarge 2x
        img2 = img.resize((img.width * 2, img.height * 2), Image.Resampling.LANCZOS)
    img2 = img2.filter(ImageFilter.SHARPEN)

    base = os.path.splitext(os.path.basename(fn))[0]
    out = f'/mnt/data/{base}_2x.png'
    img2.save(out)
    print(out)

    # split vertically into thirds
    h = img2.height
    for i in range(3):
        crop = img2.crop((0, i * h // 3, img2.width, (i + 1) * h // 3))
        # NOTE(review): the transcript cuts this cell off inside the string
        # literal below; only the closing quote and parenthesis were restored.
        # Any statements that originally followed are not recoverable.
        crop.save(f'/mnt/data/{base}_part{i+1}.png')

Python

python
import math
import os

from PIL import Image, ImageEnhance, ImageFilter

# For each photo: write a sharpened 2x copy, then cut that copy into three
# equal-height bands (top, middle, bottom) saved as separate PNG files.
for fn in [
    '/mnt/data/IMG_0960.jpeg',
    '/mnt/data/IMG_0961.jpeg',
    '/mnt/data/IMG_0962.jpeg',
]:
    # Close the source file as soon as the enlarged copy has been produced.
    with Image.open(fn) as img:
        # enlarge 2x
        img2 = img.resize((img.width * 2, img.height * 2), Image.Resampling.LANCZOS)
    img2 = img2.filter(ImageFilter.SHARPEN)

    base = os.path.splitext(os.path.basename(fn))[0]
    out = f'/mnt/data/{base}_2x.png'
    img2.save(out)
    print(out)

    # split vertically into thirds
    h = img2.height
    for i in range(3):
        crop = img2.crop((0, i * h // 3, img2.width, (i + 1) * h // 3))
        # NOTE(review): the transcript cuts this cell off inside the string
        # literal below; only the closing quote and parenthesis were restored.
        # Any statements that originally followed are not recoverable.
        crop.save(f'/mnt/data/{base}_part{i+1}.png')

# Cell output recorded in the transcript:
# <<ImageDisplayed>>

Python

python
import math
import os

from PIL import Image, ImageEnhance, ImageFilter

# For each photo: write a sharpened 2x copy, then cut that copy into three
# equal-height bands (top, middle, bottom) saved as separate PNG files.
for fn in [
    '/mnt/data/IMG_0960.jpeg',
    '/mnt/data/IMG_0961.jpeg',
    '/mnt/data/IMG_0962.jpeg',
]:
    # Close the source file as soon as the enlarged copy has been produced.
    with Image.open(fn) as img:
        # enlarge 2x
        img2 = img.resize((img.width * 2, img.height * 2), Image.Resampling.LANCZOS)
    img2 = img2.filter(ImageFilter.SHARPEN)

    base = os.path.splitext(os.path.basename(fn))[0]
    out = f'/mnt/data/{base}_2x.png'
    img2.save(out)
    print(out)

    # split vertically into thirds
    h = img2.height
    for i in range(3):
        crop = img2.crop((0, i * h // 3, img2.width, (i + 1) * h // 3))
        # NOTE(review): the transcript cuts this cell off inside the string
        # literal below; only the closing quote and parenthesis were restored.
        # Any statements that originally followed are not recoverable.
        crop.save(f'/mnt/data/{base}_part{i+1}.png')

# Cell output recorded in the transcript:
# <<ImageDisplayed>>
# <<ImageDisplayed>>

Python

python
import math
import os

from PIL import Image, ImageEnhance, ImageFilter

# For each photo: write a sharpened 2x copy, then cut that copy into three
# equal-height bands (top, middle, bottom) saved as separate PNG files.
for fn in [
    '/mnt/data/IMG_0960.jpeg',
    '/mnt/data/IMG_0961.jpeg',
    '/mnt/data/IMG_0962.jpeg',
]:
    # Close the source file as soon as the enlarged copy has been produced.
    with Image.open(fn) as img:
        # enlarge 2x
        img2 = img.resize((img.width * 2, img.height * 2), Image.Resampling.LANCZOS)
    img2 = img2.filter(ImageFilter.SHARPEN)

    base = os.path.splitext(os.path.basename(fn))[0]
    out = f'/mnt/data/{base}_2x.png'
    img2.save(out)
    print(out)

    # split vertically into thirds
    h = img2.height
    for i in range(3):
        crop = img2.crop((0, i * h // 3, img2.width, (i + 1) * h // 3))
        # NOTE(review): the transcript cuts this cell off inside the string
        # literal below; only the closing quote and parenthesis were restored.
        # Any statements that originally followed are not recoverable.
        crop.save(f'/mnt/data/{base}_part{i+1}.png')

# Cell output recorded in the transcript:
# <<ImageDisplayed>>
# <<ImageDisplayed>>
# <<ImageDisplayed>>

Python

python
import math
import os

import cv2
import numpy as np
from PIL import Image


def order_points(pts):
    """Arrange four 2-D points into a fixed corner order.

    The point with the smallest x+y goes to slot 0 and the one with the
    largest x+y to slot 2; the point with the smallest y-x goes to slot 1 and
    the one with the largest y-x to slot 3. The caller below unpacks the
    result as (tl, tr, br, bl).

    Args:
        pts: array indexed as pts[i] -> (x, y); presumably shape (4, 2) —
            TODO confirm against the caller.

    Returns:
        A float32 array of shape (4, 2) holding the reordered points.
    """
    rect = np.zeros((4, 2), dtype="float32")

    # Coordinate sums pick out the two corners on the main diagonal.
    s = pts.sum(axis=1)
    rect[0] = pts[np.argmin(s)]
    rect[2] = pts[np.argmax(s)]

    # np.diff along axis 1 yields (second column - first column) per point;
    # its extremes pick out the remaining two corners.
    diff = np.diff(pts, axis=1)
    rect[1] = pts[np.argmin(diff)]
    rect[3] = pts[np.argmax(diff)]
    return rect


def four_point_transform(image, pts):
    """Measure the edge lengths of the quadrilateral described by ``pts``.

    NOTE(review): the transcript truncates this function after the edge
    lengths are computed. The remainder (presumably the target size and the
    actual warp of ``image``) is not present in the source and has not been
    reconstructed, so as written this function returns None.
    """
    rect = order_points(pts)
    (tl, tr, br, bl) = rect

    # Lengths of the bottom and top edges; the larger is kept as an int.
    widthA = np.linalg.norm(br - bl)
    widthB = np.linalg.norm(tr - tl)
    maxWidth = max(int(widthA), int(widthB))

    # Lengths of the right and left edges.
    heightA = np.linalg.norm(tr - br)
    heightB = np.linalg.norm(tl - bl)
    # NOTE(review): source ends here — see the docstring.

Python

python
from PIL import Image, ImageFilter, ImageEnhance, ImageOps

# Per-photo crop rectangle passed straight to Image.crop, i.e.
# (left, upper, right, lower) in pixels of the original JPEG.
crops = {
    'IMG_0962': (140, 110, 640, 910),
    'IMG_0961': (140, 150, 650, 930),
    'IMG_0960': (150, 160, 650, 930),
}

# For each photo: crop to its rectangle, enlarge 3x, convert to grayscale,
# boost contrast, sharpen, and save the result as <name>_crop.png.
for base, box in crops.items():
    # Close the source JPEG as soon as the cropped region has been copied out.
    with Image.open(f'/mnt/data/{base}.jpeg') as src:
        img = src.crop(box)
    img = img.resize((img.width * 3, img.height * 3), Image.Resampling.LANCZOS)

    gray = ImageOps.grayscale(img)
    gray = ImageEnhance.Contrast(gray).enhance(1.8)
    gray = gray.filter(ImageFilter.SHARPEN)

    out = f'/mnt/data/{base}_crop.png'
    gray.save(out)
    print(out, gray.size)

Python

python
from PIL import Image, ImageFilter, ImageEnhance, ImageOps

# Per-photo crop rectangle passed straight to Image.crop, i.e.
# (left, upper, right, lower) in pixels of the original JPEG.
crops = {
    'IMG_0962': (140, 110, 640, 910),
    'IMG_0961': (140, 150, 650, 930),
    'IMG_0960': (150, 160, 650, 930),
}

# For each photo: crop to its rectangle, enlarge 3x, convert to grayscale,
# boost contrast, sharpen, and save the result as <name>_crop.png.
for base, box in crops.items():
    # Close the source JPEG as soon as the cropped region has been copied out.
    with Image.open(f'/mnt/data/{base}.jpeg') as src:
        img = src.crop(box)
    img = img.resize((img.width * 3, img.height * 3), Image.Resampling.LANCZOS)

    gray = ImageOps.grayscale(img)
    gray = ImageEnhance.Contrast(gray).enhance(1.8)
    gray = gray.filter(ImageFilter.SHARPEN)

    out = f'/mnt/data/{base}_crop.png'
    gray.save(out)
    print(out, gray.size)

# Cell output recorded in the transcript:
# <<ImageDisplayed>>

思考

Checking page 3

Page 2 looks good, so now it's time to review page 3. I'll make sure the formatting holds up and the content is properly aligned. Once I'm sure everything is in place, I'll get the document ready for the next steps!

Thought for 10m 25s

回答

分享這個問答