Python+OpenCV实战之实现文档扫描

2026-02-27 12:54:06

1.效果展示

网络摄像头扫描：

图片扫描：

最终扫描保存的图片：

（视频）

（图片）

2.项目准备

今天的项目文件只需要两个.py文件，其中一个.py文件是已经写好的函数，你将直接使用它，我不会在此多做讲解，因为我们将会在主要的.py文件import 导入它，如果想了解其中函数是如何写的，请自行学习。

utlis.py，需要添加的.py文件

import cv2
import numpy as np

# TO STACK ALL THE IMAGES IN ONE WINDOW
def stackImages(imgArray,scale,lables=[]):
    rows = len(imgArray)
    cols = len(imgArray[0])
    rowsAvailable = isinstance(imgArray[0], list)
    width = imgArray[0][0].shape[1]
    height = imgArray[0][0].shape[0]
    if rowsAvailable:
        for x in range ( 0, rows):
            for y in range(0, cols):
                imgArray[x][y] = cv2.resize(imgArray[x][y], (0, 0), None, scale, scale)
                if len(imgArray[x][y].shape) == 2: imgArray[x][y]= cv2.cvtColor( imgArray[x][y], cv2.COLOR_GRAY2BGR)
        imageBlank = np.zeros((height, width, 3), np.uint8)
        hor = [imageBlank]*rows
        hor_con = [imageBlank]*rows
        for x in range(0, rows):
            hor[x] = np.hstack(imgArray[x])
            hor_con[x] = np.concatenate(imgArray[x])
        ver = np.vstack(hor)
        ver_con = np.concatenate(hor)
    else:
        for x in range(0, rows):
            imgArray[x] = cv2.resize(imgArray[x], (0, 0), None, scale, scale)
            if len(imgArray[x].shape) == 2: imgArray[x] = cv2.cvtColor(imgArray[x], cv2.COLOR_GRAY2BGR)
        hor= np.hstack(imgArray)
        hor_con= np.concatenate(imgArray)
        ver = hor
    if len(lables) != 0:
        eachImgWidth= int(ver.shape[1] / cols)
        eachImgHeight = int(ver.shape[0] / rows)
        print(eachImgHeight)
        for d in range(0, rows):
            for c in range (0,cols):
                cv2.rectangle(ver,(c*eachImgWidth,eachImgHeight*d),(c*eachImgWidth+len(lables[d][c])*13+27,30+eachImgHeight*d),(255,255,255),cv2.FILLED)
                cv2.putText(ver,lables[d][c],(eachImgWidth*c+10,eachImgHeight*d+20),cv2.FONT_HERSHEY_COMPLEX,0.7,(255,0,255),2)
    return ver

def reorder(myPoints):

    myPoints = myPoints.reshape((4, 2))
    myPointsNew = np.zeros((4, 1, 2), dtype=np.int32)
    add = myPoints.sum(1)

    myPointsNew[0] = myPoints[np.argmin(add)]
    myPointsNew[3] =myPoints[np.argmax(add)]
    diff = np.diff(myPoints, axis=1)
    myPointsNew[1] =myPoints[np.argmin(diff)]
    myPointsNew[2] = myPoints[np.argmax(diff)]

    return myPointsNew

def biggestContour(contours):
    biggest = np.array([])
    max_area = 0
    for i in contours:
        area = cv2.contourArea(i)
        if area > 5000:
            peri = cv2.arcLength(i, True)
            approx = cv2.approxPolyDP(i, 0.02 * peri, True)
            if area > max_area and len(approx) == 4:
                biggest = approx
                max_area = area
    return biggest,max_area
def drawRectangle(img,biggest,thickness):
    cv2.line(img, (biggest[0][0][0], biggest[0][0][1]), (biggest[1][0][0], biggest[1][0][1]), (0, 255, 0), thickness)
    cv2.line(img, (biggest[0][0][0], biggest[0][0][1]), (biggest[2][0][0], biggest[2][0][1]), (0, 255, 0), thickness)
    cv2.line(img, (biggest[3][0][0], biggest[3][0][1]), (biggest[2][0][0], biggest[2][0][1]), (0, 255, 0), thickness)
    cv2.line(img, (biggest[3][0][0], biggest[3][0][1]), (biggest[1][0][0], biggest[1][0][1]), (0, 255, 0), thickness)

    return img

def nothing(x):
    pass

def initializeTrackbars(intialTracbarVals=0):
    cv2.namedWindow("Trackbars")
    cv2.resizeWindow("Trackbars", 360, 240)
    cv2.createTrackbar("Threshold1", "Trackbars", 200,255, nothing)
    cv2.createTrackbar("Threshold2", "Trackbars", 200, 255, nothing)

def valTrackbars():
    Threshold1 = cv2.getTrackbarPos("Threshold1", "Trackbars")
    Threshold2 = cv2.getTrackbarPos("Threshold2", "Trackbars")
    src = Threshold1,Threshold2
    return src

3.代码的讲解与展示

import cv2
import numpy as np
import utlis

########################################################################
webCamFeed = True                                                      #
pathImage = "1.jpg"                                                    #
cap = cv2.VideoCapture(1)                                              #
cap.set(10,160)                                                        #
heightImg = 640                                                        #
widthImg  = 480                                                        #
########################################################################

utlis.initializeTrackbars()
count=0

while True:

    if webCamFeed:
        ret, img = cap.read()
    else:
        img = cv2.imread(pathImage)
    img = cv2.resize(img, (widthImg, heightImg))
    imgBlank = np.zeros((heightImg,widthImg, 3), np.uint8)
    imgGray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    imgBlur = cv2.GaussianBlur(imgGray, (5, 5), 1) # 添加高斯模糊
    thres=utlis.valTrackbars() #获取阈值的轨迹栏值
    imgThreshold = cv2.Canny(imgBlur,thres[0],thres[1]) # 应用CANNY模糊
    kernel = np.ones((5, 5))
    imgDial = cv2.dilate(imgThreshold, kernel, iterations=2)
    imgThreshold = cv2.erode(imgDial, kernel, iterations=1)  

    # 查找所有轮廓
    imgContours = img.copy()
    imgBigContour = img.copy()
    contours, hierarchy = cv2.findContours(imgThreshold, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE) # FIND ALL CONTOURS
    cv2.drawContours(imgContours, contours, -1, (0, 255, 0), 10) # 绘制所有检测到的轮廓

    # 找到最大的轮廓
    biggest, maxArea = utlis.biggestContour(contours) # 找到最大的轮廓
    if biggest.size != 0:
        biggest=utlis.reorder(biggest)
        cv2.drawContours(imgBigContour, biggest, -1, (0, 255, 0), 20) # 画最大的轮廓
        imgBigContour = utlis.drawRectangle(imgBigContour,biggest,2)
        pts1 = np.float32(biggest) # 为扭曲准备点
        pts2 = np.float32([[0, 0],[widthImg, 0], [0, heightImg],[widthImg, heightImg]]) # 为扭曲准备点
        matrix = cv2.getPerspectiveTransform(pts1, pts2)
        imgWarpColored = cv2.warpPerspective(img, matrix, (widthImg, heightImg))

        #从每侧移除20个像素
        imgWarpColored=imgWarpColored[20:imgWarpColored.shape[0] - 20, 20:imgWarpColored.shape[1] - 20]
        imgWarpColored = cv2.resize(imgWarpColored,(widthImg,heightImg))

        # 应用自适应阈值
        imgWarpGray = cv2.cvtColor(imgWarpColored,cv2.COLOR_BGR2GRAY)
        imgAdaptiveThre= cv2.adaptiveThreshold(imgWarpGray, 255, 1, 1, 7, 2)
        imgAdaptiveThre = cv2.bitwise_not(imgAdaptiveThre)
        imgAdaptiveThre=cv2.medianBlur(imgAdaptiveThre,3)

        # 用于显示的图像阵列
        imageArray = ([img,imgGray,imgThreshold,imgContours],
                      [imgBigContour,imgWarpColored, imgWarpGray,imgAdaptiveThre])

    else:
        imageArray = ([img,imgGray,imgThreshold,imgContours],
                      [imgBlank, imgBlank, imgBlank, imgBlank])

    # 显示标签
    lables = [["Original","Gray","Threshold","Contours"],
              ["Biggest Contour","Warp Prespective","Warp Gray","Adaptive Threshold"]]

    stackedImage = utlis.stackImages(imageArray,0.75,lables)
    cv2.imshow("Result",stackedImage)

    # 按下“s”键时保存图像
    if cv2.waitKey(1) & 0xFF == ord('s'):
        cv2.imwrite("Scanned/myImage"+str(count)+".jpg",imgWarpColored)
        cv2.rectangle(stackedImage, ((int(stackedImage.shape[1] / 2) - 230), int(stackedImage.shape[0] / 2) + 50),
                      (1100, 350), (0, 255, 0), cv2.FILLED)
        cv2.putText(stackedImage, "Scan Saved", (int(stackedImage.shape[1] / 2) - 200, int(stackedImage.shape[0] / 2)),
                    cv2.FONT_HERSHEY_DUPLEX, 3, (0, 0, 255), 5, cv2.LINE_AA)
        cv2.imshow('Result', stackedImage)
        cv2.waitKey(300)
        count += 1
    elif cv2.waitKey(1) & 0xFF == 27:
        break

今天需要要讲解的还是主函数Main.py，由我来讲解，其实我也有点压力，因为这个项目它涉及了Opencv核心知识点，有的地方我也需要去查找，因为学久必会忘，更何况我也是刚刚起步的阶段，所以我会尽我所能的去讲清楚。

注意：我是以网络摄像头为例，读取图片的方式，同理可得。

首先，请看＃号框内，我们将从这里开始起，设立变量webCamFeed，用其表示是否打开摄像头，接着亮度，宽，高的赋值。utlis.initializeTrackbars()是utlis.py文件当中的轨迹栏初始化函数。
然后，我们依次对图像进行大小调整、灰度图像、高斯模糊、Canny边缘检测、扩张、侵蚀。
之后，找出图像可以检测的所有轮廓，并找到最大的轮廓并且画出来，同时要为扫描到的文档找到四个顶点，也就是扭曲点，用cv2.getPerspectiveTransform()函数找到点的坐标，用cv2.warpPerspective()函数输出图像，如果到了这一步，我们去运行一下会发现有边角是桌子的颜色但并没有很多，所以我们需要从每侧移除20个像素，应用自适应阈值让图像变得较为清晰——黑色的文字更加的明显。
接着，配置utlis.stackImages()需要的参数——图像（列表的形式），规模，标签（列表的形式，可以不用标签，程序一样可以正确运行），展示窗口。
最后，如果你觉得比较满意，按下s键，即可保存，并在图中央出现有"Scan Saved"的矩形框。点击Esc键即可退出程序。

4.项目资源

GitHUb：Opencv-project-training/Opencv project training/06 Document Scanner at main · Auorui/Opencv-project-training · GitHub

5.项目总结与评价

它是一个很好的项目，要知道我们要实现这种效果，即修正文档，还得清晰，要么有VIP，兑换积分，看广告等。如果你发现扫描的文档不清晰，请修改合适的分辨率。以我个人来看，它的实用性很高。本来今天是想要做人脸识别的项目的，但后面我一直没有解决下载几个包错误的问题（现在已经解决），文档扫描是明天的项目，今天是赶着做好的，那么希望你在今天的项目中玩得开心！

到此这篇关于Python+OpenCV实战之实现文档扫描的文章就介绍到这了,更多相关Python OpenCV文档扫描内容请搜索我们以前的文章或继续浏览下面的相关文章希望大家以后多多支持我们！

Python+Opencv实现物体尺寸测量的方法详解

目录 1.效果展示 2.项目介绍 3.项目搭建 4.utils.py文件代码展示与讲解 5.项目代码展示与讲解 6.项目资源 7.项目总结 1.效果展示我们将以两种方式来展示我们这个项目的效果. 下面这是视频的实时检测,我分别用了盒子和盖子来检测,按理来说效果不应该怎么差的,但我实在没有找到合适的背景与物体.且我的摄像头使用的是外设,我不得不手持,所以存在一点点的抖动,但我可以保证,它是缺少了适合检测物体与背景. 我使用手机拍了一张照片并经过了ps修改了背景,效果不错. 2.项目介绍本项目中
Python+OpenCV实战之拖拽虚拟方块的实现

目录一.项目效果二.核心流程三.代码流程 1. 读取摄像头视频,画矩形 2. 导入mediapipe处理手指坐标 3. 位置计算完整代码一.项目效果学校宿舍今天搬家,累麻了,突然发现展示处理的也很粗糙,就这样吧嘿嘿~~~ 二.核心流程 1.openCV读取视频流.在每一帧图片上画一个矩形. 2.使用mediapipe获取手指关键点坐标. 3.根据手指坐标位置和矩形的坐标位置,判断手指点是否在矩形上,如果在则矩形跟随手指移动. 三.代码流程环境准备: python: 3.8.8 op
Python Opencv实战之文字检测OCR

目录 1.相关函数的讲解 2.代码展示 Detecting Words Detecting ONLY Digits 3.问题叙述 4.image_to_data()配置讲解 5.项目拓展 6.总结与评价 1.相关函数的讲解 image_to_data()的输出结果是表格形式,输出变量的类型依旧是字符串. 你会得到一个这样的列表['level', 'page_num', 'block_num', 'par_num', 'line_num', 'word_num', 'left', 'top', '
Python+OpenCV实战之实现文档扫描

目录 1.效果展示 2.项目准备 3.代码的讲解与展示 4.项目资源 5.项目总结与评价 1.效果展示网络摄像头扫描: 图片扫描: 最终扫描保存的图片: (视频) (图片) 2.项目准备今天的项目文件只需要两个.py文件,其中一个.py文件是已经写好的函数,你将直接使用它,我不会在此多做讲解,因为我们将会在主要的.py文件import 导入它,如果想了解其中函数是如何写的,请自行学习. utlis.py,需要添加的.py文件 import cv2 import numpy as np # T
python编写暴力破解zip文档程序的实例讲解

编写暴力破解Zip文件要从学习zipfile库的使用方法入手,首先打开Python解释器,用help('zipfile')命令来了解这个库并重点看一下ZipFile类中的extractall()这个方法 ZipFile extractall() 让我们来写一个脚本测试一下Zip文件库的用法,首先创建一个有密码的压缩文件,这里设置密码为awd,文件名为evil.zip,要解压这个zip文件,实例化一个新的ZipFile类,使用extractall()方法,在可选参数pwd上填上密码运行后你会发现
Python pyinotify模块实现对文档的实时监控功能方法

0x01 安装pyinotify >>> pip install pyinotify >>> import pyinotify 0x02 实现对文档的试试监控功能这个功能类似与Ubuntu里的rail -f功能,在对目标文件进行修改时,脚本可以实时监控并将新的修改打印出来. import pyinotify import time import os class ProcessTransientFile(pyinotify.ProcessEvent): def pro
python爬虫批量下载zabbix文档代码实例

这篇文章主要介绍了python爬虫批量下载zabbix文档代码实例,文中通过示例代码介绍的非常详细,对大家的学习或者工作具有一定的参考学习价值,需要的朋友可以参考下 # -*- coding: UTF-8 -*- import requests,re,time url = 'https://www.zabbix.com/documentation/3.4/zh/manual' base_url = 'https://www.zabbix.com/documentation/3.4/' seco
python实现的生成word文档功能示例

本文实例讲述了python实现的生成word文档功能.分享给大家供大家参考,具体如下: 每月1次的测试费用报销,需要做一个文档.干脆花点时间写个程序吧. # -*- coding: utf-8 -*- from tools import get_data from docx import Document def new_doc(fee_data,doc_path,fee):#新建一个word文档,写入汇总表的数据 document = Document() p_total = document
python 使用pdfminer3k 读取PDF文档的例子

1.安装 pdfminer3k 通过pip安装: pip install pdfminer3k 下载安装:在网页 https://pypi.org/project/pdfminer3k/1.3.1/#files 进行下载,解压.然后cmd命令进入到当前文件夹: 可以直接在资源管理器的路径栏直接输入cmd进入到当前目录.然后执行 python setup.py install 等待安装完成 2.读取pdf中的TXT代码示例: from pdfminer.converter import PDFPa
Python简单读写Xls格式文档的方法示例

本文实例讲述了Python简单读写Xls格式文档的方法.分享给大家供大家参考,具体如下: 1. 模块安装使用pip install命令安装, 即: pip install xlrd pip install xlwt 如下图: 2. python 代码 import xlrd import xlwt import datetime def set_style(name,height,format,bold=False): style = xlwt.XFStyle() if format.stri
Ubuntu下使用python读取doc和docx文档的内容方法

读取docx文档使用的包是python-docx 1. 安装python-docx包 sudo pip install python-docx 2. 使用python-docx包读取数据 #encoding:utf8 import docx doc = docx.Document('test.docx') docText = '\n'.join([paragraph.text for paragraph in doc.paragraphs]) #print(docText) python-do
python错误调试及单元文档测试过程解析

这篇文章主要介绍了python错误调试及单元文档测试过程解析,文中通过示例代码介绍的非常详细,对大家的学习或者工作具有一定的参考学习价值,需要的朋友可以参考下错误分为程序的错误和由用户错误的输入引起的错误,此外还有因为各种各样意外的情况导致的错误,比如在磁盘满的时候写入.从网络爬取东西的时候,网络断了.这类错误称为异常错误处理普通的错误处理机制就是在出错的时候返回一个错误代码,但是这样十分不方便,一是因为错误码是和正常结果一样的方式返回的,判断起来十分不方便,二是错误还需要一级一级的向上报