python3+PyQt5实现支持多线程的页面索引器应用程序

本文通过 Python3+PyQt5 实现了《Python Qt GUI 快速编程》第19章中的页面索引器应用程序例子。

/home/yrd/eric_workspace/chap19/walker_ans.py

#!/usr/bin/env python3

import codecs
import html.entities
import re
import sys
from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt)

class Walker(QThread):
    """Worker thread that indexes the words found in a batch of HTML files.

    Each file is read as UTF-8 (undecodable bytes are ignored), stripped of
    tags and character entities, lower-cased and split into words.  Every
    accepted word is recorded in the shared ``filenamesForWords`` mapping;
    a word that is already recorded for more than
    ``COMMON_WORDS_THRESHOLD`` files is removed from the mapping and added
    to the shared ``commonWords`` set instead.  Both shared containers are
    only touched while holding the shared read/write ``lock``.

    Signals:
        indexed(str, int): emitted after each processed file with the file
            name and this walker's index.
        finished(bool, int): emitted at the end of ``run()`` with the
            completion flag and this walker's index.
    """

    # NOTE(review): this declaration replaces QThread's own no-argument
    # ``finished`` signal on this class.  It is kept because callers connect
    # to ``finished[bool, int]``, but it is emitted from inside ``run()``,
    # i.e. slightly before the thread has really terminated.
    finished = pyqtSignal(bool, int)
    indexed = pyqtSignal(str, int)
    COMMON_WORDS_THRESHOLD = 250
    MIN_WORD_LEN = 3
    MAX_WORD_LEN = 25
    INVALID_FIRST_OR_LAST = frozenset("0123456789_")
    STRIPHTML_RE = re.compile(r"<[^>]*?>", re.IGNORECASE|re.MULTILINE)
    ENTITY_RE = re.compile(r"&(\w+?);|&#(\d+?);")
    SPLIT_RE = re.compile(r"\W+", re.IGNORECASE|re.MULTILINE)

    def __init__(self, index, lock, files, filenamesForWords,
                 commonWords, parent=None):
        """Store the shared state; no work is done until the thread starts.

        Args:
            index: number identifying this walker; passed back in signals.
            lock: shared lock offering lockForRead/lockForWrite/unlock.
            files: iterable of file names to index.
            filenamesForWords: shared mapping word -> set of file names.  It
                must create a missing entry on item access (for example a
                ``collections.defaultdict(set)``).
            commonWords: shared set of words that are too common to index.
            parent: optional parent QObject.
        """
        super(Walker, self).__init__(parent)
        self.index = index
        self.lock = lock
        self.files = files
        self.filenamesForWords = filenamesForWords
        self.commonWords = commonWords
        self.stopped = False
        self.mutex = QMutex()       # guards self.stopped
        self.completed = False

    def stop(self):
        """Ask the walker to stop before it handles its next file."""
        # Acquire before ``try`` so ``finally`` never unlocks a mutex that
        # was not successfully locked.
        self.mutex.lock()
        try:
            self.stopped = True
        finally:
            self.mutex.unlock()

    def isStopped(self):
        """Return True once ``stop()`` has been called."""
        self.mutex.lock()
        try:
            return self.stopped
        finally:
            self.mutex.unlock()

    def run(self):
        """Thread entry point: index the files, then emit ``finished``.

        ``finished`` is emitted even when indexing fails unexpectedly, so
        the receiver is never left waiting for this walker.
        """
        try:
            self.processFiles()
        except Exception as e:
            # Thread boundary: report the failure and finish as
            # "not completed" instead of leaving the owner without a signal.
            sys.stderr.write("Error: {0}\n".format(e))
        finally:
            self.stop()
            self.finished.emit(self.completed, self.index)

    @staticmethod
    def _charFromEntity(match):
        """Return the character for an ENTITY_RE match, or "" if unknown."""
        text = match.group(match.lastindex)
        if text.isdigit():
            try:
                return chr(int(text))
            except (ValueError, OverflowError):
                # Code point outside the Unicode range, or a "digit"
                # character (such as a superscript) that int() rejects.
                return ""
        codepoint = html.entities.name2codepoint.get(text)
        return chr(codepoint) if codepoint is not None else ""

    def _extractWords(self, text):
        """Return the set of indexable words contained in HTML ``text``."""
        text = self.STRIPHTML_RE.sub("", text)
        text = self.ENTITY_RE.sub(self._charFromEntity, text)
        invalid = self.INVALID_FIRST_OR_LAST
        return {word for word in self.SPLIT_RE.split(text.lower())
                if self.MIN_WORD_LEN <= len(word) <= self.MAX_WORD_LEN
                and word[0] not in invalid and word[-1] not in invalid}

    def _recordWords(self, fname, words):
        """Add ``fname`` to the index entry of every word in ``words``."""
        for word in words:
            # One short write-locked section per word keeps readers from
            # being blocked for the duration of a whole file.
            self.lock.lockForWrite()
            try:
                if word in self.commonWords:
                    # Another walker made this word common after it passed
                    # the read-locked filter; do not re-index it.
                    continue
                files = self.filenamesForWords[word]
                if len(files) > self.COMMON_WORDS_THRESHOLD:
                    del self.filenamesForWords[word]
                    self.commonWords.add(word)
                else:
                    files.add(str(fname))
            finally:
                self.lock.unlock()

    def processFiles(self):
        """Index every file in ``self.files`` unless asked to stop.

        Files that cannot be read are reported on stderr and skipped.
        ``self.completed`` is set to True only when all files were handled
        without the walker being stopped.
        """
        for fname in self.files:
            if self.isStopped():
                return
            try:
                # newline="" keeps line endings untranslated, as the
                # previous codecs.open() call did.
                with open(fname, "r", encoding="UTF8", errors="ignore",
                          newline="") as fh:
                    text = fh.read()
            except EnvironmentError as e:
                sys.stderr.write("Error: {0}\n".format(e))
                continue
            if self.isStopped():
                return
            words = self._extractWords(text)
            # Drop the already-common words with a single read-locked pass.
            self.lock.lockForRead()
            try:
                words = words - self.commonWords
            finally:
                self.lock.unlock()
            if self.isStopped():
                return
            self._recordWords(fname, words)
            self.indexed.emit(fname, self.index)
        self.completed = True

/home/yrd/eric_workspace/chap19/pageindexer_ans.pyw

#!/usr/bin/env python3

import collections
import os
import sys
from PyQt5.QtCore import (QDir, QReadWriteLock, QMutex,Qt)
from PyQt5.QtWidgets import (QApplication, QDialog, QFileDialog, QFrame,
        QHBoxLayout, QLCDNumber, QLabel, QLineEdit, QListWidget,
        QPushButton, QVBoxLayout)
import walker_ans as walker

def isAlive(qobj):
    """Return True if the C++ object wrapped by ``qobj`` still exists.

    ``sip.unwrapinstance()`` raises RuntimeError when the underlying C++
    object has already been deleted; that case is reported as False.
    """
    # PyQt5 5.11 and later ship their private copy of the module as
    # ``PyQt5.sip``; older releases only provide the top-level ``sip``.
    try:
        from PyQt5 import sip
    except ImportError:
        import sip
    try:
        sip.unwrapinstance(qobj)
    except RuntimeError:
        return False
    return True

class Form(QDialog):
    """Dialog that indexes the words of the HTML files below a directory.

    The files are split into batches and each batch is handed to a
    ``walker.Walker`` thread.  The walkers fill the shared
    ``filenamesForWords`` mapping and ``commonWords`` set, both guarded by
    ``self.lock``; ``self.mutex`` is held around the updates this dialog
    makes to its own widgets and to ``fileCount``.
    """

    # Number of files handed to each Walker thread.
    FILES_PER_WALKER = 1000

    def __init__(self, parent=None):
        """Build the widgets and layouts and connect the signals."""
        super(Form, self).__init__(parent)

        self.mutex = QMutex()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()
        self.lock = QReadWriteLock()
        self.path = QDir.homePath()
        pathLabel = QLabel("Indexing path:")
        self.pathLabel = QLabel()
        self.pathLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)
        self.pathButton = QPushButton("Set &Path...")
        self.pathButton.setAutoDefault(False)
        findLabel = QLabel("&Find word:")
        self.findEdit = QLineEdit()
        findLabel.setBuddy(self.findEdit)
        commonWordsLabel = QLabel("&Common words:")
        self.commonWordsListWidget = QListWidget()
        commonWordsLabel.setBuddy(self.commonWordsListWidget)
        filesLabel = QLabel("Files containing the &word:")
        self.filesListWidget = QListWidget()
        filesLabel.setBuddy(self.filesListWidget)
        filesIndexedLabel = QLabel("Files indexed")
        self.filesIndexedLCD = QLCDNumber()
        self.filesIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        wordsIndexedLabel = QLabel("Words indexed")
        self.wordsIndexedLCD = QLCDNumber()
        self.wordsIndexedLCD.setSegmentStyle(QLCDNumber.Flat)
        commonWordsLCDLabel = QLabel("Common words")
        self.commonWordsLCD = QLCDNumber()
        self.commonWordsLCD.setSegmentStyle(QLCDNumber.Flat)
        self.statusLabel = QLabel("Click the 'Set Path' "
                                  "button to start indexing")
        self.statusLabel.setFrameStyle(QFrame.StyledPanel|QFrame.Sunken)

        topLayout = QHBoxLayout()
        topLayout.addWidget(pathLabel)
        topLayout.addWidget(self.pathLabel, 1)
        topLayout.addWidget(self.pathButton)
        topLayout.addWidget(findLabel)
        topLayout.addWidget(self.findEdit, 1)
        leftLayout = QVBoxLayout()
        leftLayout.addWidget(filesLabel)
        leftLayout.addWidget(self.filesListWidget)
        rightLayout = QVBoxLayout()
        rightLayout.addWidget(commonWordsLabel)
        rightLayout.addWidget(self.commonWordsListWidget)
        middleLayout = QHBoxLayout()
        middleLayout.addLayout(leftLayout, 1)
        middleLayout.addLayout(rightLayout)
        bottomLayout = QHBoxLayout()
        bottomLayout.addWidget(filesIndexedLabel)
        bottomLayout.addWidget(self.filesIndexedLCD)
        bottomLayout.addWidget(wordsIndexedLabel)
        bottomLayout.addWidget(self.wordsIndexedLCD)
        bottomLayout.addWidget(commonWordsLCDLabel)
        bottomLayout.addWidget(self.commonWordsLCD)
        bottomLayout.addStretch()
        layout = QVBoxLayout()
        layout.addLayout(topLayout)
        layout.addLayout(middleLayout)
        layout.addLayout(bottomLayout)
        layout.addWidget(self.statusLabel)
        self.setLayout(layout)

        self.walkers = []       # Walker threads of the current run
        self.completed = []     # one flag per walker, addressed by index
        self.pathButton.clicked.connect(self.setPath)
        self.findEdit.returnPressed.connect(self.find)
        self.setWindowTitle("Page Indexer")

    def _setStatus(self, text):
        """Show ``text`` in the status label while holding ``self.mutex``."""
        self.mutex.lock()
        try:
            self.statusLabel.setText(text)
        finally:
            self.mutex.unlock()

    def stopWalkers(self):
        """Stop every running walker, wait for it, and forget all walkers."""
        # The loop variable is deliberately not called ``walker``: that name
        # is the alias of the imported walker module.
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.stop()
        for thread in self.walkers:
            if isAlive(thread) and thread.isRunning():
                thread.wait()
        self.walkers = []
        self.completed = []

    def setPath(self):
        """Ask for a directory and start indexing the HTML files below it.

        Any indexing still in progress is stopped first.  If the dialog is
        cancelled nothing else changes.  Otherwise the previous results are
        discarded and one walker is started per batch of
        ``FILES_PER_WALKER`` files whose names end in ".htm" or ".html".
        """
        self.stopWalkers()
        self.pathButton.setEnabled(False)
        path = QFileDialog.getExistingDirectory(self,
                "Choose a Path to Index", self.path)
        if not path:
            self.statusLabel.setText("Click the 'Set Path' "
                                     "button to start indexing")
            self.pathButton.setEnabled(True)
            return
        self.statusLabel.setText("Scanning directories...")
        QApplication.processEvents() # Needed for Windows
        self.path = QDir.toNativeSeparators(path)
        self.findEdit.setFocus()
        self.pathLabel.setText(self.path)
        self.statusLabel.clear()
        self.filesListWidget.clear()
        # The common words shown belong to the previous path; remove them
        # so stale results are not displayed for the new one.
        self.commonWordsListWidget.clear()
        self.fileCount = 0
        self.filenamesForWords = collections.defaultdict(set)
        self.commonWords = set()
        started = False
        batch = []
        index = 0
        for root, dirs, fnames in os.walk(str(self.path)):
            for name in fnames:
                if not name.endswith((".htm", ".html")):
                    continue
                batch.append(os.path.join(root, name))
                if len(batch) == self.FILES_PER_WALKER:
                    self.processFiles(index, batch)
                    batch = []      # fresh list; the walker keeps the old one
                    index += 1
                    started = True
        if batch:
            self.processFiles(index, batch)
            started = True
        if not started:
            self.finishedIndexing()
            self.statusLabel.setText(
                    "No HTML files found in the given path")

    def processFiles(self, index, files):
        """Create, register and start a walker for ``files``.

        Args:
            index: position of the walker in ``self.completed``.
            files: list of file names the walker has to index.
        """
        thread = walker.Walker(index, self.lock, files,
                self.filenamesForWords, self.commonWords, self)
        thread.indexed[str,int].connect(self.indexed)
        thread.finished[bool,int].connect(self.finished)
        # NOTE(review): Walker declares its own ``finished`` signal, which
        # it emits from inside run(); deleteLater is therefore requested
        # slightly before the thread has actually terminated - confirm that
        # this is acceptable.
        thread.finished.connect(thread.deleteLater)
        self.walkers.append(thread)
        self.completed.append(False)
        thread.start()
        thread.wait(300) # Needed for Windows

    def find(self):
        """List the indexed files containing the word in the find edit.

        Only the first whitespace-separated word is used, lower-cased.
        Empty or whitespace-only input, common words and words that are
        not in the index are reported in the status label.
        """
        parts = str(self.findEdit.text()).lower().split()
        if not parts:
            # Also covers whitespace-only input, for which taking the first
            # element of the split result would raise IndexError.
            self._setStatus("Enter a word to find in files")
            return
        word = parts[0]
        self.mutex.lock()
        try:
            self.statusLabel.clear()
            self.filesListWidget.clear()
        finally:
            self.mutex.unlock()
        self.lock.lockForRead()
        try:
            common = word in self.commonWords
            # .get() avoids creating an empty entry in the defaultdict.
            files = (set() if common else
                     set(self.filenamesForWords.get(word, ())))
        finally:
            self.lock.unlock()
        if common:
            self._setStatus("Common words like '{0}' "
                    "are not indexed".format(word))
            return
        if not files:
            self._setStatus("No indexed file contains "
                    "the word '{0}'".format(word))
            return
        files = [QDir.toNativeSeparators(name) for name in
                 sorted(files, key=str.lower)]
        self.mutex.lock()
        try:
            self.filesListWidget.addItems(files)
            self.statusLabel.setText(
                    "{0} indexed files contain the word '{1}'".format(
                    len(files), word))
        finally:
            self.mutex.unlock()

    def indexed(self, fname, index):
        """Slot for Walker.indexed: count the file and refresh the display.

        The three counters are refreshed every 25 files and the list of
        common words every 101 files.
        """
        self.mutex.lock()
        try:
            self.statusLabel.setText(fname)
            self.fileCount += 1
            count = self.fileCount
        finally:
            self.mutex.unlock()
        if count % 25 == 0:
            self.lock.lockForRead()
            try:
                indexedWordCount = len(self.filenamesForWords)
                commonWordCount = len(self.commonWords)
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.filesIndexedLCD.display(count)
                self.wordsIndexedLCD.display(indexedWordCount)
                self.commonWordsLCD.display(commonWordCount)
            finally:
                self.mutex.unlock()
        # Independent ``if``: a count that is a multiple of both 25 and 101
        # must refresh the common-words list as well.
        if count % 101 == 0:
            self.lock.lockForRead()
            try:
                words = self.commonWords.copy()
            finally:
                self.lock.unlock()
            self.mutex.lock()
            try:
                self.commonWordsListWidget.clear()
                self.commonWordsListWidget.addItems(sorted(words))
            finally:
                self.mutex.unlock()

    def finished(self, completed, index):
        """Slot for Walker.finished: mark walker ``index`` as done.

        When every registered walker is done, or when no walkers are
        registered any more, "Finished" is shown and the final counts are
        displayed.
        """
        if self.walkers:
            # A signal that was queued before the walker list was reset may
            # carry an index that no longer exists; ignore such an index.
            if 0 <= index < len(self.completed):
                self.completed[index] = True
            done = all(self.completed)
        else:
            done = True
        if done:
            self._setStatus("Finished")
            self.finishedIndexing()

    def reject(self):
        """Stop a running indexing pass; otherwise close the dialog."""
        if not all(self.completed):
            self.stopWalkers()
            self.finishedIndexing()
        else:
            self.accept()

    def closeEvent(self, event=None):
        """Stop all walkers when the dialog is closed."""
        self.stopWalkers()

    def finishedIndexing(self):
        """Show the final counts and re-enable the path button."""
        self.filesIndexedLCD.display(self.fileCount)
        self.wordsIndexedLCD.display(len(self.filenamesForWords))
        self.commonWordsLCD.display(len(self.commonWords))
        self.pathButton.setEnabled(True)
        QApplication.processEvents() # Needed for Windows

# Script entry point: create the application object and the indexer dialog,
# show the dialog, then enter the Qt event loop.
app = QApplication(sys.argv)
form = Form()
form.show()
app.exec_()  # returns when the application's event loop exits

运行结果:

以上就是本文的全部内容,希望对大家的学习有所帮助,也希望大家多多支持我们。

(0)

相关推荐

  • Python3多线程爬虫实例讲解代码

    多线程概述 多线程使得程序内部可以分出多个线程来做多件事情,充分利用CPU空闲时间,提升处理效率.python提供了两个模块来实现多线程thread 和threading ,thread 有一些缺点,在threading 得到了弥补.并且在Python3中废弃了thread模块,保留了更强大的threading模块. 使用场景 在python的原始解释器CPython中存在着GIL(Global Interpreter Lock,全局解释器锁),因此在解释执行python代码时,会产生互斥锁来限

  • python3多线程知识点总结

    多线程类似于同时执行多个不同程序,多线程运行有如下优点: 使用线程可以把占据长时间的程序中的任务放到后台去处理. 用户界面可以更加吸引人,比如用户点击了一个按钮去触发某些事件的处理,可以弹出一个进度条来显示处理的进度. 程序的运行速度可能加快. 在一些等待的任务实现上如用户输入.文件读写和网络收发数据等,线程就比较有用了.在这种情况下我们可以释放一些珍贵的资源如内存占用等等. 每个独立的线程有一个程序运行的入口.顺序执行序列和程序的出口.但是线程不能够独立执行,必须依存在应用程序中,由应用程序提

  • python3+PyQt5 创建多线程网络应用-TCP客户端和TCP服务器实例

    本文在上文的基础上重新实现支持多线程的服务器. 以下为TCP客户端的程序代码: #!/usr/bin/env python3 import sys from PyQt5.QtCore import (QByteArray, QDataStream, QDate, QIODevice, QRegExp, Qt) from PyQt5.QtWidgets import (QApplication, QDateEdit, QFrame, QGridLayout, QHBoxLayout, QLabel

  • Python3多线程基础知识点

    多线程类似于同时执行多个不同程序,多线程运行有如下优点: 使用线程可以把占据长时间的程序中的任务放到后台去处理. 用户界面可以更加吸引人,比如用户点击了一个按钮去触发某些事件的处理,可以弹出一个进度条来显示处理的进度 程序的运行速度可能加快 在一些等待的任务实现上如用户输入.文件读写和网络收发数据等,线程就比较有用了.在这种情况下我们可以释放一些珍贵的资源如内存占用等等. 线程在执行过程中与进程还是有区别的.每个独立的线程有一个程序运行的入口.顺序执行序列和程序的出口.但是线程不能够独立执行,必

  • Python3多线程操作简单示例

    本文实例讲述了Python3多线程操作.分享给大家供大家参考,具体如下: python3 线程中常用的两个模块为: _thread threading(推荐使用) thread 模块已被废弃.用户可以使用 threading 模块代替.所以,在 python3 中不能再使用"thread" 模块.为了兼容性,python3 将 thread 重命名为 "_thread". test.py # -*- coding:utf-8 -*- #!/usr/bin/pytho

  • Python3多线程版TCP端口扫描器

    本文实例为大家分享了Python3多线程版TCP端口扫描器的具体代码,供大家参考,具体内容如下 使用命令 python BannerDemo.py -H 192.168.200.101 -p 22,3306 代码如下 import optparse import socket from socket import * from threading import * screenLock = Semaphore(value=1) def connScan(tgtHost,tgtPort): try

  • Python3中多线程编程的队列运作示例

    Python3,开一个线程,间隔1秒把一个递增的数字写入队列,再开一个线程,从队列中取出数字并打印到终端 #! /usr/bin/env python3 import time import threading import queue # 一个线程,间隔一定的时间,把一个递增的数字写入队列 # 生产者 class Producer(threading.Thread): def __init__(self, work_queue): super().__init__() # 必须调用 self.

  • python3实现多线程聊天室

    使用python3创建多线程聊天室,供大家参考,具体内容如下 import threading import socket #socket udpSocket = None #计数器 num = 1 #1.创建接受,发送方法 def inMessage(): global num while True: #等待接收消息 data = udpSocket.recvfrom(1024) #4. 将接收到的数据再发送给对方 udpSocket.sendto(data[0], data[1]) #打印获

  • python3+PyQt5实现支持多线程的页面索引器应用程序

    本文通过Python3+pyqt5实现了python Qt GUI 快速编程的19章的页面索引器应用程序例子. /home/yrd/eric_workspace/chap19/walker_ans.py #!/usr/bin/env python3 import codecs import html.entities import re import sys from PyQt5.QtCore import (QMutex, QThread,pyqtSignal,Qt) class Walker

  • python3+PyQt5实现自定义窗口部件Counters

    本文通过Python3+PyQt5实现自定义部件–Counters自定 窗口部件.这个窗口是3*3的网格.本文有两个例子如下: /home/yrd/eric_workspace/chap11/counters.py. /home/yrd/eric_workspace/chap11/counters_dnd.py 第二个例子在第一个例子的基础上实现能通过鼠标拖拽球到不同的网格中. /home/yrd/eric_workspace/chap11/counters.py #!/usr/bin/env

  • python3+PyQt5+Qt Designer实现扩展对话框

    本文是对<Python Qt GUI快速编程>的第9章的扩展对话框例子Find and replace用Python3+PyQt5+Qt Designer进行改写. 第一部分无借用Qt Designer,完全用代码实现. 第二部分则借用Qt Designer,快速实现. 第一部分: import sys from PyQt5.QtCore import Qt,pyqtSignal from PyQt5.QtWidgets import (QApplication, QCheckBox, QDi

  • python3+PyQt5+Qt Designer实现堆叠窗口部件

    本文是对<Python Qt GUI快速编程>的第9章的堆叠窗口例子Vehicle Rental用Python3+PyQt5+Qt Designer进行改写. 第一部分无借用Qt Designer,完全用代码实现. 第二部分则借用Qt Designer,快速实现. 第一部分: import sys from PyQt5.QtCore import (Qt) from PyQt5.QtWidgets import (QApplication, QComboBox, QDialog, QDialo

  • python3+PyQt5重新实现QT事件处理程序

    本文是对<Python Qt GUI快速编程>的第10章的例子events用Python3+PyQt5进行改写,涉及到重新实现QWidget的事件处理程序.本例子涉及到上下文菜单,鼠标事件,键盘事件,可作为重新实现事件处理程序的参考. 注:在创建上下文菜单最简单的方式使用Qwidget.addAction()把动作添加到窗口部件中,再把窗口部件的上下文菜单策略设置为Qt.ActionsContextMenu即可,但是如果像本例子一样要根据不同的状态来提供不同的选项,则要重新实现上下文菜单事件处

  • python3+PyQt5重新实现自定义数据拖放处理

    本文分成两部分,第一部分通过python3+PyQt5实现自定义数据的拖放操作.第二部分则对第一部分的程序进行修改,增加拖放操作时,菜单提示是否移动或拷贝,还有可以通过ctrl键盘来设置移动过程中拷贝源而非会将源删除. 自定义数据MIME数据类型QMimeData,MIME是一种用于处理具有多个组成部分的自定义数据的标准化格式.MIME数据由一个数据类型和一个子类型构成–例如,text/plain,text/html,image/png,要处理自定义MIME数据,就必须要选用一种自定义数据类型和

  • python3+PyQt5泛型委托详解

    自定义委托可以让我们对视图中出现的数据项的外观和行为进行完全控制.如果有很多模型,可能会希望不是全部的大多数模型能够仅用一个自定义委托,如果不能这么做,那么对于这些自定义委托,将很有可能存在大量重复代码.为了使得维护工作变得轻松,更好的方法为不要为每个模型创建一个自定义委托,而是用一系列的通用组件来共同构成一个委托.本文通过Python3+pyqt5实现了python Qt GUI 快速编程的16章的泛型委托例子. /home/yrd/eric_workspace/chap16/richtext

  • python3+PyQt5图形项的自定义和交互 python3实现page Designer应用程序

    本文通过Python3+PyQt5实现<python Qt Gui 快速编程>这本书的page Designer应用程序,采用QGraphicsView,QGraphicsScene,QGraphicsItem,这个程序包含有多个文本,图片和框的页面.有些图形类在PyQt5已过时,所以本代码改动幅度比较大.主要的类或方法的改变如下: QMatrix==>QTransform setMatrix==>setTransform rotate ==> setRotation 本例中

  • python3+PyQt5实现使用剪贴板做复制与粘帖示例

    本文是对<Python Qt GUI快速编程>的第10章的例子剪贴板用Python3+PyQt5进行改写,分别对文本,图片和html文本的复制与粘帖,三种做法大同小异. #!/usr/bin/env python3 import os import sys from PyQt5.QtCore import (QMimeData, Qt) from PyQt5.QtWidgets import (QApplication, QDialog, QGridLayout, QLabel, QPushB

随机推荐