
  • 技术背景
  • 打格点算法实现
  • 打格点算法加速
  • 总结概要






from numba import jit
from numba import cuda
import numpy as np

def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as crd[i][k].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        atoms(int): The number of rows of `crd` to process.
        grids: Output matrix, indexable as grids[i][k]; modified in place.

    Returns:
        The same `grids` object, with rows 0..atoms-1 overwritten.
    """
    for i in range(atoms):
        for k in range(3):
            # int() truncates toward zero, which equals the floor as long as
            # the coordinate is not below the origin stored in rxyz[k].
            grids[i][k] = int((crd[i][k]-rxyz[k])/rxyz[3])
    return grids

if __name__=='__main__':
    # Seed 1 reproduces the coordinates printed in the sample run that
    # follows this script.
    np.random.seed(1)
    atoms = 4
    grid_size = 0.1
    crd = np.random.random((atoms,3)).astype(np.float32)
    # Column-wise extrema in one vectorized pass instead of Python-level
    # min()/max() over each column.
    xmin, ymin, zmin = crd.min(axis=0)
    xmax, ymax, _ = crd.max(axis=0)
    # Number of cells needed to cover the occupied range along x and y
    # (only these two axes are drawn below).
    xgrids = int((xmax-xmin)/grid_size)+1
    ygrids = int((ymax-ymin)/grid_size)+1
    rxyz = np.array([xmin,ymin,zmin,grid_size], dtype=np.float32)

    # -1 marks entries that have not been assigned a grid index yet.
    grids = np.full_like(crd, -1)
    grids_cpu = grid_by_cpu(crd, rxyz, atoms, grids)
    print (crd)
    print (grids_cpu)

    import matplotlib.pyplot as plt
    # Atoms projected onto the x-y plane.
    plt.plot(crd[:,0], crd[:,1], 'o', color='red')
    # Horizontal grid lines.
    for grid in range(ygrids+1):
        plt.plot([xmin,xmin+grid_size*xgrids], [ymin+grid_size*grid,ymin+grid_size*grid], color='black')
    # Vertical grid lines.
    for grid in range(xgrids+1):
        plt.plot([xmin+grid_size*grid,xmin+grid_size*grid], [ymin,ymin+grid_size*ygrids], color='black')
    # Without this the figure is built but never rendered.
    plt.show()


$ python3
[[4.17021990e-01 7.20324516e-01 1.14374816e-04]
 [3.02332580e-01 1.46755889e-01 9.23385918e-02]
 [1.86260208e-01 3.45560730e-01 3.96767467e-01]
 [5.38816750e-01 4.19194520e-01 6.85219526e-01]]
[[2. 5. 0.]
 [1. 0. 0.]
 [0. 1. 3.]
 [3. 2. 6.]]






from numba import jit
from numba import cuda
import numpy as np

def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as crd[i][k].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        atoms(int): The number of rows of `crd` to process.
        grids: Output matrix, indexable as grids[i][k]; modified in place.

    Returns:
        The same `grids` object, with rows 0..atoms-1 overwritten.
    """
    for i in range(atoms):
        for k in range(3):
            # int() truncates toward zero, which equals the floor as long as
            # the coordinate is not below the origin stored in rxyz[k].
            grids[i][k] = int((crd[i][k]-rxyz[k])/rxyz[3])
    return grids

@jit(nopython=True)
def grid_by_jit(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Same computation as the plain-Python version, compiled by numba.

    Args:
        crd: 2-D array of atom coordinates, indexed as crd[i, k].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        atoms(int): The number of rows of `crd` to process.
        grids: 2-D output array, indexed as grids[i, k]; modified in place.

    Returns:
        The same `grids` array, with rows 0..atoms-1 overwritten.
    """
    for i in range(atoms):
        for k in range(3):
            grids[i, k] = int((crd[i, k]-rxyz[k])/rxyz[3])
    return grids

@cuda.jit
def grid_by_gpu(crd, rxyz, grids):
    """CUDA kernel: transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Each thread handles the single element (i, j) given by its 2-D global
    thread index, so the launch must cover at least grids.shape threads.

    Args:
        crd: 2-D array of atom coordinates, indexed as crd[i, j].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        grids: 2-D output array, indexed as grids[i, j]; written in place.
    """
    i, j = cuda.grid(2)
    # Threads that fall outside the array (oversized launch) must not write.
    if i < grids.shape[0] and j < grids.shape[1]:
        grids[i, j] = int((crd[i, j]-rxyz[j])/rxyz[3])


if __name__=='__main__':
    # Seed 1 reproduces the coordinates printed in the sample run that
    # follows this script.
    np.random.seed(1)
    atoms = 4
    grid_size = 0.1
    crd = np.random.random((atoms,3)).astype(np.float32)
    # Grid origin: the per-axis minimum coordinate, in one vectorized pass.
    xmin, ymin, zmin = crd.min(axis=0)
    rxyz = np.array([xmin,ymin,zmin,grid_size], dtype=np.float32)
    crd_cuda = cuda.to_device(crd)
    rxyz_cuda = cuda.to_device(rxyz)

    # -1 marks entries that have not been assigned a grid index yet; each
    # implementation gets its own fresh output buffer.
    grids = np.full_like(crd, -1)
    grids_cpu = grid_by_cpu(crd, rxyz, atoms, grids)

    grids = np.full_like(crd, -1)
    grids_jit = grid_by_jit(crd, rxyz, atoms, grids)

    grids = np.full_like(crd, -1)
    grids_cuda = cuda.to_device(grids)
    # Launch atoms x 3 blocks of one thread each: one thread per
    # (atom, axis) element. Without this call the buffer stays at -1.
    grid_by_gpu[(atoms,3),(1,1)](crd_cuda, rxyz_cuda, grids_cuda)

    print (crd)
    print (grids_cpu)
    print (grids_jit)
    print (grids_cuda.copy_to_host())


$ python3
/home/dechin/anaconda3/lib/python3.8/site-packages/numba/cuda/ NumbaPerformanceWarning: Grid size (12) < 2 * SM count (72) will likely result in GPU under utilization due to low occupancy.
[[4.17021990e-01 7.20324516e-01 1.14374816e-04]
 [3.02332580e-01 1.46755889e-01 9.23385918e-02]
 [1.86260208e-01 3.45560730e-01 3.96767467e-01]
 [5.38816750e-01 4.19194520e-01 6.85219526e-01]]
[[2. 5. 0.]
 [1. 0. 0.]
 [0. 1. 3.]
 [3. 2. 6.]]
[[2. 5. 0.]
 [1. 0. 0.]
 [0. 1. 3.]
 [3. 2. 6.]]
[[2. 5. 0.]
 [1. 0. 0.]
 [0. 1. 3.]
 [3. 2. 6.]]



from numba import jit
from numba import cuda
import numpy as np

def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as crd[i][k].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        atoms(int): The number of rows of `crd` to process.
        grids: Output matrix, indexable as grids[i][k]; modified in place.

    Returns:
        The same `grids` object, with rows 0..atoms-1 overwritten.
    """
    for i in range(atoms):
        for k in range(3):
            # int() truncates toward zero, which equals the floor as long as
            # the coordinate is not below the origin stored in rxyz[k].
            grids[i][k] = int((crd[i][k]-rxyz[k])/rxyz[3])
    return grids

@jit(nopython=True)
def grid_by_jit(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Same computation as the plain-Python version, compiled by numba.

    Args:
        crd: 2-D array of atom coordinates, indexed as crd[i, k].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        atoms(int): The number of rows of `crd` to process.
        grids: 2-D output array, indexed as grids[i, k]; modified in place.

    Returns:
        The same `grids` array, with rows 0..atoms-1 overwritten.
    """
    for i in range(atoms):
        for k in range(3):
            grids[i, k] = int((crd[i, k]-rxyz[k])/rxyz[3])
    return grids

@cuda.jit
def grid_by_gpu(crd, rxyz, grids):
    """CUDA kernel: transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Each thread handles the single element (i, j) given by its 2-D global
    thread index, so the launch must cover at least grids.shape threads.

    Args:
        crd: 2-D array of atom coordinates, indexed as crd[i, j].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        grids: 2-D output array, indexed as grids[i, j]; written in place.
    """
    i, j = cuda.grid(2)
    # Threads that fall outside the array (oversized launch) must not write.
    if i < grids.shape[0] and j < grids.shape[1]:
        grids[i, j] = int((crd[i, j]-rxyz[j])/rxyz[3])


if __name__=='__main__':
    import time
    from tqdm import trange

    atoms = 100000
    grid_size = 0.1
    crd = np.random.random((atoms,3)).astype(np.float32)
    # Grid origin: the per-axis minimum coordinate, in one vectorized pass.
    xmin, ymin, zmin = crd.min(axis=0)
    rxyz = np.array([xmin,ymin,zmin,grid_size], dtype=np.float32)
    crd_cuda = cuda.to_device(crd)
    rxyz_cuda = cuda.to_device(rxyz)

    cpu_time = 0
    jit_time = 0
    gpu_time = 0

    for i in trange(100):
        # Output buffers are allocated (and copied to the device) outside
        # the timed regions, so only the computation itself is measured.
        grids = np.full_like(crd, -1)
        time0 = time.perf_counter()
        grids_cpu = grid_by_cpu(crd, rxyz, atoms, grids)
        time1 = time.perf_counter()

        grids = np.full_like(crd, -1)
        time2 = time.perf_counter()
        grids_jit = grid_by_jit(crd, rxyz, atoms, grids)
        time3 = time.perf_counter()

        grids = np.full_like(crd, -1)
        grids_cuda = cuda.to_device(grids)
        time4 = time.perf_counter()
        # One single-thread block per (atom, axis) element.
        grid_by_gpu[(atoms,3),(1,1)](crd_cuda, rxyz_cuda, grids_cuda)
        # Kernel launches return before the GPU finishes; wait so the
        # interval covers the actual computation, not just the launch.
        cuda.synchronize()
        time5 = time.perf_counter()

        # The first iteration is excluded from the totals because it
        # includes the one-off compilation of the numba functions.
        if i != 0:
            cpu_time += time1 - time0
            jit_time += time3 - time2
            gpu_time += time5 - time4

    print ('The time cost of CPU calculation is: {}s'.format(cpu_time))
    print ('The time cost of JIT calculation is: {}s'.format(jit_time))
    print ('The time cost of GPU calculation is: {}s'.format(gpu_time))


$ python3
100%|███████████████████████████| 100/100 [00:23<00:00,  4.18it/s]
The time cost of CPU calculation is: 23.01943016052246s
The time cost of JIT calculation is: 0.04810166358947754s
The time cost of GPU calculation is: 0.01806473731994629s



from numba import jit
from numba import cuda
import numpy as np

def grid_by_cpu(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Args:
        crd: The 3-D coordinates of atoms, indexable as crd[i][k].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        atoms(int): The number of rows of `crd` to process.
        grids: Output matrix, indexable as grids[i][k]; modified in place.

    Returns:
        The same `grids` object, with rows 0..atoms-1 overwritten.
    """
    for i in range(atoms):
        for k in range(3):
            # int() truncates toward zero, which equals the floor as long as
            # the coordinate is not below the origin stored in rxyz[k].
            grids[i][k] = int((crd[i][k]-rxyz[k])/rxyz[3])
    return grids

@jit(nopython=True)
def grid_by_jit(crd, rxyz, atoms, grids):
    """Transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Same computation as the plain-Python version, compiled by numba.

    Args:
        crd: 2-D array of atom coordinates, indexed as crd[i, k].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        atoms(int): The number of rows of `crd` to process.
        grids: 2-D output array, indexed as grids[i, k]; modified in place.

    Returns:
        The same `grids` array, with rows 0..atoms-1 overwritten.
    """
    for i in range(atoms):
        for k in range(3):
            grids[i, k] = int((crd[i, k]-rxyz[k])/rxyz[3])
    return grids

@cuda.jit
def grid_by_gpu(crd, rxyz, grids):
    """CUDA kernel: transform coordinates [x,y,z] into grid indices [nx,ny,nz].

    Each thread handles the single element (i, j) given by its 2-D global
    thread index, so the launch must cover at least grids.shape threads.

    Args:
        crd: 2-D array of atom coordinates, indexed as crd[i, j].
        rxyz: Four values [xmin, ymin, zmin, grid_size]; the first three
            are the grid origin and the last is the edge length of a cell.
        grids: 2-D output array, indexed as grids[i, j]; written in place.
    """
    i, j = cuda.grid(2)
    # Threads that fall outside the array (oversized launch) must not write.
    if i < grids.shape[0] and j < grids.shape[1]:
        grids[i, j] = int((crd[i, j]-rxyz[j])/rxyz[3])


if __name__=='__main__':
    import time
    from tqdm import trange

    atoms = 5000000
    grid_size = 0.1
    crd = np.random.random((atoms,3)).astype(np.float32)
    # Grid origin: the per-axis minimum coordinate, in one vectorized pass
    # (Python-level min() over millions of elements is needlessly slow).
    xmin, ymin, zmin = crd.min(axis=0)
    rxyz = np.array([xmin,ymin,zmin,grid_size], dtype=np.float32)
    crd_cuda = cuda.to_device(crd)
    rxyz_cuda = cuda.to_device(rxyz)

    jit_time = 0
    gpu_time = 0

    for i in trange(100):
        # Output buffers are allocated (and copied to the device) outside
        # the timed regions, so only the computation itself is measured.
        grids = np.full_like(crd, -1)
        time2 = time.perf_counter()
        grids_jit = grid_by_jit(crd, rxyz, atoms, grids)
        time3 = time.perf_counter()

        grids = np.full_like(crd, -1)
        grids_cuda = cuda.to_device(grids)
        time4 = time.perf_counter()
        # One single-thread block per (atom, axis) element.
        grid_by_gpu[(atoms,3),(1,1)](crd_cuda, rxyz_cuda, grids_cuda)
        # Kernel launches return before the GPU finishes; wait so the
        # interval covers the actual computation, not just the launch.
        cuda.synchronize()
        time5 = time.perf_counter()

        # The first iteration is excluded from the totals because it
        # includes the one-off compilation of the numba functions.
        if i != 0:
            jit_time += time3 - time2
            gpu_time += time5 - time4

    print ('The time cost of JIT calculation is: {}s'.format(jit_time))
    print ('The time cost of GPU calculation is: {}s'.format(gpu_time))


$ python3
100%|███████████████████████████| 100/100 [00:09<00:00, 10.15it/s]
The time cost of JIT calculation is: 2.3743042945861816s
The time cost of GPU calculation is: 0.022843599319458008s







  • Python3.0 实现决策树算法的流程

    决策树的一般流程 检测数据集中的每个子项是否属于同一个分类 if so return 类标签 Else 寻找划分数据集的最好特征 划分数据集 创建分支 节点 from math import log import operator #生成样本数据集 def createDataSet(): dataSet = [[1,1,'yes'], [1,1,'yes'], [1,0,'no'], [0,1,'no'], [0,1,'no']] labels = ['no surfacing','flipp

  • Python3 A*寻路算法实现方式

    我就废话不多说了,直接上代码吧! # -*- coding: utf-8 -*- import math import random import copy import time import sys import tkinter import threading # 地图 tm = [ '############################################################', '#S............................#........

  • Python3爬楼梯算法示例

    本文实例讲述了Python3爬楼梯算法.分享给大家供大家参考,具体如下: 假设你正在爬楼梯.需要 n 步你才能到达楼顶. 每次你可以爬 1 或 2 个台阶.你有多少种不同的方法可以爬到楼顶呢? 注意:给定 n 是一个正整数. 方案一:每一步都是前两步和前一步的和 class Solution(object): def climbStairs(self, n): """ :type n: int :rtype: int """ pre, cur =

  • Python3实现的判断环形链表算法示例

    本文实例讲述了Python3实现的判断环形链表算法.分享给大家供大家参考,具体如下: 给定一个链表,判断链表中是否有环. 方案一:快慢指针遍历,若出现相等的情况,说明有环 # Definition for singly-linked list. # class ListNode(object): # def __init__(self, x): # self.val = x # self.next = None class Solution(object): def hasCycle(self,

  • Python3实现打格点算法的GPU加速实例详解

    目录 技术背景 打格点算法实现 打格点算法加速 总结概要 技术背景 在数学和物理学领域,总是充满了各种连续的函数模型.而当我们用现代计算机的技术去处理这些问题的时候,事实上是无法直接处理连续模型的,绝大多数的情况下都要转化成一个离散的模型再进行数值的计算.比如计算数值的积分,计算数值的二阶导数(海森矩阵)等等.这里我们所介绍的打格点的算法,正是一种典型的离散化方法.这个对空间做离散化的方法,可以在很大程度上简化运算量.比如在分子动力学模拟中,计算近邻表的时候,如果不采用打格点的方法,那么就要针对

  • Go Java算法之简化路径实例详解

    目录 简化路径 方法一:栈(Java) 方法二:标准库(Go) 简化路径 给你一个字符串 path ,表示指向某一文件或目录的 Unix 风格 绝对路径 (以 '/' 开头),请你将其转化为更加简洁的规范路径. 在 Unix 风格的文件系统中,一个点(.)表示当前目录本身:此外,两个点 (..) 表示将目录切换到上一级(指向父目录):两者都可以是复杂相对路径的组成部分. 任意多个连续的斜杠(即,'//')都被视为单个斜杠 '/' . 对于此问题,任何其他格式的点(例如,'...')均被视为文件/

  • Python3.5 Pandas模块缺失值处理和层次索引实例详解

    本文实例讲述了Python3.5 Pandas模块缺失值处理和层次索引.分享给大家供大家参考,具体如下: 1.pandas缺失值处理 import numpy as np import pandas as pd from pandas import Series,DataFrame df3 = DataFrame([ ["Tom",np.nan,456.67,"M"], ["Merry",34,345.56,np.nan], [np.nan,np

  • Python3中的列表生成式、生成器与迭代器实例详解

    本文实例讲述了Python3中的列表生成式.生成器与迭代器.分享给大家供大家参考,具体如下: 列表生成式 Python内置的一种极其强大的生成列表 list 的表达式.返回结果必须是列表. 基本语法: [ 变量表达式 for 变量 in 表达式 ] 示例 a = [x ** 2 for x in range(1, 10)] b = [x * x for x in range(1, 11) if x % 2 == 0] c = [m + n for m in 'ABC' for n in '123

  • Python3.5基础之函数的定义与使用实例详解【参数、作用域、递归、重载等】

    本文实例讲述了Python3.5函数的定义与使用.分享给大家供大家参考,具体如下: 1.函数学习框架 2.函数的定义与格式 (1)定义 (2)函数调用 注:函数名称不能以数字开头,建议函数名称的开头用小写的字母 (3)函数有四种格式,分别是:无参数无返回值,有参数无返回值.无参数有返回值.有参数有返回值 #!/usr/bin/env python # -*- coding:utf-8 -*- # Author:ZhengzhengLiu # 无参数无返回值 def hello(): # 函数体/

  • Java 蒙特卡洛算法求圆周率近似值实例详解

    起源 [1946: John von Neumann, Stan Ulam, and Nick Metropolis, all at the Los Alamos Scientific Laboratory, cook up the Metropolis algorithm, also known as the Monte Carlo method.]1946年,美国拉斯阿莫斯国家实验室的三位科学家John von Neumann,Stan Ulam 和 Nick Metropolis共同发明,

  • Python机器学习k-近邻算法(K Nearest Neighbor)实例详解

    本文实例讲述了Python机器学习k-近邻算法.分享给大家供大家参考,具体如下: 工作原理 存在一份训练样本集,并且每个样本都有属于自己的标签,即我们知道每个样本集中所属于的类别.输入没有标签的新数据后,将新数据的每个特征与样本集中数据对应的特征进行比较,然后提取样本集中与之最相近的k个样本.观察并统计这k个样本的标签,选择数量最大的标签作为这个新数据的标签. 用以下这幅图可以很好的解释kNN算法: 不同形状的点,为不同标签的点.其中绿色点为未知标签的数据点.现在要对绿色点进行预测.由图不难得出

  • python目标检测SSD算法预测部分源码详解

    目录 学习前言 什么是SSD算法 ssd_vgg_300主体的源码 学习前言 ……学习了很多有关目标检测的概念呀,咕噜咕噜,可是要怎么才能进行预测呢,我看了好久的SSD源码,将其中的预测部分提取了出来,训练部分我还没看懂 什么是SSD算法 SSD是一种非常优秀的one-stage方法,one-stage算法就是目标检测和分类是同时完成的,其主要思路是均匀地在图片的不同位置进行密集抽样,抽样时可以采用不同尺度和长宽比,然后利用CNN提取特征后直接进行分类与回归,整个过程只需要一步,所以其优势是速度

  • python目标检测SSD算法训练部分源码详解

    目录 学习前言 讲解构架 模型训练的流程 1.设置参数 2.读取数据集 3.建立ssd网络. 4.预处理数据集 5.框的编码 6.计算loss值 7.训练模型并保存 开始训练 学习前言 ……又看了很久的SSD算法,今天讲解一下训练部分的代码.预测部分的代码可以参照 讲解构架 本次教程的讲解主要是对训练部分的代码进行讲解,该部分讲解主要是对训练函数的执行过程与执行思路进行详

  • Go Java算法之单词搜索示例详解

    目录 单词搜索 算法:DFS回溯(Java) 算法:DFS回溯(Go) 单词搜索 给定一个 m x n 二维字符网格 board 和一个字符串单词 word .如果 word 存在于网格中,返回 true :否则,返回 false . 单词必须按照字母顺序,通过相邻的单元格内的字母构成,其中“相邻”单元格是那些水平相邻或垂直相邻的单元格.同一个单元格内的字母不允许被重复使用. 示例 1: 输入:board = [["A","B","C","
