Documente Academic
Documente Profesional
Documente Cultură
_
import java.io._
import jcuda._
import jcuda.driver._
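// Remove if not needed: the import below enables the deprecated implicit Java<->Scala collection conversions; no use of them is visible in this part of the file.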
import scala.collection.JavaConversions._
object JCudaVectorAdd {
def main(args: Array[String]) {
JCudaDriver.setExceptionsEnabled(true)
val ptxFileName = preparePtxFile("JCudaVectorAddKernel.cu")
cuInit(0)
val device = new CUdevice()
cuDeviceGet(device, 0)
val context = new CUcontext()
cuCtxCreate(context, 0, device)
val module = new CUmodule()
cuModuleLoad(module, ptxFileName)
val function = new CUfunction()
cuModuleGetFunction(function, module, "add")
val numElements = 512
val hostInputA = Array.ofDim[Float](numElements)
val hostInputB = Array.ofDim[Float](numElements)
for (i <- 0 until numElements) {
hostInputA(i) = i.toFloat
hostInputB(i) = i.toFloat
}
val deviceInputA = new CUdeviceptr()
cuMemAlloc(deviceInputA, numElements * Sizeof.FLOAT)
cuMemcpyHtoD(deviceInputA, Pointer.to(hostInputA), numElements * Sizeof.FLOA
T)
val deviceInputB = new CUdeviceptr()
cuMemAlloc(deviceInputB, numElements * Sizeof.FLOAT)
cuMemcpyHtoD(deviceInputB, Pointer.to(hostInputB), numElements * Sizeof.FLOA
T)
val deviceOutput = new CUdeviceptr()
cuMemAlloc(deviceOutput, numElements * Sizeof.FLOAT)
val kernelParameters = Pointer.to(Pointer.to(Array(numElements)), Pointer.to
(deviceInputA), Pointer.to(deviceInputB),
Pointer.to(deviceOutput))
val blockSizeX = 256
val gridSizeX = Math.ceil(numElements.toDouble / blockSizeX).toInt
cuLaunchKernel(function, gridSizeX, 1, 1, blockSizeX, 1, 1, 0, null, kernelP
arameters, null)
cuCtxSynchronize()
val hostOutput = Array.ofDim[Float](numElements)