<?xml version="1.0" encoding="UTF-8"?>
 <rdf:RDF xmlns="http://purl.org/rss/1.0/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://web.resource.org/cc/" xmlns:syn="http://purl.org/rss/1.0/modules/syndication/" xmlns:admin="http://webns.net/mvcb/">
  <channel rdf:about="http://pinboard.in">
    <title>Pinboard (Vaguery)</title>
    <link>https://pinboard.in/u:Vaguery/public/</link>
    <description>recent bookmarks from Vaguery</description>
    <items>
      <rdf:Seq>	<rdf:li rdf:resource="https://magicsquare6.net/doku.php?id=magicsquare6"/>
	<rdf:li rdf:resource="https://developer.apple.com/documentation/accelerate"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1708.03157"/>
	<rdf:li rdf:resource="https://arxiv.org/abs/1702.02939"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1410.4876"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1506.07933"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1303.3692"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1502.02389"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1501.04706"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1503.00576"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1304.2017"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/1302.7193"/>
	<rdf:li rdf:resource="http://developer.nvidia.com/cuda-downloads"/>
	<rdf:li rdf:resource="http://arxiv.org/abs/0906.0231"/>
	<rdf:li rdf:resource="http://games.venturebeat.com/2010/03/26/nvidia-gets-back-in-the-game-with-long-awaited-fermi-graphics-chip/?utm_source=feedburner&amp;utm_medium=feed&amp;utm_campaign=Feed%3A+Venturebeat+(VentureBeat)"/>
	<rdf:li rdf:resource="http://www.gpgpgpu.com/"/>
	<rdf:li rdf:resource="http://ruby-opengl.rubyforge.org/index.html"/>
	<rdf:li rdf:resource="http://www.khronos.org/news/press/releases/the_khronos_group_releases_opencl_1.0_specification/"/>
	<rdf:li rdf:resource="http://mathema.tician.de/software/pycuda"/>
	<rdf:li rdf:resource="http://www.txcorp.com/technologies/GPULib/index.php"/>
	<rdf:li rdf:resource="http://graphics.stanford.edu/projects/brookgpu/"/>
	<rdf:li rdf:resource="http://ati.amd.com/technology/streamcomputing/sdkdwnld.html"/>
	<rdf:li rdf:resource="http://www.cs.lth.se/home/Calle_Lejdfors/pygpu/"/>
	<rdf:li rdf:resource="http://www.cs.mun.ca/~banzhaf/contributions.html#Fast%20Genetic%20Programming%20on%20GPUs"/>
      </rdf:Seq>
    </items>
  </channel><item rdf:about="https://magicsquare6.net/doku.php?id=magicsquare6">
    <title>magicsquare6 [The number of magic squares of order 6]</title>
    <dc:date>2024-09-21T13:50:22+00:00</dc:date>
    <link>https://magicsquare6.net/doku.php?id=magicsquare6</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The number of magic squares of order six counted up to rotations and reflections

]]></description>
<dc:subject>enumeration cloud-computing looking-to-see algorithms rather-interesting hardware-faults GPU to-write-about consider:classification</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:20653753d39c/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:enumeration"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cloud-computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:looking-to-see"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:hardware-faults"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:classification"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://developer.apple.com/documentation/accelerate">
    <title>Accelerate | Apple Developer Documentation</title>
    <dc:date>2020-02-19T11:35:34+00:00</dc:date>
    <link>https://developer.apple.com/documentation/accelerate</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Make large-scale mathematical computations and image calculations, optimized for high performance and low-energy consumption.
]]></description>
<dc:subject>swift library to-understand image-processing GPU programming</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b4c7aa41de93/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:swift"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:library"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-understand"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1708.03157">
    <title>[1708.03157] TensorFlow Enabled Genetic Programming</title>
    <dc:date>2017-09-23T12:07:24+00:00</dc:date>
    <link>https://arxiv.org/abs/1708.03157</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Genetic Programming, a kind of evolutionary computation and machine learning algorithm, is shown to benefit significantly from the application of vectorized data and the TensorFlow numerical computation library on both CPU and GPU architectures. The open source, Python Karoo GP is employed for a series of 190 tests across 6 platforms, with real-world datasets ranging from 18 to 5.5M data points. This body of tests demonstrates that datasets measured in tens and hundreds of data points see 2-15x improvement when moving from the scalar/SymPy configuration to the vector/TensorFlow configuration, with a single core performing on par or better than multiple CPU cores and GPUs. A dataset composed of 90,000 data points demonstrates a single vector/TensorFlow CPU core performing 875x better than 40 scalar/Sympy CPU cores. And a dataset containing 5.5M data points sees GPU configurations out-performing CPU configurations on average by 1.3x.
]]></description>
<dc:subject>hey-I-know-this-guy genetic-programming symbolic-regression library GPU to-write-about</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:04638089cf01/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:hey-I-know-this-guy"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:symbolic-regression"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:library"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-write-about"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="https://arxiv.org/abs/1702.02939">
    <title>[1702.02939] cellGPU: massively parallel simulations of dynamic vertex models</title>
    <dc:date>2017-02-16T12:09:59+00:00</dc:date>
    <link>https://arxiv.org/abs/1702.02939</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Vertex models represent confluent tissue by polygonal or polyhedral tilings of space, with the individual cell interacting via force laws that depend on both the geometry of the cells and the topology of the tessellation. This dependence on the connectivity of the cellular network introduces several complications to performing molecular-dynamics-like simulations of vertex models, and in particular makes parallelizing the simulations difficult. cellGPU addresses this difficulty and lays the foundation for massively parallelized, GPU-based simulations of these models. This article discusses its implementation for a pair of two-dimensional models, and compares the typical performance that can be expected between running cellGPU entirely on the CPU versus its performance when running on a range of commercial and server-grade graphics cards. By implementing the calculation of topological changes and forces on cells in a highly parallelizable fashion, cellGPU enables researchers to simulate time- and length-scales previously inaccessible via existing single-threaded CPU implementations.
]]></description>
<dc:subject>tiling computational-geometry GPU algorithms rather-interesting horse-races computational-complexity</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:36ada8e6d7a9/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:tiling"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computational-geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:horse-races"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computational-complexity"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1410.4876">
    <title>[1410.4876] A GPU-based parallel algorithm for enumerating all chordless cycles in graphs</title>
    <dc:date>2015-12-10T12:34:30+00:00</dc:date>
    <link>http://arxiv.org/abs/1410.4876</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In a finite undirected simple graph, a chordless cycle is an induced subgraph which is a cycle. We propose a GPU parallel algorithm for enumerating all chordless cycles of such a graph. The algorithm, implemented in OpenCL, is based on a previous sequential algorithm developed by the current authors for the same problem. It uses a more compact data structure for solution representation which is suitable for the memory-size limitation of a GPU. Moreover, for graphs with a sufficiently large amount of chordless cycles, the algorithm presents a significant improvement in execution time that outperforms the sequential method.
]]></description>
<dc:subject>graph-theory GPU algorithms feature-extraction nudge-targets rather-interesting combinatorics</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:6d06291b7941/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graph-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feature-extraction"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:combinatorics"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1506.07933">
    <title>[1506.07933] AccFFT: A library for distributed-memory FFT on CPU and GPU architectures</title>
    <dc:date>2015-12-06T11:28:47+00:00</dc:date>
    <link>http://arxiv.org/abs/1506.07933</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We present a new library for parallel distributed Fast Fourier Transforms (FFT). Despite the large amount of work on FFTs, we show that significant speedups can be achieved for distributed transforms. The importance of FFT in science and engineering and the advances in high performance computing necessitate further improvements. AccFFT extends existing FFT libraries for x86 architectures (CPUs) and CUDA-enabled Graphics Processing Units (GPUs) to distributed memory clusters using the Message Passing Interface (MPI). Our library uses specifically optimized all-to-all communication algorithms, to efficiently perform the communication phase of the distributed FFT algorithm. The GPU based algorithm, effectively hides the overhead of PCIe transfers. We present numerical results on the Maverick and Stampede platforms at the Texas Advanced Computing Center (TACC) and on the Titan system at the Oak Ridge National Laboratory (ORNL). We compare the CPU version of AccFFT with P3DFFT and PFFT libraries and we show a consistent 2−3× speedup across a range of processor counts and problem sizes. The comparison of the GPU code with FFTE library shows a similar trend with a 2× speedup. The library is tested up to 131K cores and 4,096 GPUs of Titan, and up to 16K cores of Stampede.
]]></description>
<dc:subject>algorithms signal-processing GPU parallel nudge-targets consider:rediscovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:b3d38bcdc07a/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:signal-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:parallel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:rediscovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1303.3692">
    <title>[1303.3692] Ultra-fast Multiple Genome Sequence Matching Using GPU</title>
    <dc:date>2015-09-06T14:34:02+00:00</dc:date>
    <link>http://arxiv.org/abs/1303.3692</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[In this paper, a contrastive evaluation of massively parallel implementations of suffix tree and suffix array to accelerate genome sequence matching are proposed based on Intel Core i7 3770K quad-core and NVIDIA GeForce GTX680 GPU. Besides suffix array only held approximately 20%~30% of the space relative to suffix tree, the coalesced binary search and tile optimization make suffix array clearly outperform suffix tree using GPU. Consequently, the experimental results show that multiple genome sequence matching based on suffix array is more than 99 times speedup than that of CPU serial implementation. There is no doubt that massively parallel matching algorithm based on suffix array is an efficient approach to high-performance bioinformatics applications.
]]></description>
<dc:subject>GPU bioinformatics algorithms to-learn</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:467cff5c87f7/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:bioinformatics"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:to-learn"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1502.02389">
    <title>[1502.02389] Patterns and Rewrite Rules for Systematic Code Generation (From High-Level Functional Patterns to High-Performance OpenCL Code)</title>
    <dc:date>2015-09-06T12:09:37+00:00</dc:date>
    <link>http://arxiv.org/abs/1502.02389</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Computing systems have become increasingly complex with the emergence of heterogeneous hardware combining multicore CPUs and GPUs. These parallel systems exhibit tremendous computational power at the cost of increased programming effort. This results in a tension between achieving performance and code portability. Code is either tuned using device-specific optimizations to achieve maximum performance or is written in a high-level language to achieve portability at the expense of performance. 
We propose a novel approach that offers high-level programming, code portability and high-performance. It is based on algorithmic pattern composition coupled with a powerful, yet simple, set of rewrite rules. This enables systematic transformation and optimization of a high-level program into a low-level hardware specific representation which leads to high performance code. 
We test our design in practice by describing a subset of the OpenCL programming model with low-level patterns and by implementing a compiler which generates high performance OpenCL code. Our experiments show that we can systematically derive high-performance device-specific implementations from simple high-level algorithmic expressions. The performance of the generated OpenCL code is on par with highly tuned implementations for multicore CPUs and GPUs written by experts
]]></description>
<dc:subject>computer-science compilers nudge nudge-targets GPU rewriting-systems rather-interesting feasible</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:49d5376f4873/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computer-science"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:compilers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rewriting-systems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:rather-interesting"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:feasible"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1501.04706">
    <title>[1501.04706] A Novel Implementation of QuickHull Algorithm on the GPU</title>
    <dc:date>2015-04-10T12:20:45+00:00</dc:date>
    <link>http://arxiv.org/abs/1501.04706</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[We present a novel GPU-accelerated implementation of the QuickHull algorihtm for calculating convex hulls of planar point sets. We also describe a practical solution to demonstrate how to efficiently implement a typical Divide-and-Conquer algorithm on the GPU. We highly utilize the parallel primitives provided by the library Thrust such as the parallel segmented scan for better efficiency and simplicity. To evaluate the performance of our implementation, we carry out four groups of experimental tests using two groups of point sets in two modes on the GPU K20c. Experimental results indicate that: our implementation can achieve the speedups of up to 10.98x over the state-of-art CPU-based convex hull implementation Qhull [16]. In addition, our implementation can find the convex hull of 20M points in about 0.2 seconds.
]]></description>
<dc:subject>algorithms GPU computational-geometry nudge-targets consider:rediscovery</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:52b2b9402e11/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computational-geometry"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:rediscovery"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1503.00576">
    <title>[1503.00576] Counting Triangles in Large Graphs on GPU</title>
    <dc:date>2015-03-10T11:08:57+00:00</dc:date>
    <link>http://arxiv.org/abs/1503.00576</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[The clustering coefficient and the transitivity ratio are concepts often used in network analysis, which creates a need for fast practical algorithms for counting triangles in large graphs. Previous research in this area focused on sequential algorithms, MapReduce parallelization, and fast approximations. 
In this paper we propose a parallel triangle counting algorithm for CUDA GPU. We describe the implementation details necessary to achieve high performance and present the experimental evaluation of our approach. Our algorithm achieves 8 to 15 times speedup over the CPU implementation and is capable of finding 3.8 billion triangles in an 89 million edges graph in less than 10 seconds on the Nvidia Tesla C2050 GPU.
]]></description>
<dc:subject>graph-theory algorithms GPU parallel nudge-targets representation consider:adopting-representations</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1ed1b2e575e6/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graph-theory"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:parallel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:consider:adopting-representations"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1304.2017">
    <title>[1304.2017] Accelerating Image Reconstruction in Three-Dimensional Optoacoustic Tomography on Graphics Processing Units</title>
    <dc:date>2013-05-21T22:52:39+00:00</dc:date>
    <link>http://arxiv.org/abs/1304.2017</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Purpose: Optoacoustic tomography (OAT) is inherently a three-dimensional (3D) inverse problem. However, most studies of OAT image reconstruction still employ two-dimensional (2D) imaging models. One important reason is because 3D image reconstruction is computationally burdensome. The aim of this work is to accelerate existing image reconstruction algorithms for 3D OAT by use of parallel programming techniques. 
Methods: Parallelization strategies are proposed to accelerate a filtered backprojection (FBP) algorithm and two different pairs of projection/backprojection operations that correspond to two different numerical imaging models. The algorithms are designed to fully exploit the parallel computing power of graphic processing units (GPUs). In order to evaluate the parallelization strategies for the projection/backprojection pairs, an iterative image reconstruction algorithm is implemented. Computer-simulation and experimental studies are conducted to investigate the computational efficiency and numerical accuracy of the developed algorithms. 
Results: The GPU implementations improve the computational efficiency by factors of 1, 000, 125, and 250 for the FBP algorithm and the two pairs of projection/backprojection operators, respectively. Accurate images are reconstructed by use of the FBP and iterative image reconstruction algorithms from both computer-simulated and experimental data. 
Conclusions: Parallelization strategies for 3D OAT image reconstruction are proposed for the first time. These GPU-based implementations significantly reduce the computational time for 3D image reconstruction, complementing our earlier work on 3D OAT iterative image reconstruction.
]]></description>
<dc:subject>image-processing inverse-problems parallel distributed-processing GPU nudge-targets algorithms</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3d172d066daa/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:image-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:inverse-problems"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:parallel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:distributed-processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/1302.7193">
    <title>[1302.7193] Matrix-free GPU implementation of a preconditioned conjugate gradient solver for anisotropic elliptic PDEs</title>
    <dc:date>2013-03-07T00:36:39+00:00</dc:date>
    <link>http://arxiv.org/abs/1302.7193</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[Many problems in geophysical and atmospheric modelling require the fast solution of elliptic partial differential equations (PDEs) in "flat" three dimensional geometries. In particular, an anisotropic elliptic PDE for the pressure correction has to be solved at every time step in the dynamical core of many numerical weather prediction models, and equations of a very similar structure arise in global ocean models, subsurface flow simulations and gas and oil reservoir modelling. The elliptic solve is often the bottleneck of the forecast, and an algorithmically optimal method has to be used and implemented efficiently. Graphics Processing Units have been shown to be highly efficient for a wide range of applications in scientific computing, and recently iterative solvers have been parallelised on these architectures. We describe the GPU implementation and optimisation of a Preconditioned Conjugate Gradient (PCG) algorithm for the solution of a three dimensional anisotropic elliptic PDE for the pressure correction in NWP. Our implementation exploits the strong vertical anisotropy of the elliptic operator in the construction of a suitable preconditioner. As the algorithm is memory bound, performance can be improved significantly by reducing the amount of global memory access. We achieve this by using a matrix-free implementation which does not require explicit storage of the matrix and instead recalculates the local stencil. Global memory access can also be reduced by rewriting the algorithm using loop fusion and we show that this further reduces the runtime on the GPU. We demonstrate the performance of our matrix-free GPU code by comparing it to a sequential CPU implementation and to a matrix-explicit GPU code which uses existing libraries. The absolute performance of the algorithm for different problem sizes is quantified in terms of floating point throughput and global memory bandwidth.]]></description>
<dc:subject>representation GPU algorithms matrices nudge-targets</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:7b9cb681474d/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:representation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:matrices"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge-targets"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://developer.nvidia.com/cuda-downloads">
    <title>CUDA Downloads | NVIDIA Developer Zone</title>
    <dc:date>2012-06-24T14:17:34+00:00</dc:date>
    <link>http://developer.nvidia.com/cuda-downloads</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA[This release of the CUDA Toolkit  enables development using GPUs using the Kepler architecture, such as the GeForce GTX680. Feature and functionality builds on the foundation of the CUDA 4.1 release which introduced:

A new  LLVM-based CUDA compiler
1000+ new image processing functions
Redesigned Visual Profiler with automated performance analysis and integrated expert guidance]]></description>
<dc:subject>CUDA GPU programming library MacOS</dc:subject>
<dc:source>https://pinboard.in/</dc:source>
<dc:identifier>https://pinboard.in/u:Vaguery/b:3019d2256215/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:CUDA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:library"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:MacOS"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://arxiv.org/abs/0906.0231">
    <title>[0906.0231] Solving $k$-Nearest Neighbor Problem on Multiple Graphics Processors</title>
    <dc:date>2010-07-26T13:15:10+00:00</dc:date>
    <link>http://arxiv.org/abs/0906.0231</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["We introduced an effective algorithm for k-nearest neighbor problem which works on multiple GPUs. By an experiment, we have shown that it runs more than 330 times faster than an implementation on a single core of an up-to-date CPU. We have also shown that the algorithm is effective from the viewpoint of parallelism of GPUs. That is because 1) there is no synchronization between GPUs until the very end of the process and 2) the workload is well balanced."
]]></description>
<dc:subject>algorithms numerical-methods GPU CUDA machine-learning nudge</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:534b0f61ce60/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:algorithms"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:numerical-methods"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:CUDA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://games.venturebeat.com/2010/03/26/nvidia-gets-back-in-the-game-with-long-awaited-fermi-graphics-chip/?utm_source=feedburner&amp;utm_medium=feed&amp;utm_campaign=Feed%3A+Venturebeat+(VentureBeat)">
    <title>Nvidia gets back in the game with long-awaited Fermi graphics chip | VentureBeat</title>
    <dc:date>2010-04-01T12:00:46+00:00</dc:date>
    <link>http://games.venturebeat.com/2010/03/26/nvidia-gets-back-in-the-game-with-long-awaited-fermi-graphics-chip/?utm_source=feedburner&amp;utm_medium=feed&amp;utm_campaign=Feed%3A+Venturebeat+(VentureBeat)</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["Huang at Nvidia has said many times that CUDA is one of the most important advances in computing, and its promise will be evident as programmers learn how to make use of it. Nvidia is already getting lots of its chips designed into supercomputers and servers, thanks to CUDA. And because of CUDA, the 480 chip can do physics processing 2.5 times faster than the previous generation. That means that the environment in a game, such as water in a stream, behaves far more realistically, adding to the overall illusion of a graphics animation. The real question is whether CUDA is really helping or hurting Nvidia’s cause to bring better graphics to the entire world."
]]></description>
<dc:subject>Nvidia CUDA graphics-processing-unit GPU OpenCL</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:fcebb12d0306/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Nvidia"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:CUDA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graphics-processing-unit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OpenCL"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.gpgpgpu.com/">
    <title>Genetic Programming on General Purpose Graphics Processing Units : gpgpgpu.com</title>
    <dc:date>2009-12-26T13:56:42+00:00</dc:date>
    <link>http://www.gpgpgpu.com/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["The use of Graphics Processing Units (GPUs) in scientific computing is becoming increasingly common. GPUs are low cost parallel processors that can readily be exploited for many types of general purpose computation. Recently, the computational intelligence community has started to develop for the GPU platform. This web page is primarily dedicated to the use of GPUs as a platform for Genetic Programming. "
]]></description>
<dc:subject>genetic-programming GPU grid-computing hardware papers GPGPU</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:1101e0aafdcb/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:grid-computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:hardware"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:papers"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPGPU"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://ruby-opengl.rubyforge.org/index.html">
    <title>ruby-opengl -- Home</title>
    <dc:date>2009-03-03T13:24:54+00:00</dc:date>
    <link>http://ruby-opengl.rubyforge.org/index.html</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["ruby-opengl consists of Ruby extension modules that are bindings for the OpenGL, GLU, and GLUT libraries. It is intended to be a replacement for -- and uses the code from -- Yoshi's ruby-opengl."
]]></description>
<dc:subject>Ruby OpenGL GPU Nudge programming library free API</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:f59a1374bc03/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Ruby"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OpenGL"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Nudge"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:library"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:free"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:API"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.khronos.org/news/press/releases/the_khronos_group_releases_opencl_1.0_specification/">
    <title>Khronos Press Releases - The Khronos Group Releases OpenCL 1.0 Specification</title>
    <dc:date>2008-12-12T12:18:27+00:00</dc:date>
    <link>http://www.khronos.org/news/press/releases/the_khronos_group_releases_opencl_1.0_specification/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["“The opportunity to effectively unlock the capabilities of new generations of programmable compute and graphics processors drove the unprecedented level of cooperation to refine the initial proposal from Apple into the ratified OpenCL 1.0 specification,” said Neil Trevett, chair of the OpenCL working group, president of the Khronos Group and vice president at NVIDIA. “As an open, cross-platform standard, OpenCL is a fundamental technology for next generation software development that will play a central role in the Khronos API ecosystem and we look forward to seeing implementations within the next year.”
]]></description>
<dc:subject>OpenCL via:logista GPU specification programming Snow-Leopard computation Nudge</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:36cd5f7560d2/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:OpenCL"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:via:logista"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:specification"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Snow-Leopard"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computation"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Nudge"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://mathema.tician.de/software/pycuda">
    <title>PyCuda | Andreas Klöckner's web page</title>
    <dc:date>2008-06-27T12:02:03+00:00</dc:date>
    <link>http://mathema.tician.de/software/pycuda</link>
    <dc:creator>Vaguery</dc:creator><dc:subject>GPU computing programming processing nudge CUDA Python</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:9a375c8c0286/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:CUDA"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Python"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.txcorp.com/technologies/GPULib/index.php">
    <title>GPULib : Technologies : Tech-X Corporation</title>
    <dc:date>2008-06-27T12:01:50+00:00</dc:date>
    <link>http://www.txcorp.com/technologies/GPULib/index.php</link>
    <dc:creator>Vaguery</dc:creator><dc:subject>GPU computing programming processing nudge</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:51f149c62808/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://graphics.stanford.edu/projects/brookgpu/">
    <title>BrooksGPU</title>
    <dc:date>2008-06-27T12:01:34+00:00</dc:date>
    <link>http://graphics.stanford.edu/projects/brookgpu/</link>
    <dc:creator>Vaguery</dc:creator><dc:subject>GPU computing programming processing nudge</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:498233fd0663/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://ati.amd.com/technology/streamcomputing/sdkdwnld.html">
    <title>AMD Stream SDK</title>
    <dc:date>2008-06-27T12:01:16+00:00</dc:date>
    <link>http://ati.amd.com/technology/streamcomputing/sdkdwnld.html</link>
    <dc:creator>Vaguery</dc:creator><dc:subject>GPU computing programming processing nudge</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:e3b53b314112/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:processing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:nudge"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.lth.se/home/Calle_Lejdfors/pygpu/">
    <title>PyGPU - Python for the GPU</title>
    <dc:date>2007-07-21T14:36:37+00:00</dc:date>
    <link>http://www.cs.lth.se/home/Calle_Lejdfors/pygpu/</link>
    <dc:creator>Vaguery</dc:creator><description><![CDATA["PyGPU is an embedded language in Python, that allow most of Python features (list-comprehensions, higher-order functions, iterators) to be used for constructing GPU algorithms. It uses a image abstraction to abstract away implementation details of the GPU, while still allowing translation to very efficient GPU native-code."
]]></description>
<dc:subject>GPU graphics-processing-unit python clustering parallel computing programming Moore's-Law Nudge</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:d0d5e457ba25/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graphics-processing-unit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:python"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:clustering"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:parallel"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Moore's-Law"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Nudge"/>
</rdf:Bag></taxo:topics>
</item>
<item rdf:about="http://www.cs.mun.ca/~banzhaf/contributions.html#Fast%20Genetic%20Programming%20on%20GPUs">
    <title>W. Banzhaf, List of Conference Contributions: Fast Genetic Programming on GPUs</title>
    <dc:date>2007-07-21T14:26:59+00:00</dc:date>
    <link>http://www.cs.mun.ca/~banzhaf/contributions.html#Fast%20Genetic%20Programming%20on%20GPUs</link>
    <dc:creator>Vaguery</dc:creator><dc:subject>genetic-programming GPU graphics-processing-unit video cards cluster-computing research machine-learning Moore's-Law</dc:subject>
<dc:identifier>https://pinboard.in/u:Vaguery/b:daa025ce53df/</dc:identifier>
<taxo:topics><rdf:Bag>	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:genetic-programming"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:GPU"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:graphics-processing-unit"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:video"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cards"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:cluster-computing"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:research"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:machine-learning"/>
	<rdf:li rdf:resource="https://pinboard.in/u:Vaguery/t:Moore's-Law"/>
</rdf:Bag></taxo:topics>
</item>
</rdf:RDF>