Skip to content
Snippets Groups Projects
Commit 2c8dd6e5 authored by Georgiana Mania's avatar Georgiana Mania
Browse files

Merge branch 'master' into 'main'

initial commit

See merge request !1
parents bc6ed909 82662749
No related branches found
No related tags found
1 merge request!1initial commit
# Build script for the Kokkos layout demo.
# Configure with -DMU_ARCH=<x86_64|arm|a100> to select the Kokkos backend.
cmake_minimum_required(VERSION 3.20)
project(demo LANGUAGES CXX VERSION 0.0.1)

include(FetchContent)

# silence the CMP0135 policy warning (only on CMake versions that know the policy)
if (POLICY CMP0135)
    cmake_policy(SET CMP0135 NEW)
endif ()

# if using kokkos as shared library, -fPIC is needed
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fPIC")

# Kokkos 4.4.01 release tarball
FetchContent_Declare(kokkos
    URL https://github.com/kokkos/kokkos/releases/download/4.4.01/kokkos-4.4.01.tar.gz)

if (("${MU_ARCH}" STREQUAL "x86_64") OR ("${MU_ARCH}" STREQUAL "arm"))
    # CPU targets: use OpenMP when it is available, otherwise the serial backend.
    find_package(OpenMP)
    if (OpenMP_FOUND)
        set(Kokkos_ENABLE_OPENMP ON CACHE BOOL "" FORCE)
    else()
        set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE)
    endif()
    set(Kokkos_ARCH_NATIVE ON CACHE BOOL "" FORCE)
elseif("${MU_ARCH}" STREQUAL "a100")
    # GPU target: CUDA backend (with lambda support) plus the serial host backend.
    set(Kokkos_ENABLE_SERIAL ON CACHE BOOL "" FORCE)
    set(Kokkos_ENABLE_CUDA ON CACHE BOOL "" FORCE)
    set(Kokkos_ENABLE_CUDA_LAMBDA ON CACHE BOOL "" FORCE)
    set(Kokkos_ARCH_AMPERE80 ON CACHE BOOL "" FORCE)
else()
    # Only list the values that the branches above actually handle.
    message(FATAL_ERROR "'${MU_ARCH}' is not a valid/tested configuration; configure with -DMU_ARCH=<arch> where <arch> is one of: x86_64, arm, a100")
endif()

FetchContent_MakeAvailable(kokkos)

add_executable(demo main.cpp)
# An executable has no consumers, so the dependency does not need to be PUBLIC.
target_link_libraries(demo PRIVATE Kokkos::kokkos)
main.cpp 0 → 100644
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <limits>
#include <memory>
#include <Kokkos_Core.hpp>
#include "Kokkos_Timer.hpp"
// Memory space that belongs to Kokkos' default execution space.
using space_t = Kokkos::DefaultExecutionSpace::memory_space;

// Rank-3 multi-dimensional range policy on the default execution space, using int indices.
using md_range_policy =
    Kokkos::MDRangePolicy<Kokkos::DefaultExecutionSpace,
                          Kokkos::IndexType<int>,
                          Kokkos::Rank<3>>;

// Stopwatch shared by all scenario_* functions; each one resets it before its kernel loop.
Kokkos::Timer timer;
//constexpr int nblocks = 2;
//constexpr int nlev = 90;
//constexpr int nproma = 55000;
/**
 * Verifies that `array[i] == i` for every i in [0, nblocks * nlev * nproma).
 *
 * The check is always active (it does not depend on NDEBUG), so it also runs in
 * optimized/release builds. On the first mismatch the offending index and value
 * are written to stderr and the process is aborted.
 *
 * @param array   buffer holding at least nblocks * nlev * nproma doubles
 * @param nblocks number of blocks
 * @param nlev    number of levels
 * @param nproma  block length
 */
static void validate(double* array, int nblocks, int nlev, int nproma) {
    // Compute the element count in 64 bits so the product cannot overflow int.
    const long long total = static_cast<long long>(nblocks) * nlev * nproma;
    for (long long i = 0; i < total; ++i) {
        // Exact comparison is intended: the expected values are integer indices.
        if (array[i] != static_cast<double>(i)) {
            std::fprintf(stderr, "validate: array[%lld] = %f, expected %lld\n", i, array[i], i);
            std::abort();
        }
    }
}
/**
 * Scenario 1: default-layout view of shape (nblocks, nlev, nproma), indexed as
 * d_view(jb, jk, jc).
 *
 * Wraps `array` in an unmanaged host view, mirrors it into the default execution
 * space's memory space, launches one parallel_for over jc per block jb that
 * writes each element's linear index, copies the result back and validates it.
 *
 * @param array   host buffer of nblocks * nlev * nproma doubles (overwritten)
 * @param nblocks number of blocks (outer, sequential loop)
 * @param nlev    number of levels (inner, sequential loop inside the kernel)
 * @param nproma  block length (parallel range)
 * @param print   when false, the header and timing lines are suppressed (warm-up run)
 */
void scenario_1(double* array, int nblocks, int nlev, int nproma, bool print=true) {
    if (print)
        std::cout << "Default layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl;

    Kokkos::View<double***, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> view(array, nblocks, nlev, nproma);
    using space_t = Kokkos::DefaultExecutionSpace::memory_space;
    auto d_view = Kokkos::create_mirror_view_and_copy(space_t(), view);

    timer.reset();
    for (int jb = 0; jb < nblocks; ++jb) {
        Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
            for (int jk = 0; jk < nlev; ++jk) {
                int p = jb * nlev * nproma + jk * nproma + jc;
                d_view(jb, jk, jc) = p;
            }
        });
    }
    // parallel_for may return before the kernel has finished (e.g. with the CUDA
    // backend), so wait for all launched kernels before reading the timer.
    Kokkos::fence();
    const double elapsed_ms = timer.seconds() * 1000;
    if (print)
        printf("Time = %f ms\n\n", elapsed_ms);

    Kokkos::deep_copy(view, d_view);
    validate(array, nblocks, nlev, nproma);
}
/**
 * Scenario 2: LayoutRight view of shape (nproma, nlev, nblocks), indexed as
 * d_view(jc, jk, jb).
 *
 * Wraps `array` in an unmanaged host view, mirrors it into the default execution
 * space's memory space, launches one parallel_for over jc per block jb that
 * writes each element's linear index, copies the result back and validates it.
 *
 * @param array   host buffer of nblocks * nlev * nproma doubles (overwritten)
 * @param nblocks number of blocks (outer, sequential loop; last view extent)
 * @param nlev    number of levels (inner, sequential loop inside the kernel)
 * @param nproma  block length (parallel range; first view extent)
 * @param print   when false, the header and timing lines are suppressed
 */
void scenario_2(double* array, int nblocks, int nlev, int nproma, bool print=true) {
    if (print)
        // The label reflects the actual view shape and index order used below.
        std::cout << "Right layout; view(array, nproma, nlev, nblocks); d_view(jc, jk, jb) ----- " << std::endl;

    Kokkos::View<double***, Kokkos::LayoutRight, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> view(array, nproma, nlev, nblocks);
    using space_t = Kokkos::DefaultExecutionSpace::memory_space;
    auto d_view = Kokkos::create_mirror_view_and_copy(space_t(), view);

    timer.reset();
    for (int jb = 0; jb < nblocks; ++jb) {
        Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
            for (int jk = 0; jk < nlev; ++jk) {
                // Row-major linear index of (jc, jk, jb) in an (nproma, nlev, nblocks) array.
                int p = jc * nlev * nblocks + jk * nblocks + jb;
                d_view(jc, jk, jb) = p;
            }
        });
    }
    // parallel_for may return before the kernel has finished (e.g. with the CUDA
    // backend), so wait for all launched kernels before reading the timer.
    Kokkos::fence();
    const double elapsed_ms = timer.seconds() * 1000;
    if (print)
        printf("Time = %f ms\n\n", elapsed_ms);

    Kokkos::deep_copy(view, d_view);
    validate(array, nblocks, nlev, nproma);
}
/**
 * Scenario 2b: LayoutRight view of shape (nblocks, nlev, nproma), indexed as
 * d_view(jb, jk, jc).
 *
 * Wraps `array` in an unmanaged host view, mirrors it into the default execution
 * space's memory space, launches one parallel_for over jc per block jb that
 * writes each element's linear index, copies the result back and validates it.
 *
 * @param array   host buffer of nblocks * nlev * nproma doubles (overwritten)
 * @param nblocks number of blocks (outer, sequential loop; first view extent)
 * @param nlev    number of levels (inner, sequential loop inside the kernel)
 * @param nproma  block length (parallel range; last view extent)
 * @param print   when false, the header and timing lines are suppressed
 */
void scenario_2b(double* array, int nblocks, int nlev, int nproma, bool print=true) {
    if (print)
        std::cout << "Right 2b layout; view(array, nblocks, nlev, nproma); d_view(jb, jk, jc) ----- " << std::endl;

    Kokkos::View<double***, Kokkos::LayoutRight, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> view(array, nblocks, nlev, nproma);
    using space_t = Kokkos::DefaultExecutionSpace::memory_space;
    auto d_view = Kokkos::create_mirror_view_and_copy(space_t(), view);

    timer.reset();
    for (int jb = 0; jb < nblocks; ++jb) {
        Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
            for (int jk = 0; jk < nlev; ++jk) {
                // Row-major linear index of (jb, jk, jc) in an (nblocks, nlev, nproma) array.
                int p = jb * nlev * nproma + jk * nproma + jc;
                d_view(jb, jk, jc) = p;
            }
        });
    }
    // parallel_for may return before the kernel has finished (e.g. with the CUDA
    // backend), so wait for all launched kernels before reading the timer.
    Kokkos::fence();
    const double elapsed_ms = timer.seconds() * 1000;
    if (print)
        printf("Time = %f ms\n\n", elapsed_ms);

    Kokkos::deep_copy(view, d_view);
    validate(array, nblocks, nlev, nproma);
}
/**
 * Scenario 3: LayoutLeft view of shape (nproma, nlev, nblocks), indexed as
 * d_view(jc, jk, jb).
 *
 * Wraps `array` in an unmanaged host view, mirrors it into the default execution
 * space's memory space, launches one parallel_for over jc per block jb that
 * writes each element's linear index, copies the result back and validates it.
 *
 * @param array   host buffer of nblocks * nlev * nproma doubles (overwritten)
 * @param nblocks number of blocks (outer, sequential loop; last view extent)
 * @param nlev    number of levels (inner, sequential loop inside the kernel)
 * @param nproma  block length (parallel range; first view extent)
 * @param print   when false, the header and timing lines are suppressed
 */
void scenario_3(double* array, int nblocks, int nlev, int nproma, bool print=true) {
    if (print)
        // The label reflects the actual view shape and index order used below.
        std::cout << "Left layout; view(array, nproma, nlev, nblocks); d_view(jc, jk, jb) ----- " << std::endl;

    Kokkos::View<double***, Kokkos::LayoutLeft, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> view(array, nproma, nlev, nblocks);
    using space_t = Kokkos::DefaultExecutionSpace::memory_space;
    auto d_view = Kokkos::create_mirror_view_and_copy(space_t(), view);

    timer.reset();
    for (int jb = 0; jb < nblocks; ++jb) {
        Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
            for (int jk = 0; jk < nlev; ++jk) {
                // Column-major linear index of (jc, jk, jb) in an (nproma, nlev, nblocks) array.
                int p = jb * nlev * nproma + jk * nproma + jc;
                d_view(jc, jk, jb) = p;
            }
        });
    }
    // parallel_for may return before the kernel has finished (e.g. with the CUDA
    // backend), so wait for all launched kernels before reading the timer.
    Kokkos::fence();
    const double elapsed_ms = timer.seconds() * 1000;
    if (print)
        printf("Time = %f ms\n\n", elapsed_ms);

    Kokkos::deep_copy(view, d_view);
    validate(array, nblocks, nlev, nproma);
}
/**
 * Scenario 4: default-layout view of shape (nproma, nlev, nblocks), indexed as
 * d_view(jc, jk, jb).
 *
 * Wraps `array` in an unmanaged host view, mirrors it into the default execution
 * space's memory space, launches one parallel_for over jc per block jb that
 * writes each element's linear index, copies the result back and validates it.
 *
 * @param array   host buffer of nblocks * nlev * nproma doubles (overwritten)
 * @param nblocks number of blocks (outer, sequential loop; last view extent)
 * @param nlev    number of levels (inner, sequential loop inside the kernel)
 * @param nproma  block length (parallel range; first view extent)
 * @param print   when false, the header and timing lines are suppressed
 */
void scenario_4(double* array, int nblocks, int nlev, int nproma, bool print=true) {
    if (print)
        // The label reflects the actual view shape and index order used below, which
        // also makes this output distinguishable from scenario_1's.
        std::cout << "Default layout; view(array, nproma, nlev, nblocks); d_view(jc, jk, jb) ----- " << std::endl;

    Kokkos::View<double***, Kokkos::HostSpace, Kokkos::MemoryUnmanaged> view(array, nproma, nlev, nblocks);
    using space_t = Kokkos::DefaultExecutionSpace::memory_space;
    auto d_view = Kokkos::create_mirror_view_and_copy(space_t(), view);

    timer.reset();
    for (int jb = 0; jb < nblocks; ++jb) {
        Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
            for (int jk = 0; jk < nlev; ++jk) {
                // Row-major linear index of (jc, jk, jb); validation passes only if the
                // default host layout matches this ordering.
                int p = jc * nlev * nblocks + jk * nblocks + jb;
                d_view(jc, jk, jb) = p;
            }
        });
    }
    // parallel_for may return before the kernel has finished (e.g. with the CUDA
    // backend), so wait for all launched kernels before reading the timer.
    Kokkos::fence();
    const double elapsed_ms = timer.seconds() * 1000;
    if (print)
        printf("Time = %f ms\n\n", elapsed_ms);

    Kokkos::deep_copy(view, d_view);
    validate(array, nblocks, nlev, nproma);
}
/**
 * Reads environment variable `name` and parses it as a strictly positive int.
 *
 * Prints a diagnostic to stderr and exits with EXIT_FAILURE when the variable
 * is unset, is not a plain decimal integer, or is outside (0, INT_MAX].
 */
static int read_positive_env(const char* name) {
    const char* text = std::getenv(name);
    if (text == nullptr) {
        std::cerr << "Environment variable " << name << " is not set" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    char* end = nullptr;
    const long parsed = std::strtol(text, &end, 10);
    if (end == text || *end != '\0' || parsed <= 0 || parsed > std::numeric_limits<int>::max()) {
        std::cerr << "Environment variable " << name << "='" << text
                  << "' must be a positive integer" << std::endl;
        std::exit(EXIT_FAILURE);
    }
    return static_cast<int>(parsed);
}

/**
 * Benchmark driver.
 *
 * Reads NCELLS, NLEV and NPROMA from the environment, derives the number of
 * blocks as ceil(NCELLS / NPROMA), allocates the work array and runs every
 * scenario once (scenario_1 additionally runs first as a silent warm-up).
 *
 * @return 0 on success; exits with EXIT_FAILURE on invalid configuration.
 */
int main() {
    const int ncells = read_positive_env("NCELLS");
    const int nlev = read_positive_env("NLEV");
    const int nproma = read_positive_env("NPROMA");
    const int nblocks = (ncells - 1) / nproma + 1;
    std::cout << "nblocks=" << nblocks << ", nlev=" << nlev << ", nproma=" << nproma << std::endl;

    // The scenarios and their kernels index the array with int, so the total
    // element count has to fit into an int.
    const long long total = static_cast<long long>(nblocks) * nlev * nproma;
    if (total > std::numeric_limits<int>::max()) {
        std::cerr << "nblocks * nlev * nproma = " << total << " does not fit into int" << std::endl;
        return EXIT_FAILURE;
    }

    // Heap allocation instead of a (non-standard, potentially multi-gigabyte)
    // variable-length array on the stack. `new double[n]` leaves the elements
    // uninitialized, exactly like the former stack array, so no extra pass over
    // the memory happens before the scenarios run.
    std::unique_ptr<double[]> array(new double[static_cast<std::size_t>(total)]);

    Kokkos::initialize();
    {
        scenario_1(array.get(), nblocks, nlev, nproma, false);  // warm-up, no output
        scenario_1(array.get(), nblocks, nlev, nproma);
        scenario_2(array.get(), nblocks, nlev, nproma);
        scenario_2b(array.get(), nblocks, nlev, nproma);
        scenario_3(array.get(), nblocks, nlev, nproma);
        scenario_4(array.get(), nblocks, nlev, nproma);
    }
    Kokkos::finalize();
    return 0;
}
/**
*
* #if 0
Kokkos::parallel_for(
"print", md_range_policy({0, 0, 0}, {nblocks, nlev, nproma}),
KOKKOS_LAMBDA(const int jb, const int jk, const int jc) {
int p = jb * nlev * nproma + jk * nproma + jc;
d_view(jb, jk, jc) += p;
printf("%f ", d_view(jb, jk, jc));
});
std::cout << "\n";
#endif
for (int jb = 0 ; jb < nblocks; ++jb)
Kokkos::parallel_for("", Kokkos::RangePolicy<>(0, nproma), KOKKOS_LAMBDA (const int jc) {
for (int jk = 0; jk < nlev; ++jk) {
// int p = jb * nlev * nproma + jk * nproma + jc; left
int p = jc * nlev * nblocks + jk * nblocks + jb;
// d_view(jb, jk, jc) = p;
d_view(jc, jk, jb) = p;
// printf("%f ", d_view(jb, jk, jc));
}});
*/
\ No newline at end of file
#!/bin/bash
# Builds the demo for the selected backend and runs it for every combination of
# NCELLS / NLEV / NPROMA listed below.
#
# Usage: <script> [gpu]
#   gpu        build with -DMU_ARCH=a100 into build_gpu/
#   (default)  build with -DMU_ARCH=x86_64 into build/ and pin 8 OpenMP threads
#
#gcc
#nvhpc/24.7-gcc-11.2.0
#export LD_LIBRARY_PATH

# Lift the stack-size limit (needed when the demo keeps its work array on the stack).
ulimit -s unlimited

if [ "${1:-}" == 'gpu' ]; then
    build_dir=build_gpu
    # Stop on a failed configure/build so a stale binary is never benchmarked.
    cmake -B "$build_dir" -S . -DMU_ARCH=a100 -DCMAKE_CXX_FLAGS="-O3" || exit 1
    cmake --build "$build_dir" --parallel || exit 1
    ncells=(5000000)
    nlev=(90)
    nproma=(10000 30000 50000 100000 1000000 5000000)
else
    build_dir=build
    # Stop on a failed configure/build so a stale binary is never benchmarked.
    cmake -B "$build_dir" -S . -DMU_ARCH=x86_64 -DCMAKE_CXX_FLAGS="-O3" || exit 1
    cmake --build "$build_dir" --parallel || exit 1
    ncells=(5000000)
    nlev=(90)
    nproma=(32) # 64 96 128)
    export OMP_PROC_BIND=close
    export OMP_PLACES=cores
    export OMP_NUM_THREADS=8
fi

# The demo reads its problem size from the NCELLS / NLEV / NPROMA environment variables.
for cells in "${ncells[@]}"; do
    for levels in "${nlev[@]}"; do
        for block_len in "${nproma[@]}"; do
            export NPROMA=$block_len
            export NLEV=$levels
            export NCELLS=$cells
            "./${build_dir}/demo"
        done
    done
done
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment