|
#include <iostream>
|
|
#include <chrono>
|
|
#include <fcntl.h>
|
|
#include <sys/mman.h>
|
|
#include <unistd.h>
|
|
#include <cstring>
|
|
#include <cerrno>
|
|
#include <getopt.h>
|
|
#include <sys/mman.h>
|
|
#include <sys/socket.h>
|
|
#include <sys/stat.h>
|
|
#include <time.h>
|
|
#include <stdbool.h>
|
|
#include <assert.h>
|
|
#include <endian.h>
|
|
#include <errno.h>
|
|
#include <fcntl.h>
|
|
#include <stdarg.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <sys/ioctl.h>
|
|
#include <sys/stat.h>
|
|
#include <sys/types.h>
|
|
#include <sys/poll.h>
|
|
#include <unistd.h>
|
|
#include <stdbool.h>
|
|
#include <sys/eventfd.h>
|
|
|
|
#include <sched.h>
|
|
#include <pthread.h>
|
|
|
|
#include <stdint.h>
|
|
#include <memory.h>
|
|
#include <sys/time.h>
|
|
|
|
#include <arm_neon.h>
|
|
|
|
/* Device-node paths of the DMA heaps an allocation can be requested from. */
#define DMA_HEAP_UNCACHE_PATH        "/dev/dma_heap/system-uncached"
#define DMA_HEAP_PATH                "/dev/dma_heap/system"
#define DMA_HEAP_DMA32_UNCACHED_PATH "/dev/dma_heap/system-uncached-dma32"
#define DMA_HEAP_DMA32_PATH          "/dev/dma_heap/system-dma32"
#define CMA_HEAP_UNCACHED_PATH       "/dev/dma_heap/cma-uncached"
#define RV1106_CMA_HEAP_PATH         "/dev/rk_dma_heap/rk-dma-heap-cma"

/* Heap node that the benchmark functions in this file open. */
#define DMA_TYPE DMA_HEAP_DMA32_UNCACHED_PATH
|
|
|
|
/* Forward declarations of the dma-buf helpers defined further down. */

/* Issues the dma-buf sync ioctl with the START | RW flags on `fd`. */
int dma_sync_device_to_cpu(int fd);

/* Issues the dma-buf sync ioctl with the END | RW flags on `fd`. */
int dma_sync_cpu_to_device(int fd);

/*
 * Allocates `size` bytes from the heap node at `path`, maps the buffer and
 * hands back its descriptor through `fd` and its address through `va`.
 * Returns 0 on success and a negative value on failure.
 */
int dma_buf_alloc(const char *path, size_t size, int *fd, void **va);

/* Unmaps `va` (`size` bytes), closes `*fd` and resets it to -1. */
void dma_buf_free(size_t size, int *fd, void *va);
|
|
|
|
/*
 * Local copy of the dma-buf CPU-access synchronisation ABI (same values and
 * layout as the kernel's <linux/dma-buf.h>).
 *
 * Flag bits 0-1 select the access direction, bit 2 selects whether the
 * ioctl marks the start or the end of the access.
 */
#define DMA_BUF_SYNC_READ  (1 << 0)
#define DMA_BUF_SYNC_WRITE (2 << 0)
#define DMA_BUF_SYNC_RW    (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE)
#define DMA_BUF_SYNC_START (0 << 2)
#define DMA_BUF_SYNC_END   (1 << 2)

/*
 * Argument of DMA_BUF_IOCTL_SYNC.
 *
 * `flags` is declared as uint64_t (from <stdint.h>) rather than __u64: this
 * struct appears before the file's own __u64 typedef, so relying on that name
 * here only compiles when a system header happens to provide it.
 */
struct dma_buf_sync {
    uint64_t flags; /* combination of the DMA_BUF_SYNC_* values above */
};

#define DMA_BUF_BASE 'b'
#define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync)
|
|
|
|
/* 1 MiB. Parenthesised so the macro stays one value inside larger expressions
 * (e.g. `x / CMA_HEAP_SIZE`). */
#define CMA_HEAP_SIZE (1024 * 1024)
|
|
|
|
/* Kernel-style fixed-width integer names used by the ioctl structures. */
using __u64 = unsigned long long;
using __u32 = unsigned int;

/* Argument block exchanged with the heap node through DMA_HEAP_IOCTL_ALLOC. */
struct dma_heap_allocation_data {
    __u64 len;        /* in: requested buffer length in bytes */
    __u32 fd;         /* out: descriptor of the allocated buffer */
    __u32 fd_flags;   /* in: open flags for that descriptor (O_CLOEXEC | O_RDWR here) */
    __u64 heap_flags; /* in: left at zero by every caller in this file */
};

#define DMA_HEAP_IOC_MAGIC 'H'
#define DMA_HEAP_IOCTL_ALLOC \
    _IOWR(DMA_HEAP_IOC_MAGIC, 0x0, struct dma_heap_allocation_data)
|
|
|
|
int dma_sync_device_to_cpu(int fd) {
|
|
struct dma_buf_sync sync = {0};
|
|
|
|
sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
|
|
return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
|
|
}
|
|
|
|
int dma_sync_cpu_to_device(int fd) {
|
|
struct dma_buf_sync sync = {0};
|
|
|
|
sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
|
|
return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
|
|
}
|
|
|
|
int dma_buf_alloc(const char *path, size_t size, int *fd, void **va) {
|
|
int ret;
|
|
int prot;
|
|
void *mmap_va;
|
|
int dma_heap_fd = -1;
|
|
struct dma_heap_allocation_data buf_data;
|
|
|
|
/* open dma_heap fd */
|
|
dma_heap_fd = open(path, O_RDWR);
|
|
if (dma_heap_fd < 0) {
|
|
printf("open %s fail!\n", path);
|
|
return dma_heap_fd;
|
|
}
|
|
|
|
/* alloc buffer */
|
|
memset(&buf_data, 0x0, sizeof(struct dma_heap_allocation_data));
|
|
|
|
buf_data.len = size;
|
|
buf_data.fd_flags = O_CLOEXEC | O_RDWR;
|
|
ret = ioctl(dma_heap_fd, DMA_HEAP_IOCTL_ALLOC, &buf_data);
|
|
if (ret < 0) {
|
|
printf("RK_DMA_HEAP_ALLOC_BUFFER failed\n");
|
|
return ret;
|
|
}
|
|
|
|
/* mmap va */
|
|
if (fcntl(buf_data.fd, F_GETFL) & O_RDWR)
|
|
prot = PROT_READ | PROT_WRITE;
|
|
else
|
|
prot = PROT_READ;
|
|
|
|
/* mmap contiguors buffer to user */
|
|
mmap_va = (void *)mmap(NULL, buf_data.len, prot, MAP_SHARED, buf_data.fd, 0);
|
|
if (mmap_va == MAP_FAILED) {
|
|
printf("mmap failed: %s\n", strerror(errno));
|
|
return -errno;
|
|
}
|
|
|
|
*va = mmap_va;
|
|
*fd = buf_data.fd;
|
|
|
|
close(dma_heap_fd);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Releases a buffer obtained from dma_buf_alloc().
 *
 * size: length in bytes of the mapping at `va`.
 * fd:   in/out - buffer descriptor; closed and reset to -1. May be null.
 * va:   mapping address; skipped when null or MAP_FAILED.
 */
void dma_buf_free(size_t size, int *fd, void *va) {
    /* Pass the length as size_t: narrowing it to int would corrupt the
     * length of mappings of 2 GiB and above. */
    if (va != NULL && va != MAP_FAILED) {
        munmap(va, size);
    }

    if (fd != NULL) {
        if (*fd >= 0) {
            close(*fd);
        }
        *fd = -1;
    }
}
|
|
|
|
/* Number of bytes allocated and copied per benchmark run. Parenthesised so the
 * macro stays one value inside larger expressions. */
#define SIZE (13 * 1024 * 1024) // 13MB
|
|
|
|
/*
 * Copies `n` bytes from `src` to `dest`; the regions must not overlap.
 *
 * With NEON available the bulk is moved in 16-byte vectors and the remaining
 * (n % 16) bytes one at a time; without NEON the whole copy is done byte-wise.
 *
 * Exactly `n` bytes are read and written for any pointer alignment. The
 * previous alignment prologue started the vector loop at a non-multiple-of-16
 * offset while bounding it by n / 16 * 16, which ran up to 15 bytes past the
 * end of both buffers, and it printed to stdout for every prologue byte.
 */
void neon_memcpy(void* dest, const void* src, size_t n) {
    const uint8_t* src_ptr = static_cast<const uint8_t*>(src);
    uint8_t* dest_ptr = static_cast<uint8_t*>(dest);
    size_t i = 0;

#if defined(__ARM_NEON)
    // 16 bytes per iteration. The byte-element load/store intrinsics are used
    // on unaligned addresses here, so no alignment prologue is performed.
    // NOTE(review): assumes both buffers are normal (non-device) memory - confirm
    // for any mapping type this is used with.
    for (; n - i >= 16; i += 16) {
        vst1q_u8(dest_ptr + i, vld1q_u8(src_ptr + i));
    }
#endif

    // Remaining tail (or everything, when NEON is not available).
    for (; i < n; ++i) {
        dest_ptr[i] = src_ptr[i];
    }
}
|
|
|
|
int test_mem_copy()
|
|
{
|
|
int dma_fd = open(DMA_TYPE, O_RDWR);
|
|
if (dma_fd < 0) {
|
|
std::cerr << "文件描述符无效, 错误代码: " << errno << std::endl;
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
struct dma_heap_allocation_data buf_data;
|
|
/* alloc buffer */
|
|
memset(&buf_data, 0x0, sizeof(struct dma_heap_allocation_data));
|
|
buf_data.len = SIZE;
|
|
buf_data.fd_flags = O_CLOEXEC | O_RDWR;
|
|
int ret = ioctl(dma_fd, DMA_HEAP_IOCTL_ALLOC, &buf_data);
|
|
if (ret < 0) {
|
|
std::cerr << "RK_DMA_HEAP_ALLOC_BUFFER failed" << std::endl;
|
|
return ret;
|
|
}
|
|
|
|
/* mmap va */
|
|
int prot;
|
|
if (fcntl(buf_data.fd, F_GETFL) & O_RDWR)
|
|
prot = PROT_READ | PROT_WRITE;
|
|
else
|
|
prot = PROT_READ;
|
|
|
|
// std::cout << "DMA FD: " << dma_fd << ", SIZE: " << SIZE << std::endl;
|
|
void* dma_memory = mmap(nullptr, SIZE, prot, MAP_SHARED, buf_data.fd, 0);
|
|
if (dma_memory == MAP_FAILED)
|
|
{
|
|
std::cerr << "DMA内存映射失败, 错误代码: " << errno << std::endl;
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
int shm_fd = open("/dev/shm/test_shared_memory", O_RDWR | O_CREAT, 0666);
|
|
if (shm_fd < 0)
|
|
{
|
|
std::cerr << "无法打开 /dev/shm/test_shared_memory" << std::endl;
|
|
munmap(dma_memory, SIZE);
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
// std::cout << "SHM FD: " << shm_fd << ", SIZE: " << SIZE << std::endl;
|
|
ftruncate(shm_fd, SIZE);
|
|
void* shm_memory = mmap(nullptr, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
|
|
if (shm_memory == MAP_FAILED)
|
|
{
|
|
std::cerr << "SHM内存映射失败, 错误代码: " << errno << std::endl;
|
|
close(shm_fd);
|
|
munmap(dma_memory, SIZE);
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
/*
|
|
count = 10: 发现neon并没有比memcpy快
|
|
copy1[10]拷贝使用的时间1: 10.2568 毫秒
|
|
copy2[10]拷贝使用的时间2: 10.5191 毫秒
|
|
copy3[10]拷贝使用的时间3: 86.7563 毫秒
|
|
*/
|
|
int count = 10;
|
|
auto start = std::chrono::high_resolution_clock::now();
|
|
for(int i = 0; i < count; i++)
|
|
{
|
|
std::memcpy(shm_memory, dma_memory, SIZE);
|
|
}
|
|
auto end = std::chrono::high_resolution_clock::now();
|
|
std::chrono::duration<double, std::milli> duration = end - start;
|
|
std::cout << "copy1[" << count << "]" << "拷贝使用的时间1: " << duration.count()*1.0/count << " 毫秒" << std::endl;
|
|
start = std::chrono::high_resolution_clock::now();
|
|
for(int i = 0; i < count; i++)
|
|
{
|
|
char *ch = new char[SIZE];
|
|
std::memcpy(ch, dma_memory, SIZE);
|
|
std::memcpy(shm_memory, ch, SIZE);
|
|
delete[] ch;
|
|
}
|
|
end = std::chrono::high_resolution_clock::now();
|
|
duration = end - start;
|
|
std::cout << "copy2[" << count << "]" << "拷贝使用的时间2: " << duration.count()*1.0/count << " 毫秒" << std::endl;
|
|
start = std::chrono::high_resolution_clock::now();
|
|
for(int i = 0; i < count; i++)
|
|
{
|
|
neon_memcpy(shm_memory, dma_memory, SIZE);
|
|
}
|
|
end = std::chrono::high_resolution_clock::now();
|
|
duration = end - start;
|
|
std::cout << "copy3[" << count << "]" << "拷贝使用的时间3: " << duration.count()*1.0/count << " 毫秒" << std::endl;
|
|
|
|
munmap(shm_memory, SIZE);
|
|
munmap(dma_memory, SIZE);
|
|
close(shm_fd);
|
|
close(dma_fd);
|
|
close(buf_data.fd);
|
|
return 0;
|
|
}
|
|
|
|
#define COPY_DIRECT 1
|
|
#define COPY_CACHE 2
|
|
int test_mem_copy1(int flag)
|
|
{
|
|
int count = 100;
|
|
auto start = std::chrono::high_resolution_clock::now();
|
|
for(int i = 0; i < count; i++)
|
|
{
|
|
int dma_fd = open(DMA_TYPE, O_RDWR);
|
|
if (dma_fd < 0) {
|
|
std::cerr << "文件描述符无效, 错误代码: " << errno << std::endl;
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
struct dma_heap_allocation_data buf_data;
|
|
/* alloc buffer */
|
|
memset(&buf_data, 0x0, sizeof(struct dma_heap_allocation_data));
|
|
buf_data.len = SIZE;
|
|
buf_data.fd_flags = O_CLOEXEC | O_RDWR;
|
|
int ret = ioctl(dma_fd, DMA_HEAP_IOCTL_ALLOC, &buf_data);
|
|
if (ret < 0) {
|
|
std::cerr << "RK_DMA_HEAP_ALLOC_BUFFER failed" << std::endl;
|
|
return ret;
|
|
}
|
|
|
|
/* mmap va */
|
|
int prot;
|
|
if (fcntl(buf_data.fd, F_GETFL) & O_RDWR)
|
|
prot = PROT_READ | PROT_WRITE;
|
|
else
|
|
prot = PROT_READ;
|
|
|
|
// std::cout << "DMA FD: " << dma_fd << ", SIZE: " << SIZE << std::endl;
|
|
void* dma_memory = mmap(nullptr, SIZE, prot, MAP_SHARED, buf_data.fd, 0);
|
|
if (dma_memory == MAP_FAILED)
|
|
{
|
|
std::cerr << "DMA内存映射失败, 错误代码: " << errno << std::endl;
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
int shm_fd = open("/dev/shm/test_shared_memory", O_RDWR | O_CREAT, 0666);
|
|
if (shm_fd < 0)
|
|
{
|
|
std::cerr << "无法打开 /dev/shm/test_shared_memory" << std::endl;
|
|
munmap(dma_memory, SIZE);
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
// std::cout << "SHM FD: " << shm_fd << ", SIZE: " << SIZE << std::endl;
|
|
ftruncate(shm_fd, SIZE);
|
|
void* shm_memory = mmap(nullptr, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0);
|
|
if (shm_memory == MAP_FAILED)
|
|
{
|
|
std::cerr << "SHM内存映射失败, 错误代码: " << errno << std::endl;
|
|
close(shm_fd);
|
|
munmap(dma_memory, SIZE);
|
|
close(dma_fd);
|
|
return -1;
|
|
}
|
|
|
|
if(flag == COPY_DIRECT)
|
|
{
|
|
std::memcpy(shm_memory, dma_memory, SIZE);
|
|
}
|
|
else
|
|
{
|
|
char *ch = new char[SIZE];
|
|
std::memcpy(ch, dma_memory, SIZE);
|
|
std::memcpy(shm_memory, ch, SIZE);
|
|
delete[] ch;
|
|
}
|
|
|
|
munmap(shm_memory, SIZE);
|
|
munmap(dma_memory, SIZE);
|
|
close(shm_fd);
|
|
close(dma_fd);
|
|
close(buf_data.fd);
|
|
}
|
|
auto end = std::chrono::high_resolution_clock::now();
|
|
std::chrono::duration<double, std::milli> duration = end - start;
|
|
std::cout << (flag == COPY_DIRECT ? "copy1" : "copy2") << "[" << count << "]" << "拷贝使用的时间: " << duration.count()*1.0/count << " 毫秒" << std::endl;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Entry point: runs the copy benchmark and reports its outcome through the
 * exit status (0 on success, 1 when the benchmark could not run).
 */
int main() {
    // Disabled: allocation smoke test through dma_buf_alloc().
    // int dma_fd;
    // void* dma_memory;
    // int r = dma_buf_alloc(DMA_HEAP_DMA32_UNCACHED_PATH, SIZE, &dma_fd, (void **)&dma_memory);
    // if (r < 0)
    // {
    //     printf("alloc data %s failed!\n", DMA_HEAP_DMA32_UNCACHED_PATH);
    //     return -1;
    // }
    // else
    // {
    //     printf("alloc data %s success!\n", DMA_HEAP_DMA32_UNCACHED_PATH);
    // }

    const int rc = test_mem_copy();

    // Disabled: per-iteration allocate/copy/release benchmarks.
    // test_mem_copy1(COPY_DIRECT);
    // test_mem_copy1(COPY_CACHE);

    // Surface a failed benchmark instead of always exiting with 0.
    return rc == 0 ? 0 : 1;
}
|