Wiki »
内存拷贝
- http服务端:表单方式上传文件
查看...查看...
#include <iostream> #include <chrono> #include <fcntl.h> #include <sys/mman.h> #include <unistd.h> #include <cstring> #include <cerrno> #include <getopt.h> #include <sys/mman.h> #include <sys/socket.h> #include <sys/stat.h> #include <time.h> #include <stdbool.h> #include <assert.h> #include <endian.h> #include <errno.h> #include <fcntl.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/ioctl.h> #include <sys/stat.h> #include <sys/types.h> #include <sys/poll.h> #include <unistd.h> #include <stdbool.h> #include <sys/eventfd.h> #include <sched.h> #include <pthread.h> #include <stdint.h> #include <memory.h> #include <sys/time.h> #include <arm_neon.h> #define DMA_HEAP_UNCACHE_PATH "/dev/dma_heap/system-uncached" #define DMA_HEAP_PATH "/dev/dma_heap/system" #define DMA_HEAP_DMA32_UNCACHED_PATH "/dev/dma_heap/system-uncached-dma32" #define DMA_HEAP_DMA32_PATH "/dev/dma_heap/system-dma32" #define CMA_HEAP_UNCACHED_PATH "/dev/dma_heap/cma-uncached" #define RV1106_CMA_HEAP_PATH "/dev/rk_dma_heap/rk-dma-heap-cma" #define DMA_TYPE DMA_HEAP_DMA32_UNCACHED_PATH int dma_sync_device_to_cpu(int fd); int dma_sync_cpu_to_device(int fd); int dma_buf_alloc(const char *path, size_t size, int *fd, void **va); void dma_buf_free(size_t size, int *fd, void *va); #define DMA_BUF_SYNC_READ (1 << 0) #define DMA_BUF_SYNC_WRITE (2 << 0) #define DMA_BUF_SYNC_RW (DMA_BUF_SYNC_READ | DMA_BUF_SYNC_WRITE) #define DMA_BUF_SYNC_START (0 << 2) #define DMA_BUF_SYNC_END (1 << 2) struct dma_buf_sync { __u64 flags; }; #define DMA_BUF_BASE 'b' #define DMA_BUF_IOCTL_SYNC _IOW(DMA_BUF_BASE, 0, struct dma_buf_sync) #define CMA_HEAP_SIZE 1024 * 1024 typedef unsigned long long __u64; typedef unsigned int __u32; struct dma_heap_allocation_data { __u64 len; __u32 fd; __u32 fd_flags; __u64 heap_flags; }; #define DMA_HEAP_IOC_MAGIC 'H' #define DMA_HEAP_IOCTL_ALLOC _IOWR(DMA_HEAP_IOC_MAGIC, 0x0,\ struct dma_heap_allocation_data) int dma_sync_device_to_cpu(int fd) { struct 
dma_buf_sync sync = {0}; sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW; return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); } int dma_sync_cpu_to_device(int fd) { struct dma_buf_sync sync = {0}; sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW; return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync); } int dma_buf_alloc(const char *path, size_t size, int *fd, void **va) { int ret; int prot; void *mmap_va; int dma_heap_fd = -1; struct dma_heap_allocation_data buf_data; /* open dma_heap fd */ dma_heap_fd = open(path, O_RDWR); if (dma_heap_fd < 0) { printf("open %s fail!\n", path); return dma_heap_fd; } /* alloc buffer */ memset(&buf_data, 0x0, sizeof(struct dma_heap_allocation_data)); buf_data.len = size; buf_data.fd_flags = O_CLOEXEC | O_RDWR; ret = ioctl(dma_heap_fd, DMA_HEAP_IOCTL_ALLOC, &buf_data); if (ret < 0) { printf("RK_DMA_HEAP_ALLOC_BUFFER failed\n"); return ret; } /* mmap va */ if (fcntl(buf_data.fd, F_GETFL) & O_RDWR) prot = PROT_READ | PROT_WRITE; else prot = PROT_READ; /* mmap contiguors buffer to user */ mmap_va = (void *)mmap(NULL, buf_data.len, prot, MAP_SHARED, buf_data.fd, 0); if (mmap_va == MAP_FAILED) { printf("mmap failed: %s\n", strerror(errno)); return -errno; } *va = mmap_va; *fd = buf_data.fd; close(dma_heap_fd); return 0; } void dma_buf_free(size_t size, int *fd, void *va) { int len; len = size; munmap(va, len); close(*fd); *fd = -1; } #define SIZE 13 * 1024 * 1024 // 13MB void neon_memcpy(void* dest, const void* src, size_t n) { const uint8_t* src_ptr = (const uint8_t*)src; uint8_t* dest_ptr = (uint8_t*)dest; // 确保源和目标指针都是16字节对齐 if (((uintptr_t)src_ptr % 16 != 0) || ((uintptr_t)dest_ptr % 16 != 0)) { // 如果不对齐,先处理未对齐的字节 size_t offset = 0; while (offset < n && ((uintptr_t)(src_ptr + offset) % 16 != 0 || (uintptr_t)(dest_ptr + offset) % 16 != 0)) { dest_ptr[offset] = src_ptr[offset]; offset++; std::cout << "offset: " << offset << std::endl; } // 现在 offset 已经对齐,可以使用 NEON 进行拷贝 for (size_t i = offset; i < n / 16 * 16; i += 16) { uint8x16_t data = 
vld1q_u8(src_ptr + i); vst1q_u8(dest_ptr + i, data); } // 处理剩余的字节 for (size_t i = (n / 16) * 16; i < n; i++) { dest_ptr[i] = src_ptr[i]; } return; } // 每次拷贝16字节 for (size_t i = 0; i < n / 16; i++) { // 加载16字节 uint8x16_t data = vld1q_u8(src_ptr + i * 16); // 存储16字节 vst1q_u8(dest_ptr + i * 16, data); } // 处理剩余的字节 for (size_t i = (n / 16) * 16; i < n; i++) { dest_ptr[i] = src_ptr[i]; } } int test_mem_copy() { int dma_fd = open(DMA_TYPE, O_RDWR); if (dma_fd < 0) { std::cerr << "文件描述符无效, 错误代码: " << errno << std::endl; close(dma_fd); return -1; } struct dma_heap_allocation_data buf_data; /* alloc buffer */ memset(&buf_data, 0x0, sizeof(struct dma_heap_allocation_data)); buf_data.len = SIZE; buf_data.fd_flags = O_CLOEXEC | O_RDWR; int ret = ioctl(dma_fd, DMA_HEAP_IOCTL_ALLOC, &buf_data); if (ret < 0) { std::cerr << "RK_DMA_HEAP_ALLOC_BUFFER failed" << std::endl; return ret; } /* mmap va */ int prot; if (fcntl(buf_data.fd, F_GETFL) & O_RDWR) prot = PROT_READ | PROT_WRITE; else prot = PROT_READ; // std::cout << "DMA FD: " << dma_fd << ", SIZE: " << SIZE << std::endl; void* dma_memory = mmap(nullptr, SIZE, prot, MAP_SHARED, buf_data.fd, 0); if (dma_memory == MAP_FAILED) { std::cerr << "DMA内存映射失败, 错误代码: " << errno << std::endl; close(dma_fd); return -1; } int shm_fd = open("/dev/shm/test_shared_memory", O_RDWR | O_CREAT, 0666); if (shm_fd < 0) { std::cerr << "无法打开 /dev/shm/test_shared_memory" << std::endl; munmap(dma_memory, SIZE); close(dma_fd); return -1; } // std::cout << "SHM FD: " << shm_fd << ", SIZE: " << SIZE << std::endl; ftruncate(shm_fd, SIZE); void* shm_memory = mmap(nullptr, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); if (shm_memory == MAP_FAILED) { std::cerr << "SHM内存映射失败, 错误代码: " << errno << std::endl; close(shm_fd); munmap(dma_memory, SIZE); close(dma_fd); return -1; } /* count = 10: 发现neon并没有比memcpy快 copy1[10]拷贝使用的时间1: 10.2568 毫秒 copy2[10]拷贝使用的时间2: 10.5191 毫秒 copy3[10]拷贝使用的时间3: 86.7563 毫秒 */ int count = 10; auto start = 
std::chrono::high_resolution_clock::now(); for(int i = 0; i < count; i++) { std::memcpy(shm_memory, dma_memory, SIZE); } auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double, std::milli> duration = end - start; std::cout << "copy1[" << count << "]" << "拷贝使用的时间1: " << duration.count()*1.0/count << " 毫秒" << std::endl; start = std::chrono::high_resolution_clock::now(); for(int i = 0; i < count; i++) { char *ch = new char[SIZE]; std::memcpy(ch, dma_memory, SIZE); std::memcpy(shm_memory, ch, SIZE); delete[] ch; } end = std::chrono::high_resolution_clock::now(); duration = end - start; std::cout << "copy2[" << count << "]" << "拷贝使用的时间2: " << duration.count()*1.0/count << " 毫秒" << std::endl; start = std::chrono::high_resolution_clock::now(); for(int i = 0; i < count; i++) { neon_memcpy(shm_memory, dma_memory, SIZE); } end = std::chrono::high_resolution_clock::now(); duration = end - start; std::cout << "copy3[" << count << "]" << "拷贝使用的时间3: " << duration.count()*1.0/count << " 毫秒" << std::endl; munmap(shm_memory, SIZE); munmap(dma_memory, SIZE); close(shm_fd); close(dma_fd); close(buf_data.fd); return 0; } #define COPY_DIRECT 1 #define COPY_CACHE 2 int test_mem_copy1(int flag) { int count = 100; auto start = std::chrono::high_resolution_clock::now(); for(int i = 0; i < count; i++) { int dma_fd = open(DMA_TYPE, O_RDWR); if (dma_fd < 0) { std::cerr << "文件描述符无效, 错误代码: " << errno << std::endl; close(dma_fd); return -1; } struct dma_heap_allocation_data buf_data; /* alloc buffer */ memset(&buf_data, 0x0, sizeof(struct dma_heap_allocation_data)); buf_data.len = SIZE; buf_data.fd_flags = O_CLOEXEC | O_RDWR; int ret = ioctl(dma_fd, DMA_HEAP_IOCTL_ALLOC, &buf_data); if (ret < 0) { std::cerr << "RK_DMA_HEAP_ALLOC_BUFFER failed" << std::endl; return ret; } /* mmap va */ int prot; if (fcntl(buf_data.fd, F_GETFL) & O_RDWR) prot = PROT_READ | PROT_WRITE; else prot = PROT_READ; // std::cout << "DMA FD: " << dma_fd << ", SIZE: " << SIZE << std::endl; void* 
dma_memory = mmap(nullptr, SIZE, prot, MAP_SHARED, buf_data.fd, 0); if (dma_memory == MAP_FAILED) { std::cerr << "DMA内存映射失败, 错误代码: " << errno << std::endl; close(dma_fd); return -1; } int shm_fd = open("/dev/shm/test_shared_memory", O_RDWR | O_CREAT, 0666); if (shm_fd < 0) { std::cerr << "无法打开 /dev/shm/test_shared_memory" << std::endl; munmap(dma_memory, SIZE); close(dma_fd); return -1; } // std::cout << "SHM FD: " << shm_fd << ", SIZE: " << SIZE << std::endl; ftruncate(shm_fd, SIZE); void* shm_memory = mmap(nullptr, SIZE, PROT_READ | PROT_WRITE, MAP_SHARED, shm_fd, 0); if (shm_memory == MAP_FAILED) { std::cerr << "SHM内存映射失败, 错误代码: " << errno << std::endl; close(shm_fd); munmap(dma_memory, SIZE); close(dma_fd); return -1; } if(flag == COPY_DIRECT) { std::memcpy(shm_memory, dma_memory, SIZE); } else { char *ch = new char[SIZE]; std::memcpy(ch, dma_memory, SIZE); std::memcpy(shm_memory, ch, SIZE); delete[] ch; } munmap(shm_memory, SIZE); munmap(dma_memory, SIZE); close(shm_fd); close(dma_fd); close(buf_data.fd); } auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration<double, std::milli> duration = end - start; std::cout << (flag == COPY_DIRECT ? "copy1" : "copy2") << "[" << count << "]" << "拷贝使用的时间: " << duration.count()*1.0/count << " 毫秒" << std::endl; return 0; } int main() { // int dma_fd; // void* dma_memory; // int r = dma_buf_alloc(DMA_HEAP_DMA32_UNCACHED_PATH, SIZE, &dma_fd, (void **)&dma_memory); // if (r < 0) // { // printf("alloc data %s failed!\n", DMA_HEAP_DMA32_UNCACHED_PATH); // return -1; // } // else // { // printf("alloc data %s success!\n", DMA_HEAP_DMA32_UNCACHED_PATH); // } test_mem_copy(); // test_mem_copy1(COPY_DIRECT); // test_mem_copy1(COPY_CACHE); return 0; }