当前位置: 首页 > news >正文

策划网站建设关键词歌曲

策划网站建设,关键词歌曲,学院网站怎么做的,如何制作效果图1 定位 thread CUDA 中提供了 blockIdx, threadIdx, blockDim, gridDim来定位发起 thread，下面发起 1 个 grid，里面有 2 个 block，每个 block 里有 5 个 threads。 程序让每个 thread 输出自己的 id 号: #include <stdio.h>__global…

1 定位 thread

CUDA 中提供了 blockIdx, threadIdx, blockDim, gridDim 来定位发起 thread,下面发起 1 个 grid,里面有 2 个 block,每个 block 里有 5 个 threads。

程序让每个 thread 输出自己的 id 号:

#include <stdio.h>

// Kernel: each thread prints its flat global id.
// Assumes a 1-D launch; id = (block offset) + (thread offset within block).
__global__ void print_id() {
    int global_id = blockIdx.x * blockDim.x + threadIdx.x;
    printf("This is thread %d.\n", global_id);
}

// Launch 2 blocks of 5 threads (10 threads total), then wait for the
// asynchronous kernel to finish before the process exits.
int main() {
    print_id<<<2, 5>>>();
    cudaDeviceSynchronize();
}

编译并运行:

nvcc -o get_thread_id get_thread_id.cu -runThis is thread 5.
This is thread 6.
This is thread 7.
This is thread 8.
This is thread 9.
This is thread 0.
This is thread 1.
This is thread 2.
This is thread 3.
This is thread 4.

2 vector add

将向量 a 与向量 b 逐元素相加,计算的结果为向量 c。使用 cpu 与 GPU 分别实现向量相加,并比较执行速度。

这里需要使用cudastart.h定义的计算时间和初始化的函数:

#ifndef CUDASTART_H
#define CUDASTART_H

// Abort with file/line context when a CUDA runtime call fails.
#define CHECK(call)\
{\
    const cudaError_t error=call;\
    if(error != cudaSuccess)\
    {\
        printf("Error: %s: %d, ", __FILE__, __LINE__);\
        printf("code: %d, reason: %s\n", error, cudaGetErrorString(error));\
        exit(1);\
    }\
}

#include <time.h>
#ifdef _WIN32
#   include <windows.h>
#else
#   include <sys/time.h>
#endif

// Wall-clock time in seconds; used for coarse host-side timing.
double cpuSecond() {
#ifdef _WIN32
    // gettimeofday() does not exist on Windows; the original called it
    // unconditionally even though windows.h was included above.
    LARGE_INTEGER freq, now;
    QueryPerformanceFrequency(&freq);
    QueryPerformanceCounter(&now);
    return (double)now.QuadPart / (double)freq.QuadPart;
#else
    struct timeval tp;
    gettimeofday(&tp, NULL);
    return((double)tp.tv_sec + (double)tp.tv_usec * 1e-6);
#endif
}

// Fill a float array with pseudo-random values in [0, 65.535].
// (Name kept as-is — "initiaData" — because existing callers use it.)
void initiaData(float* ip, int size) {
    time_t t;
    srand((unsigned)time(&t));
    for(int i = 0; i < size; i++) {
        ip[i] = (float)(rand() & 0xffff) / 1000.0f;
    }
}

// Integer variant required by vector_reduction.cu (the original header
// never defined it, so that file could not compile). Values are kept in
// [0, 255] so summing 2^24 of them fits comfortably in a 32-bit int.
void initialData_int(int* ip, int size) {
    time_t t;
    srand((unsigned)time(&t));
    for (int i = 0; i < size; i++) {
        ip[i] = rand() & 0xff;
    }
}

// Select CUDA device devNum and print its name.
void initDevice(int devNum) {
    int dev = devNum;
    cudaDeviceProp deviceProp;
    CHECK(cudaGetDeviceProperties(&deviceProp, dev));
    printf("Using device %d: %s\n", dev,  deviceProp.name);
    CHECK(cudaSetDevice(dev));
}

// Element-wise comparison of a CPU reference against GPU output.
// Prints the first mismatch and returns; prints success otherwise.
void checkResult(float* hostRef, float* gpuRef, const int N) {
    double epsilon = 1.0E-8;
    for(int i = 0; i < N; i++) {
        // fabs, not abs: integer abs() truncated the float difference
        // toward zero and silently accepted mismatches < 1.0.
        if(fabs(hostRef[i] - gpuRef[i]) > epsilon) {
            printf("Results don't match!\n");
            printf("%f(hostRef[%d]) != %f(gpuRef[%d])\n", hostRef[i], i, gpuRef[i], i);
            return;
        }
    }
    printf("Check result success!\n");
}

#endif

vector_add_gpu.cu

#include <stdio.h>
#include <assert.h>
#include <time.h>
#include "cudastart.h"

// Number of elements (2^30); every int buffer is 4 GiB.
const int N = 1 << 30;

// Print and assert on any CUDA runtime error; passes the code through.
inline cudaError_t checkCuda(cudaError_t result) {
    if (result != cudaSuccess) {
        fprintf(stderr, "Cuda Runtime Error: %s\n", cudaGetErrorString(result));
        assert(result == cudaSuccess);
    }
    return result;
}

// Fill a[i] = i so the expected element-wise sum a[i] + b[i] is exactly 2*i.
void initWith(int* a) {
    for (int i = 0; i < N; ++i) {
        a[i] = i;
    }
}

// Grid-stride element-wise add: result = a + b.
// size_t indexing: with N = 2^30 and a full launch, the original's
// int arithmetic (index + stride) reached the 2^31 boundary and would
// overflow for any larger N or launch configuration.
__global__ void addVectorsInto(int* result, int* a, int* b) {
    size_t index = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    size_t gridstride = (size_t)gridDim.x * blockDim.x;
    for (size_t i = index; i < (size_t)N; i += gridstride)
        result[i] = a[i] + b[i];
}

// Verify array[i] == 2*i for all i; exits the process on the first mismatch.
void checkElementsAre(int* array) {
    for (int i = 0; i < N; i++) {
        if (array[i] != 2 * i) {
            printf("FAIL: array[%d] - %d does not equal %d\n", i, array[i], 2 * i);
            exit(1);
        }
    }
    printf("SUCCESS All values added correctly.\n");
}

// Reference CPU implementation of the same element-wise add.
void cpuAdd (int* h_a, int* h_b, int* h_c) {
    for (int tid = 0; tid < N; ++tid) {
        h_c[tid] = h_a[tid] + h_b[tid];
    }
}

int main() {
    size_t size = (size_t)N * sizeof(int);

    // ---- CPU baseline --------------------------------------------------
    int* cpu_a = (int*)malloc(size);
    int* cpu_b = (int*)malloc(size);
    int* cpu_c = (int*)malloc(size);
    if (!cpu_a || !cpu_b || !cpu_c) {   // three 4 GiB buffers: malloc can fail
        fprintf(stderr, "host allocation failed\n");
        return 1;
    }
    initWith(cpu_a);
    initWith(cpu_b);
    double start_cpu = cpuSecond();
    cpuAdd(cpu_a, cpu_b, cpu_c);
    double end_cpu = cpuSecond();
    checkElementsAre(cpu_c);
    printf("vector add, CPU Time used: %f ms\n", (end_cpu - start_cpu) * 1000);
    free(cpu_a);
    free(cpu_b);
    free(cpu_c);

    // ---- GPU version, unified memory -----------------------------------
    int* a;
    int* b;
    int* c;
    int deviceId;
    checkCuda(cudaGetDevice(&deviceId));
    checkCuda(cudaMallocManaged(&a, size));
    checkCuda(cudaMallocManaged(&b, size));
    checkCuda(cudaMallocManaged(&c, size));

    // Prefetch to the host before CPU-side init, then to the device before
    // the kernel, so neither side pays page-fault migration costs.
    checkCuda(cudaMemPrefetchAsync(a, size, cudaCpuDeviceId));
    checkCuda(cudaMemPrefetchAsync(b, size, cudaCpuDeviceId));
    initWith(a);
    initWith(b);
    checkCuda(cudaMemPrefetchAsync(a, size, deviceId));
    checkCuda(cudaMemPrefetchAsync(b, size, deviceId));

    size_t threadsPerBlock = 1024;
    size_t numberOfBlock = (N + threadsPerBlock - 1) / threadsPerBlock;  // ceil-div

    double start = cpuSecond();
    addVectorsInto<<<numberOfBlock, threadsPerBlock>>>(c, a, b);
    checkCuda(cudaGetLastError());       // catch bad launch configuration
    checkCuda(cudaDeviceSynchronize());  // kernel is async: wait before timing/reading
    double end = cpuSecond();
    checkElementsAre(c);
    printf("vector add, GPU Time used: %f ms\n", (end - start) * 1000);

    checkCuda(cudaFree(a));
    checkCuda(cudaFree(b));
    checkCuda(cudaFree(c));
}

说明:

这段代码需要向 GPU 申请显存,用来存储数组。cudaMallocManaged 和 cudaMalloc 都是 CUDA 中用于分配设备内存的函数,它们之间有几个重要的区别:

  • 管理方式:

    • cudaMallocManaged 分配的内存是统一内存 (Unified Memory),可由 CPU 和 GPU 共享,无需显式地进行数据传输。这使得程序员可以更轻松地编写并行代码,而不必担心内存管理和数据传输。
    • cudaMalloc 分配的内存则是显式地分配给 GPU 使用的内存,需要通过显式的数据传输函数(如 cudaMemcpy)来在 CPU 和 GPU 之间传输数据。
  • 自动数据迁移:

    • 统一内存由 CUDA 运行时自动管理数据的迁移。当 CPU 或 GPU 尝试访问未分配到当前设备的统一内存时,CUDA 运行时会自动将数据迁移到访问的设备上。
    • 对于 cudaMalloc 分配的内存,需要手动使用 cudaMemcpy 等函数进行数据传输。
  • 便利性:

    • 使用 cudaMallocManaged 更加方便,因为无需手动管理数据的迁移和分配,程序员可以更专注于算法和逻辑的实现。
    • cudaMalloc 则需要更多的手动管理,包括数据传输和内存释放。
  • 优缺点:

    • cudaMallocManaged 的优点在于简化了内存管理和数据传输,提高了编程的便利性和代码的可读性。同时,由于统一内存的存在,可以减少内存使用上的一些烦琐问题。
    • cudaMalloc 的优点在于更加灵活,程序员可以精确地控制内存的分配和数据传输,适用于需要更细粒度控制的情况。此外,对于某些特定的算法和应用场景,手动管理内存和数据传输可能会比统一内存更加高效。

编译运行:

nvcc -o vector_add_gpu vector_add_gpu.cu -runSUCCESS All values added correctly.
vector add, CPU Time used: 3081.584930 ms
SUCCESS All values added correctly.
vector add, GPU Time used: 280.308962 ms

3 Reduction

对数组进行规约操作。

vector_reduction.cu

#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "cudastart.h"

// Baseline reduction: every in-range thread adds its element into one
// global accumulator. Correct, but all threads contend on a single
// atomic address.
__global__ void reduce_test(int* g_idata, int* sum, unsigned int n) {
    int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx < n) {
        atomicAdd(sum, g_idata[idx]);
    }
}

// Neighbored-pair reduction in global memory; block b writes its partial
// sum into g_odata[b]. The tid % (2*stride) selection leaves most threads
// of a warp idle (heavy divergence) — kept as the teaching baseline.
__global__ void reduceNeighbored(int* g_idata, int* g_odata, unsigned int n) {
    // set thread ID
    unsigned int tid = threadIdx.x;
    unsigned int idx = blockIdx.x * blockDim.x + tid;
    // Boundary check on the GLOBAL index. The original compared tid (the
    // block-local index) against n, which only worked while n >= blockDim.x.
    // Note this early return sits before __syncthreads(); it is safe only
    // when n is a multiple of blockDim.x so whole blocks exit together.
    if (idx >= n) return;
    int* idata = g_idata + blockIdx.x * blockDim.x;
    // in-place reduction in global memory
    for (int stride = 1; stride < blockDim.x; stride *= 2) {
        if ((tid % (2 * stride)) == 0) {
            idata[tid] += idata[tid + stride];
        }
        // synchronize within block
        __syncthreads();
    }
    // write result for this block to global mem
    if (tid == 0) {
        g_odata[blockIdx.x] = idata[0];
    }
}

// NOTE(review): as published, this body is identical to reduceInterleaved
// below — the "less divergence" remapping the blog describes was lost in a
// copy-paste. Behavior preserved; only documented here.
__global__ void reduceNeighboredLess(int* g_idata, int* g_odata, unsigned int n) {
    unsigned int tid = threadIdx.x;
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    // convert global data pointer to the local pointer of this block
    int* idata = g_idata + blockIdx.x * blockDim.x;
    if (idx >= n) return;
    // in-place reduction in global memory, halving the stride each pass
    for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
        if (tid < stride) {
            idata[tid] += idata[tid + stride];
        }
        __syncthreads();
    }
    // write result for this block to global mem
    if (tid == 0)
        g_odata[blockIdx.x] = idata[0];
}

// Interleaved-pair reduction: active threads stay contiguous (tid < stride),
// so whole warps retire as the stride shrinks — far less divergence.
__global__ void reduceInterleaved(int* g_idata, int* g_odata, unsigned int n) {
    unsigned int tid = threadIdx.x;
    unsigned idx = blockIdx.x*blockDim.x + threadIdx.x;
    // convert global data pointer to the local pointer of this block
    int *idata = g_idata + blockIdx.x*blockDim.x;
    if (idx >= n) return;
    // in-place reduction in global memory
    for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
        if (tid < stride) {
            idata[tid] += idata[tid + stride];
        }
        __syncthreads();
    }
    // write result for this block to global mem
    if (tid == 0)
        g_odata[blockIdx.x] = idata[0];
}

// Interleaved reduction staged through shared memory: each block copies its
// slice into on-chip storage, reduces there, and writes one partial sum.
// Requires blockDim.x <= 1024 (the static shared array size).
__global__ void reduceInterleaved_share(int* g_idata, int* g_odata, unsigned int n) {
    __shared__ int sh_arr[1024];
    unsigned int tid = threadIdx.x;
    unsigned int idx = blockIdx.x * blockDim.x + threadIdx.x;
    if (idx >= n) return;
    sh_arr[tid] = g_idata[idx];
    __syncthreads();   // whole slice staged before anyone reads it
    // in-place reduction in shared memory
    for (int stride = blockDim.x / 2; stride > 0; stride >>= 1) {
        if (tid < stride)
            sh_arr[tid] += sh_arr[tid + stride];
        __syncthreads();
    }
    // write result for this block to global mem
    if (tid == 0)
        g_odata[blockIdx.x] = sh_arr[0];
}

int main(int argc, char** argv) {
    initDevice(0);

    // initialization: 2^24 elements
    int size = 1 << 24;
    printf("with array size %d ", size);

    // execution configuration (block size optionally from the command line)
    int blocksize = 1024;
    if (argc > 1) {
        blocksize = atoi(argv[1]);
    }
    dim3 block(blocksize, 1);
    dim3 grid((size - 1) / block.x + 1, 1);   // ceil-div
    printf("grid %d block %d \n", grid.x, block.x);

    // allocate host memory
    size_t bytes = size * sizeof(int);
    int *idata_host = (int*)malloc(bytes);
    int *odata_host = (int*)malloc(grid.x * sizeof(int));
    int *tmp = (int*)malloc(bytes);

    // initialize the array (initialData_int comes from cudastart.h)
    initialData_int(idata_host, size);
    if (size < 100) {
        printf("Array: [");
        for (int i = 0; i < size; ++i)
            printf("%d, ", idata_host[i]);
        printf("]\n");
    }
    memcpy(tmp, idata_host, bytes);

    double timeStart, timeElaps;
    int gpu_sum = 0;

    // device memory
    int *idata_dev = NULL;
    int *odata_dev = NULL;
    CHECK(cudaMalloc((void**)&idata_dev, bytes));
    CHECK(cudaMalloc((void**)&odata_dev, grid.x * sizeof(int)));

    // cpu reduction (reference result)
    int cpu_sum = 0;
    timeStart = cpuSecond();
    for (int i = 0; i < size; i++)
        cpu_sum += tmp[i];
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("cpu sum:%d \n", cpu_sum);
    printf("cpu reduction elapsed %lf ms cpu_sum: %d\n", timeElaps, cpu_sum);

    // kernel 0: atomicAdd reduction
    int *reduce_sum;
    CHECK(cudaMalloc((void**)&reduce_sum, sizeof(int)));
    CHECK(cudaMemset(reduce_sum, 0, sizeof(int)));
    CHECK(cudaMemcpy(idata_dev, idata_host, bytes, cudaMemcpyHostToDevice));
    CHECK(cudaDeviceSynchronize());
    timeStart = cpuSecond();
    reduce_test<<<grid, block>>>(idata_dev, reduce_sum, size);
    CHECK(cudaDeviceSynchronize());
    // BUG FIX: the original never recomputed timeElaps here, so the
    // "atomicAdd elapsed" line reprinted the stale CPU timing.
    timeElaps = 1000 * (cpuSecond() - timeStart);
    CHECK(cudaMemcpy(&gpu_sum, reduce_sum, sizeof(int), cudaMemcpyDeviceToHost));
    printf("gpu sum:%d \n", gpu_sum);
    printf("gpu atomicAdd elapsed %lf ms <<<grid %d block %d>>>\n", timeElaps, grid.x, block.x);
    CHECK(cudaFree(reduce_sum));

    // kernel 1: reduceNeighbored
    CHECK(cudaMemcpy(idata_dev, idata_host, bytes, cudaMemcpyHostToDevice));
    CHECK(cudaDeviceSynchronize());
    timeStart = cpuSecond();
    reduceNeighbored<<<grid, block>>>(idata_dev, odata_dev, size);
    CHECK(cudaDeviceSynchronize());
    // timing deliberately includes the D2H copy of partials + host-side sum,
    // matching the original measurement methodology for all four kernels
    CHECK(cudaMemcpy(odata_host, odata_dev, grid.x * sizeof(int), cudaMemcpyDeviceToHost));
    gpu_sum = 0;
    for (int i = 0; i < grid.x; i++)
        gpu_sum += odata_host[i];
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("gpu sum:%d \n", gpu_sum);
    printf("gpu reduceNeighbored elapsed %lf ms <<<grid %d block %d>>>\n", timeElaps, grid.x, block.x);

    // kernel 2: reduceNeighboredLess
    CHECK(cudaMemcpy(idata_dev, idata_host, bytes, cudaMemcpyHostToDevice));
    CHECK(cudaDeviceSynchronize());
    timeStart = cpuSecond();
    reduceNeighboredLess<<<grid, block>>>(idata_dev, odata_dev, size);
    CHECK(cudaDeviceSynchronize());
    CHECK(cudaMemcpy(odata_host, odata_dev, grid.x * sizeof(int), cudaMemcpyDeviceToHost));
    gpu_sum = 0;
    for (int i = 0; i < grid.x; i++)
        gpu_sum += odata_host[i];
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("gpu sum:%d \n", gpu_sum);
    printf("gpu reduceNeighboredless elapsed %lf ms <<<grid %d block %d>>>\n", timeElaps, grid.x, block.x);

    // kernel 3: reduceInterleaved
    CHECK(cudaMemcpy(idata_dev, idata_host, bytes, cudaMemcpyHostToDevice));
    CHECK(cudaDeviceSynchronize());
    timeStart = cpuSecond();
    reduceInterleaved<<<grid, block>>>(idata_dev, odata_dev, size);
    CHECK(cudaDeviceSynchronize());
    CHECK(cudaMemcpy(odata_host, odata_dev, grid.x * sizeof(int), cudaMemcpyDeviceToHost));
    gpu_sum = 0;
    for (int i = 0; i < grid.x; i++)
        gpu_sum += odata_host[i];
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("gpu sum:%d \n", gpu_sum);
    printf("gpu reduceInterleaved elapsed %lf ms <<<grid %d block %d>>>\n", timeElaps, grid.x, block.x);

    // kernel 4: reduceInterleaved with shared memory
    CHECK(cudaMemcpy(idata_dev, idata_host, bytes, cudaMemcpyHostToDevice));
    CHECK(cudaDeviceSynchronize());
    timeStart = cpuSecond();
    reduceInterleaved_share<<<grid, block>>>(idata_dev, odata_dev, size);
    CHECK(cudaDeviceSynchronize());
    CHECK(cudaMemcpy(odata_host, odata_dev, grid.x * sizeof(int), cudaMemcpyDeviceToHost));
    gpu_sum = 0;
    for (int i = 0; i < grid.x; i++)
        gpu_sum += odata_host[i];
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("gpu sum:%d \n", gpu_sum);
    // BUG FIX: the original label said "reduceInterleaved", making the last
    // two output lines indistinguishable.
    printf("gpu reduceInterleaved_share elapsed %lf ms <<<grid %d block %d>>>\n", timeElaps, grid.x, block.x);

    // free host memory
    free(idata_host);
    free(odata_host);
    free(tmp);
    CHECK(cudaFree(idata_dev));
    CHECK(cudaFree(odata_dev));

    // reset device
    cudaDeviceReset();

    // check the results (compares only the LAST kernel's sum)
    if (gpu_sum == cpu_sum) {
        printf("Test success!\n");
    }
    return EXIT_SUCCESS;
}

编译并运行:

nvcc -o vector_reduction vector_reduction.cu -runUsing device 0: NVIDIA GeForce RTX 2080 Ti
with array size 16777216 grid 16384 block 1024
cpu sum:2138839322
cpu reduction elapsed 30.170918 ms cpu_sum: 2138839322
gpu sum:2138839322
gpu atomicAdd elapsed 30.170918 ms <<<grid 16384 block 1024>>>
gpu sum:2138839322
gpu reduceNeighbored elapsed 1.228094 ms <<<grid 16384 block 1024>>>
gpu sum:2138839322
gpu reduceNeighboredless elapsed 0.674963 ms <<<grid 16384 block 1024>>>
gpu sum:2138839322
gpu reduceInterleaved elapsed 0.669956 ms <<<grid 16384 block 1024>>>
gpu sum:2138839322
gpu reduceInterleaved elapsed 0.744820 ms <<<grid 16384 block 1024>>>
Test success!

4 Matrix multiplication

计算二维矩阵乘法。

matmul.cu

#include <cuda_runtime.h>
#include <stdio.h>
#include <string.h>
#include "cudastart.h"

#define N 1024
#define tile_size 16

// CPU reference: classic triple loop, c = a * b for N x N row-major ints.
void matrixMulCPU(int* a, int* b, int* c) {
    int val = 0;
    for (int row = 0; row < N; ++row) {
        for (int col = 0; col < N; ++col) {
            val = 0;
            for (int k = 0; k < N; ++k) {
                val += a[row * N + k] * b[k * N + col];
            }
            c[row * N + col] = val;
        }
    }
}

// Naive GPU matmul: one thread per output element (x -> row, y -> col).
// No bounds guard: assumes the grid exactly covers N x N, i.e. N is a
// multiple of the block dimensions (true for N=1024, 16x16 blocks).
// (Name "maxtrixMulGPU" is a historical typo, kept for the callers.)
__global__ void maxtrixMulGPU(int* a, int* b, int* c) {
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    int col = blockIdx.y * blockDim.y + threadIdx.y;
    int val = 0;
    for (int i = 0; i < N; ++i) {
        val += a[row * N + i] * b[i * N + col];
    }
    c[row * N + col] = val;
}

// Tiled matmul through shared memory. Each thread now accumulates its
// partial product in a register and performs ONE global write; the
// original did a global read-modify-write of c per inner iteration.
// Still requires c to be zero-initialized (the final write is +=, which
// preserves the original contract). Assumes blockDim == tile_size x tile_size.
__global__ void maxtrixMulGPU_tile(int* a, int* b, int* c) {
    __shared__ int tile_a[tile_size][tile_size];
    __shared__ int tile_b[tile_size][tile_size];
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    int col = blockIdx.y * blockDim.y + threadIdx.y;
    int val = 0;
    for (int i = 0; i < N / tile_size; ++i) {
        tile_a[threadIdx.x][threadIdx.y] = a[row * N + i * tile_size + threadIdx.y];
        tile_b[threadIdx.x][threadIdx.y] = b[(i * tile_size + threadIdx.x) * N + col];
        __syncthreads();   // both tiles fully staged before use
        for (int j = 0; j < tile_size; ++j) {
            val += tile_a[threadIdx.x][j] * tile_b[j][threadIdx.y];
        }
        __syncthreads();   // everyone done reading before the next overwrite
    }
    c[row * N + col] += val;
}

// Compare GPU output against the CPU reference; report the first mismatch.
void check(int* c_cpu, int* c_gpu) {
    bool error = false;
    for (int row = 0; row < N && !error; row++) {
        for (int col = 0; col < N && !error; col++) {
            if (c_cpu[row * N + col] != c_gpu[row * N + col]) {
                printf("FOUND ERROR at c[%d][%d]\n", row, col);
                error = true;
                break;
            }
        }
    }
    if (!error) {
        printf("Success!\n");
    }
}

int main() {
    int *a, *b, *c_cpu, *c_gpu, *c_gpu_opt;
    int size = N * N * sizeof (int);   // bytes of an N x N int matrix
    double timeStart, timeElaps;

    // Unified memory: accessible from host (init/memset/check) and device.
    CHECK(cudaMallocManaged (&a, size));
    CHECK(cudaMallocManaged (&b, size));
    CHECK(cudaMallocManaged (&c_cpu, size));
    CHECK(cudaMallocManaged (&c_gpu, size));
    CHECK(cudaMallocManaged (&c_gpu_opt, size));

    initialData_int(a, N * N);
    initialData_int(b, N * N);
    // BUG FIX: the original passed N * N (element count) as the byte count,
    // zeroing only a quarter of each matrix. The tiled kernel accumulates
    // into c, so full zero-initialization is required for correctness.
    memset(c_cpu, 0, size);
    memset(c_gpu, 0, size);
    memset(c_gpu_opt, 0, size);

    timeStart = cpuSecond();
    matrixMulCPU(a, b, c_cpu);
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("cpu matrix mul time: %f ms\n", timeElaps);

    dim3 threads_per_block(16, 16, 1);
    dim3 number_of_blocks(N / threads_per_block.x, N / threads_per_block.y, 1);

    // test kernel 1: maxtrixMulGPU
    timeStart = cpuSecond();
    maxtrixMulGPU <<<number_of_blocks, threads_per_block>>>(a, b, c_gpu);
    // BUG FIX: the clock must stop AFTER the sync — kernel launches are
    // asynchronous, so the original measured only the launch overhead.
    CHECK(cudaDeviceSynchronize());
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("gpu matrix mul time: %f ms\n", timeElaps);
    check(c_cpu, c_gpu);

    // test kernel 2: maxtrixMulGPU_tile (shared-memory optimization)
    timeStart = cpuSecond();
    maxtrixMulGPU_tile<<<number_of_blocks, threads_per_block>>>(a, b, c_gpu_opt);
    CHECK(cudaDeviceSynchronize());
    timeElaps = 1000 * (cpuSecond() - timeStart);
    printf("gpu matrix mul time: %f ms\n", timeElaps);
    check(c_cpu, c_gpu_opt);

    // Free all our allocated memory
    cudaFree(a);
    cudaFree(b);
    cudaFree(c_cpu);
    cudaFree(c_gpu);
    cudaFree(c_gpu_opt);
}

编译并运行:

nvcc -o matmul matmul.cu -runcpu matrix mul time: 4380.882978 ms
gpu matrix mul time: 0.206947 ms
Success!
gpu matrix mul time: 0.063896 ms
Success!

5 cuDNN sigmoid

使用 cuDNN 实现 sigmoid 函数

cudnn_sigmoid.cu

#include <iostream>
#include <cuda_runtime.h>
#include <cudnn.h>

// Demo: run cuDNN's sigmoid activation in place over a 10-element tensor
// held in unified memory, printing the array before and after.
int main(int argc, char** argv) {
    // Report visible GPUs and pin the context to device 0.
    int gpuCount;
    cudaGetDeviceCount(&gpuCount);
    std::cout << "Found " << gpuCount << " GPUs." << std::endl;
    cudaSetDevice(0); // use GPU0

    int curDevice;
    struct cudaDeviceProp props;
    cudaGetDevice(&curDevice);
    cudaGetDeviceProperties(&props, curDevice);
    std::cout << "Compute capability:" << props.major << "." << props.minor << std::endl;

    cudnnHandle_t cudnn;
    cudnnCreate(&cudnn);
    std::cout << "Created cuDNN handle" << std::endl;

    // Describe the tensor: NCHW float, 1x1x1x10.
    const cudnnDataType_t dtype = CUDNN_DATA_FLOAT;
    const cudnnTensorFormat_t layout = CUDNN_TENSOR_NCHW;
    const int n = 1, c = 1, h = 1, w = 10;
    const int NUM_ELEMENTS = n * c * h * w;
    cudnnTensorDescriptor_t tensorDesc;
    cudnnCreateTensorDescriptor(&tensorDesc);
    cudnnSetTensor4dDescriptor(tensorDesc, layout, dtype, n, c, h, w);

    // Unified memory so both the host (printing) and the device (cuDNN)
    // can touch the same buffer without explicit copies.
    float* data;
    cudaMallocManaged(&data, NUM_ELEMENTS * sizeof(float));
    for (int i = 0; i < NUM_ELEMENTS; i++) data[i] = i * 1.00f;
    std::cout << "Original array: ";
    for (int i = 0; i < NUM_ELEMENTS; i++) std::cout << data[i] << " ";

    // y = alpha * sigmoid(x) + beta * y, computed in place (x == y).
    float blend_alpha = 1.0f;
    float blend_beta = 0.0f;
    cudnnActivationDescriptor_t actDesc;
    cudnnCreateActivationDescriptor(&actDesc);
    cudnnSetActivationDescriptor(actDesc, CUDNN_ACTIVATION_SIGMOID,
                                 CUDNN_NOT_PROPAGATE_NAN, 0.0f);
    cudnnActivationForward(cudnn, actDesc,
                           &blend_alpha, tensorDesc, data,
                           &blend_beta, tensorDesc, data);

    cudnnDestroy(cudnn);
    std::cout << std::endl << "Destroyed cuDNN handle." << std::endl;

    std::cout << "New array: ";
    for (int i = 0; i < NUM_ELEMENTS; i++) std::cout << data[i] << " ";
    std::cout << std::endl;

    cudaFree(data);
    return 0;
}

编译并运行:

nvcc -o cudnn_sigmoid cudnn_sigmoid.cu -run -lcudnnFound 2 GPUs.
Compute capability:7.5
Created cuDNN handle
Original array: 0 1 2 3 4 5 6 7 8 9
Destroyed cuDNN handle.
New array: 0.5 0.731059 0.880797 0.952574 0.982014 0.993307 0.997527 0.999089 0.999665 0.999877
http://www.cadmedia.cn/news/13207.html

相关文章:

  • 申请建设网站经费的请示系统清理优化工具
  • 网站被k 如何恢复百度网站登录入口
  • b2bb2c网站电子商务网站建设前期方案b站推广入口2023破解版
  • 有没有安全一点的网站百度渠道开户哪里找
  • 建站平台 在线提交表格功能江阴网站制作公司
  • 公司电商网站开发前端优化网站
  • 西安推荐企业网站制作平台爱站工具seo综合查询
  • 制作高端网站效果怎么样产品推广宣传方案
  • 安徽省建设厅网站个人怎么注册所有关键词
  • 用自己服务器做网站网站都有哪些
  • 做网站自己装服务器企业查询免费
  • 怎么自己做网站模板上海百度推广官网
  • 平台网站建设ppt模板下载爱站网官网
  • 银川市住房和城乡建设厅网站seo网络营销外包
  • ui设计怎么收费重庆seo整站优化效果
  • 广州建设h5网站淘宝关键词搜索量查询工具
  • 什么事网站建设做网络推广有前途吗
  • 广安商城网站建设aso榜单优化
  • 数码庄园的网站建设公司郑州厉害的seo顾问公司
  • 网站智能云刷关键词排名系统
  • 做二手车有哪些网站有哪些手续费app线下推广怎么做
  • 亚马逊网网站建设规划报告淄博seo推广
  • 开发高端网站建设网络销售都是诈骗公司吗
  • discuz是什么东西湖南好搜公司seo
  • 宁国网站设计公司百度竞价关键词出价技巧
  • 832贫困地区农副产品网络销售平台前端seo是什么意思
  • 360网站推广官网硅钙钾镁肥产品营销策划
  • 一起来做网站磁力猫torrent kitty
  • 中国嘉兴门户网站网站建设价格
  • 做课件用这15大网站网页设计学生作业模板