2012-03-28 19 views
11

Esto es parte de mi código CUDA, pero la última parte de este código produce un mensaje de error. Mensaje de error de CUDA: "error de lanzamiento no especificado" (unspecified launch failure).

// Host-side fragment: allocates two 10-element device buffers, uploads
// zero-initialised host copies, launches the `match` kernel and copies both
// buffers back to the host. Lines consisting of "......" mark code that was
// elided from this excerpt.
//
// mat_count / mat_position         : device buffers (allocated with cudaMalloc below).
// matches_count / matches_position : host buffers (allocated with malloc below).
unsigned int *mat_count; 
off_t *mat_position; 
unsigned int *matches_count; 
off_t *matches_position; 
...... 
// NOTE(review): the return values of these cudaMalloc calls are not checked.
// NOTE(review): the kernel indexes both buffers with (s - start_sequence);
// confirm that this index always stays below the 10 elements allocated here.
cudaMalloc ((void **) &mat_count, sizeof(unsigned int)*10); 
cudaMalloc ((void **) &mat_position, sizeof(off_t)*10); 
...... 
matches_count = (unsigned int *)malloc(sizeof(unsigned int)*10); 
matches_position = (off_t *)malloc(sizeof(off_t)*10); 
// Zero both host arrays so the device buffers start from a known state.
for (i = 0 ; i < 10 ; i++) { 
    matches_count [i] = 0; 
    matches_position[i] = 0; 
} 
...... 
// Upload the zeroed arrays to the device.
// NOTE(review): the return values of these two copies are not checked.
cudaMemcpy (mat_count, matches_count , sizeof(unsigned int)*10, cudaMemcpyHostToDevice); 
cudaMemcpy (mat_position, matches_position, sizeof(off_t)*10,  cudaMemcpyHostToDevice); 
...... 
// Kernel launch with BLK_SIZE blocks of THR_SIZE threads.
match<<<BLK_SIZE,THR_SIZE>>>(
     reference_total, 
     indextable_total, 
     sequences, 
     start_sequence, 
     sequence_length, 
     end_sequence, 
     ref_base, 
     idx_base, 
     msk_base, 
     mat_count, 
     mat_position, 
     reference, 
     first_indexes, 
     seqmaskc 
     ); 
// cudaGetLastError() right after the launch reports launch-configuration
// errors. A fault raised while the kernel is executing is reported by a later
// synchronizing call instead, such as the cudaMemcpy calls below.
// NOTE(review): there is no cudaDeviceSynchronize() between the launch and the
// copies, so an in-kernel fault gets attributed to whichever copy observes it.
err=cudaGetLastError(); 
if(err!=cudaSuccess) 
{ 
printf("\n1 %s\n", cudaGetErrorString(err)); 
} 
// Download the per-index match counters (printed with tag "2" on failure).
err= cudaMemcpy (matches_count , mat_count, sizeof(unsigned int)*10, cudaMemcpyDeviceToHost); 
if(err!=cudaSuccess) 
{ 
printf("\n2 %s\n", cudaGetErrorString(err)); 
} 
// Download the recorded match positions (printed with tag "3" on failure).
err= cudaMemcpy (matches_position, mat_position, sizeof(off_t)*10, cudaMemcpyDeviceToHost); 
if(err!=cudaSuccess) 
{ 
printf("\n3 %s\n", cudaGetErrorString(err)); 
} 

La siguiente parte del código es la que informa del "error de lanzamiento no especificado". No sé por qué aparece este mensaje de error.

// Device-to-host copy of the recorded match positions; on failure the error
// string is printed with tag "3".
// NOTE(review): a blocking cudaMemcpy can also report a fault raised earlier
// by a kernel, so an error returned here does not necessarily originate in
// the copy itself - confirm by synchronizing right after the launch.
err=cudaMemcpy (matches_position, mat_position, sizeof(off_t)*10, cudaMemcpyDeviceToHost); 
if(err!=cudaSuccess) 
{ 
printf("\n3 %s\n", cudaGetErrorString(err)); 
} 

Lo siguiente es parte de la función `match` (el kernel).

// Kernel `match` (excerpt: the parameter list and several statements are
// elided with "..." / "......").
//
// For every index p in [start_p, last_p) and every sequence s in
// [start_sequence, end_sequence) the visible code:
//   1. reads a reference offset `ref_off` from idx_base[p];
//   2. runs a mask pre-filter that XORs a word read from seqmaskc with a word
//      read from msk_base and skips the candidate when the result is non-zero;
//   3. copies reference bytes into this thread's scratch area `reference_blk`
//      and compares them byte by byte with the sequence;
//   4. on an exact match, increments mat_count[s - start_sequence] and stores
//      the match position in mat_position[s - start_sequence].
__global__ void match(...) 
{ 
    ...... 
// Scratch area for this thread: offset from `reference` by 32 elements per
// global thread index (THR_SIZE * blockIdx.x + threadIdx.x).
// NOTE(review): the element size depends on the type of `reference`, which is
// not visible here; confirm the buffer holds at least 32 bytes per thread and
// that sequence_bytes never exceeds that per-thread size.
reference_blk = (THR_SIZE * blockIdx.x + threadIdx.x) * 32 + reference; 
...... 
//-- added to parallelize --// 
for (p = start_p ; p != last_p ; p++) { 
    // Walk all sequences; `sequence` advances by sequence_bytes per step.
    for (s = start_sequence, sequence = sequences ; s != end_sequence ; 
      s++, sequence += sequence_bytes) { 
     // Offset for index-table entry p, read as an unsigned int.
     ref_off = *(((unsigned int*)(idx_base)) + p); 

     shifted_in = 0; 

     // Mask pre-filter. The three branches cover the cases where
     // first_indexes[...] % 8 is smaller than, equal to, or larger than
     // ref_off % 8.
     // NOTE(review): each branch casts msk_base plus an offset (presumably a
     // byte offset) to unsigned long* and dereferences it, and does the same
     // with seqmaskc + 16 * index. If such an address is not aligned for
     // unsigned long, or falls outside the allocation, the load may fault -
     // verify with cuda-memcheck.
     if((int)(first_indexes[s-start_sequence] % 8 - ref_off % 8) < 0){ 
      int shamt2 = (ref_off % 8 - first_indexes[s-start_sequence] % 8); 

      mask_buffer = *((unsigned long *)(msk_base + (ref_off - first_indexes[s-start_sequence])/8)) >> shamt2; 

      // Skip this candidate when the masks differ after re-aligning by shamt2.
      if(((*(unsigned long *)(seqmaskc + 16 * (s-start_sequence)))^mask_buffer) << shamt2) continue; 
     } 
     else if((int)(first_indexes[s-start_sequence] % 8 - ref_off % 8) == 0){ 
      mask_buffer = *((unsigned long *)(msk_base + (ref_off)/8)); 

      // Same bit alignment: skip when any mask bit differs.
      if((*(unsigned long *)(seqmaskc + 16 * (s-start_sequence))^mask_buffer)) continue; 
     } 
     else{ 
      int shamt2 = 8 - (first_indexes[s-start_sequence] % 8 - ref_off % 8); 

      // Reads starting one element before (ref_off/8 - first_indexes[...]/8).
      // NOTE(review): if ref_off/8 equals first_indexes[...]/8 this offset is
      // -1, i.e. before msk_base - confirm that cannot happen.
      mask_buffer = *((unsigned long *)(msk_base + (ref_off/8- first_indexes[s-start_sequence]/8) - 1)) >> shamt2; 

      if(((*(unsigned long *)(seqmaskc + 16 * (s-start_sequence)))^mask_buffer) << shamt2) continue; 
     } 

     // Full compare. Again three branches, keyed on first_indexes[...] % 4
     // versus ref_off % 4.
     if((int)(first_indexes[s-start_sequence] % 4 - ref_off % 4) < 0){ 
      // `shamt` is not used in the visible code; presumably consumed by the
      // elided statements below.
      int shamt = (ref_off % 4 - first_indexes[s-start_sequence] % 4) * 2; 
      // Copy sequence_bytes bytes of reference data into the scratch area.
      memcpy(reference_blk, ref_base + ref_off/4 - first_indexes[s-start_sequence]/4, sequence_bytes); 
      ...... 
      //-- instead of memcmp --// 
      // Byte-wise comparison: v stays 0 only if all sequence_bytes bytes match.
      int v = 0; 
      char *p1 = (char *)sequence; 
      char *p2 = (char *)reference_blk; 
      int tmp_asd = sequence_bytes; 
      while(tmp_asd!=0){ 
       v = *(p1++) - *(p2++); 
       if(v!=0) 
        break; 
       tmp_asd--; 
      } 

      // NOTE(review): the increment below is a plain read-modify-write, not an
      // atomic; if several threads can reach the same index it races.
      if(v == 0){ 
       mat_count[s - (int)start_sequence]++;  /* Maintain count */ 
       mat_position[s - (int)start_sequence] = ref_off-first_indexes[s-start_sequence]; /* Record latest position */ 
      } 
     } 
     else if((int)(first_indexes[s-start_sequence] % 4 - ref_off % 4)== 0){ 
      // Same alignment: copy sequence_bytes bytes and compare directly.
      memcpy(reference_blk, ref_base + ref_off/4 - first_indexes[s-start_sequence]/4, sequence_bytes); 
      ....... 
      //-- instead of memcmp --// 
      // Byte-wise comparison: v stays 0 only if all sequence_bytes bytes match.
      int v = 0; 
      char *p1 = (char *)sequence; 
      char *p2 = (char *)reference_blk; 
      int tmp_asd = sequence_bytes; 
      while(tmp_asd!=0){ 
       v = *(p1++) - *(p2++); 
       if(v!=0) 
        break; 
       tmp_asd--; 
      } 
      if(v == 0){ 
       mat_count[s - (int)start_sequence]++;  /* Maintain count */ 
       mat_position[s - (int)start_sequence] = ref_off-first_indexes[s-start_sequence]; /* Record latest position */ 
      } 
     } 
     else 
     { 
      // `shamt` is not used in the visible code; presumably consumed by the
      // elided statements below.
      int shamt = 8 - (first_indexes[s-start_sequence] % 4 - ref_off % 4) * 2; 

      // This branch copies a fixed 32 bytes, starting one element earlier
      // than in the other two branches.
      // NOTE(review): the source offset can be -1 relative to ref_base when
      // ref_off/4 equals first_indexes[...]/4 - confirm that cannot happen.
      memcpy(reference_blk, ref_base + ref_off/4 - first_indexes[s-start_sequence]/4 - 1, 32); 
      ...... 
      //-- instead of memcmp --// 
      // Byte-wise comparison: v stays 0 only if all sequence_bytes bytes match.
      int v = 0; 
      char *p1 = (char *)sequence; 
      char *p2 = (char *)reference_blk; 
      int tmp_asd = sequence_bytes; 
      while(tmp_asd!=0){ 
       v = *(p1++) - *(p2++); 
       if(v!=0) 
        break; 
       tmp_asd--; 
      } 

      if (v == 0){ 
       mat_count[s - (int)start_sequence]++;  /* Maintain count */ 
       mat_position[s - (int)start_sequence] = ref_off-first_indexes[s-start_sequence];/* Record latest position */ 
      } 
     } 
    } 
} 

}

+0

¿Cuáles son los valores de `BLK_SIZE` y `THR_SIZE`? –

+0

`BLK_SIZE` y `THR_SIZE` valen ambos 1. – Jimmy

+0

Probablemente necesites publicar el código para la función kernel 'match' –

Respuesta

30

Un "error de lanzamiento no especificado" (unspecified launch failure) es casi siempre una violación de segmento (segfault). Tiene un error de indexación en algún lugar de su kernel, probablemente al acceder a la memoria global.

Revisaría su código, pero es ligeramente incomprensible...

22

Compile la aplicación con los indicadores de depuración (`nvcc -G -g`) e intente ejecutarla dentro de cuda-memcheck o cuda-gdb. Eso podría darle una pista de dónde está el problema.

Sólo tiene que ejecutar

cuda-memcheck ./yourApp 
Cuestiones relacionadas