DNA排序
逆序数可以用来描述一个序列混乱程度的量。例如,“DAABEC”的逆序数为5,其中D大于他右边的4个数,E大于他右边的1个数,4+1=5;又如,“ZWQM”的逆序数为3+2+1+0=6。
现在有许多长度一样的字符串,每个字符串里面只会出现四种字母(A,T,G,C)。要求编写程序,将这些字符串按照他们的逆序数进行排序。
输入:
输入数据有多组,以EOF结束。其中,每组数据:
第一行包括两个正整数,第一个正整数N给出了字符串的长度,第二个正整数M给出了字符串的数量。(1<=N,M<=100)
输出:
输出每组数据,不需要额外空行。
将输入的字符串按照其逆序数进行排序,如果两个字符串的逆序数相等,则按照输入中两者先后顺序进行排序。
Sample Input
10 6
AACATGAAGG
TTTTGGCCAA
TTTGGCCAAA
GATCAGATTT
CCCGGGGGGA
ATCGATGCAT
Sample Output
CCCGGGGGGA
AACATGAAGG
GATCAGATTT
ATCGATGCAT
TTTTGGCCAA
TTTGGCCAAA
Source
分析:要求用稳定的排序算法,所以选择了归并排序。计算逆序数原本没想太多用的暴力遍历,但是后来看评论,发现大神的一种有趣的算法。
#include <stdio.h>
#include <stdlib.h>
#include <string.h> #define DNA_LEN 50
#define DNA_NUM 100 #define BUFFER_SIZE 10000 typedef struct
{
int unsortedness;
char dnaString[DNA_LEN];
}Dna; typedef struct
{
int dnaLen;
int dnaNum;
Dna dna[DNA_NUM];
Dna* pDna[DNA_NUM];
}DnaSequence; DnaSequence DnaSeq; void GetDnaSequence(DnaSequence *dnaSeq)
{
int i; scanf("%d %d\n", &dnaSeq->dnaLen, &dnaSeq->dnaNum); for(i = ; i < dnaSeq->dnaNum; i++)
{
if(NULL == gets(dnaSeq->dna[i].dnaString)) break; dnaSeq->pDna[i] = &dnaSeq->dna[i];
}
} void PrintDnaSequence(DnaSequence *dnaSeq)
{
int i; for(i = ; i < dnaSeq->dnaNum; i++)
{
printf("%s\n", dnaSeq->pDna[i]->dnaString);
}
}
/*
void CalcUnsortedness(Dna* dna, int dnaLen)
{
int delta,i,j;
dna->unsortedness = 0;
for(i = 0; i < dnaLen; i++)
{
for(j = i+1; j < dnaLen; j++)
{
delta = dna->dnaString[i] - dna->dnaString[j];
if(delta > 0) dna->unsortedness++;
}
}
}
*/
void CalcUnsortedness(Dna* dna, int dnaLen)
{
int i;
int A = , C = , G = ;
dna->unsortedness = ;
for(i = dnaLen - ; i >= ; i--)
{
switch(dna->dnaString[i])
{
case 'A':
A++;
break;
case 'C':
C++;
dna->unsortedness += A;
break;
case 'G':
G++;
dna->unsortedness += A+C;
break;
case 'T':
dna->unsortedness += A+C+G;
break;
default:
break;
}
}
} int SortCmp(const void* elem1, const void* elem2)
{
Dna* dna1 = (Dna *)(*(size_t*)elem1);
Dna* dna2 = (Dna *)(*(size_t*)elem2); return dna1->unsortedness - dna2->unsortedness;
} char g_mergeBuffer[BUFFER_SIZE]; void Merge(char* array, int elemSize, int left, int mid, int right, int (*SortCmp)(const void*, const void*))
{
int i = left;
int j = mid;
int bufIdx = ; while(i < mid && j <= right)
{
if(SortCmp(&array[i*elemSize], &array[j*elemSize]) <= )
{
memcpy(&g_mergeBuffer[bufIdx], &array[i*elemSize], elemSize);
i++;
}
else
{
memcpy(&g_mergeBuffer[bufIdx], &array[j*elemSize], elemSize);
j++;
}
bufIdx += elemSize;
} for(; i < mid; i++)
{
memcpy(&g_mergeBuffer[bufIdx], &array[i*elemSize], elemSize);
bufIdx += elemSize;
} for(; j <= right; j++)
{
memcpy(&g_mergeBuffer[bufIdx], &array[j*elemSize], elemSize);
bufIdx += elemSize;
} memcpy(&array[left*elemSize], g_mergeBuffer, (right-left+)*elemSize);
} void MergeSort(void* array, int arrayLen, int elemSize, int (*SortCmp)(const void*, const void*))
{
int loop, left, mid, right = ; for(loop = ; loop < arrayLen; loop *= )
{
left = ;
right = ;
while(right < arrayLen - )
{
mid = left + loop;
right = (mid + loop - > arrayLen - ) ? (arrayLen - ) : (mid + loop - );
Merge((char*)array, elemSize, left, mid, right, SortCmp);
left = left + loop * ;
}
}
} void ProcDnaSequence(DnaSequence *dnaSeq)
{
int i;
int elemSize = sizeof(dnaSeq->pDna[]); for(i = ; i < dnaSeq->dnaNum; i++)
{
CalcUnsortedness(&dnaSeq->dna[i], dnaSeq->dnaLen);
}
MergeSort(dnaSeq->pDna, dnaSeq->dnaNum, elemSize, SortCmp);
} int main()
{
GetDnaSequence(&DnaSeq);
ProcDnaSequence(&DnaSeq);
PrintDnaSequence(&DnaSeq);
return ;
}