-
Notifications
You must be signed in to change notification settings - Fork 1
Initial checker script
Mateus Mannes De Medeiros edited this page Apr 14, 2023
·
1 revision
using System.Text;
// Reference passage and the attempt that is checked against it.
var originalText = @"The bustling city was alive with energy, as people hurried to work and tourists explored the famous landmarks. Street vendors sold hot dogs and pretzels, while taxi cabs honked their horns in the busy traffic.";
var userInput = @"The bustlin citi it was alive with energy, as the people hurried t and tourist explored the famous as landmarks. Street vendors sold hot dog pretzels, while taxi cabs honked their horns in the busy traffic.";

// Minimum character-level similarity (in percent) required before highlighting is attempted.
int thresholdPercentage = 70;

// Character-level edit distance between the raw texts, normalized by the longer
// text's length and truncated to a whole percentage.
int distance = LevenshteinDistance(originalText, userInput);
double longestLength = Math.Max(originalText.Length, userInput.Length);
double similarity = 1 - distance / longestLength;
int similarityPercentage = (int)(similarity * 100);

// Word-level alignment of the two texts; only the traceback matrix is needed
// to rebuild the aligned pairs, so the score matrix is discarded.
List<string> originalTokens = TokenizeText(originalText);
List<string> userTokens = TokenizeText(userInput);
var (_, tracebackMatrix) = NeedlemanWunschAlignment(originalTokens, userTokens);
List<Tuple<string, string>> alignedTokens = Traceback(originalTokens, userTokens, tracebackMatrix);

// Dump the alignment, one "original - user" pair per line ("-" marks a gap).
Console.WriteLine("Aligned Tokens:");
alignedTokens.ForEach(tokenPair => Console.WriteLine(tokenPair.Item1 + " - " + tokenPair.Item2));

// Stop before highlighting when the texts are too dissimilar.
bool similarEnough = similarityPercentage >= thresholdPercentage;
if (!similarEnough)
{
    Console.WriteLine("The user input is not similar to the original text.");
    return;
}
// Walk userInput word by word in step with alignedTokens and record, as inclusive
// [start, end] character ranges, every stretch of the input that should be highlighted.
List<Tuple<int, int>> highlightedAreas = new List<Tuple<int, int>>();
int userInputIndex = 0;
int alignedTokenIndex = 0;
// Separator characters skipped since the previous word; used below to decide whether
// the current word directly follows the last highlighted range.
int jumpedchars = 0;
while (userInputIndex < userInput.Length && alignedTokenIndex < alignedTokens.Count)
{
    if (IsSeparator(userInput[userInputIndex]))
    {
        jumpedchars++;
        userInputIndex++;
        continue;
    }

    Tuple<string, string> aligned = alignedTokens[alignedTokenIndex];
    bool isExtraWord = aligned.Item1 == "-";   // gap on the original side
    bool isMissingWord = aligned.Item2 == "-"; // gap on the user side

    // Consume the word that starts at userInputIndex.
    int startIndex = userInputIndex;
    while (userInputIndex < userInput.Length && !IsSeparator(userInput[userInputIndex]))
    {
        userInputIndex++;
    }
    int endIndex = userInputIndex - 1;

    if (aligned.Item1 != aligned.Item2)
    {
        if (isExtraWord)
        {
            // Also cover the word that follows the extra one, and resume scanning after it.
            int afterNextWord = EndOfNextWord(userInput, endIndex);
            userInputIndex = afterNextWord;
            endIndex = afterNextWord - 1;
        }

        bool continuesPreviousArea = highlightedAreas.Count > 0
            && startIndex == highlightedAreas.Last().Item2 + jumpedchars + 1;
        if (continuesPreviousArea)
        {
            // Only separators lie between the last range and this word: extend that range.
            highlightedAreas[highlightedAreas.Count - 1] = Tuple.Create(highlightedAreas.Last().Item1, endIndex);
        }
        else
        {
            // For a missing or extra word, start the range at the preceding word as well.
            if (isExtraWord || isMissingWord)
            {
                startIndex = StartOfPreviousWord(userInput, startIndex);
            }
            highlightedAreas.Add(Tuple.Create(startIndex, endIndex));
        }
    }

    // A pair containing a gap advances the alignment by two entries, any other pair by one.
    if (isMissingWord || isExtraWord)
    {
        alignedTokenIndex++;
    }
    alignedTokenIndex++;
    jumpedchars = 0;
}

// True for the characters this scan treats as word boundaries.
static bool IsSeparator(char c) => char.IsWhiteSpace(c) || char.IsPunctuation(c);

// Returns the index of the first character of the word preceding the word that starts at
// wordStart. When there is no preceding word, wordStart itself is returned; the scan never
// reads outside the string.
static int StartOfPreviousWord(string text, int wordStart)
{
    int index = wordStart;
    int separatorRuns = 0;
    while (true)
    {
        index--;
        if (index < 0)
        {
            // Ran off the front: either we were inside the previous word (it starts at 0)
            // or there was nothing before wordStart at all.
            return separatorRuns == 1 ? 0 : wordStart;
        }
        if (IsSeparator(text[index]))
        {
            separatorRuns++;
            if (separatorRuns == 2)
            {
                // Second boundary reached: the previous word begins right after it.
                return index + 1;
            }
            while (index >= 0 && IsSeparator(text[index]))
            {
                index--;
            }
            if (index < 0)
            {
                // Only separators precede the word.
                return wordStart;
            }
        }
    }
}

// Returns the index just past the word that follows the word ending at wordEnd (inclusive).
// When there is no following word, wordEnd + 1 is returned; the scan never reads outside
// the string.
static int EndOfNextWord(string text, int wordEnd)
{
    int index = wordEnd;
    int separatorRuns = 0;
    while (true)
    {
        index++;
        if (index >= text.Length)
        {
            // Ran off the end: either the next word runs to the end of the text
            // or there was no next word.
            return separatorRuns == 1 ? text.Length : wordEnd + 1;
        }
        if (IsSeparator(text[index]))
        {
            separatorRuns++;
            if (separatorRuns == 2)
            {
                // Second boundary reached: the next word ends right before it.
                return index;
            }
            while (index < text.Length && IsSeparator(text[index]))
            {
                index++;
            }
            if (index >= text.Length)
            {
                // Only separators follow the word.
                return wordEnd + 1;
            }
        }
    }
}
// Copy userInput character by character, opening a <mark> tag at the start index of the
// next pending range and closing it at that range's end index. Ranges are consumed in
// list order, one at a time.
StringBuilder highlightedUserInput = new StringBuilder();
int currentHighlightIndex = 0;
int position = 0;
foreach (char ch in userInput)
{
    bool hasPendingArea = currentHighlightIndex < highlightedAreas.Count;

    if (hasPendingArea && highlightedAreas[currentHighlightIndex].Item1 == position)
    {
        highlightedUserInput.Append("<mark>");
    }

    highlightedUserInput.Append(ch);

    if (hasPendingArea && highlightedAreas[currentHighlightIndex].Item2 == position)
    {
        highlightedUserInput.Append("</mark>");
        // This range is finished; move on to the next one.
        currentHighlightIndex++;
    }

    position++;
}

// Print the marked-up input and end the script.
string finalHighlightedUserInput = highlightedUserInput.ToString();
Console.WriteLine(finalHighlightedUserInput);
return;
// Fills the global-alignment (Needleman-Wunsch) tables for two token sequences.
//
// Scoring: identical tokens score +2, a gap scores -2, and two different tokens score
// minus their character-level Levenshtein distance.
//
// Returns (scoreMatrix, tracebackMatrix), both sized (seq1.Count + 1) x (seq2.Count + 1).
// tracebackMatrix[i, j] for i, j >= 1 holds the move that produced the best score:
//   1 = pair seq1[i - 1] with seq2[j - 1],
//   2 = pair seq1[i - 1] with a gap,
//   3 = pair seq2[j - 1] with a gap.
// Row 0 and column 0 of tracebackMatrix are left at 0. On ties the diagonal move wins,
// then move 2.
static (int[,], int[,]) NeedlemanWunschAlignment(List<string> seq1, List<string> seq2)
{
    const int matchScore = 2;
    const int gapScore = -2;

    int seq1Len = seq1.Count;
    int seq2Len = seq2.Count;
    int[,] scoreMatrix = new int[seq1Len + 1, seq2Len + 1];
    int[,] tracebackMatrix = new int[seq1Len + 1, seq2Len + 1];

    // Borders: aligning a prefix against nothing costs one gap per token.
    for (int i = 1; i <= seq1Len; i++)
    {
        scoreMatrix[i, 0] = gapScore * i;
    }
    for (int j = 1; j <= seq2Len; j++)
    {
        scoreMatrix[0, j] = gapScore * j;
    }

    for (int i = 1; i <= seq1Len; i++)
    {
        string token1 = seq1[i - 1];
        for (int j = 1; j <= seq2Len; j++)
        {
            string token2 = seq2[j - 1];

            // Only pay for the edit-distance computation when the tokens actually differ.
            int pairScore = token1 == token2
                ? matchScore
                : -LevenshteinDistance(token1, token2);

            int scoreDiag = scoreMatrix[i - 1, j - 1] + pairScore;
            int scoreGapInSeq2 = scoreMatrix[i - 1, j] + gapScore; // consumes seq1[i - 1] only
            int scoreGapInSeq1 = scoreMatrix[i, j - 1] + gapScore; // consumes seq2[j - 1] only

            int maxScore = Math.Max(scoreDiag, Math.Max(scoreGapInSeq2, scoreGapInSeq1));
            scoreMatrix[i, j] = maxScore;

            if (maxScore == scoreDiag)
            {
                tracebackMatrix[i, j] = 1;
            }
            else if (maxScore == scoreGapInSeq2)
            {
                tracebackMatrix[i, j] = 2;
            }
            else
            {
                tracebackMatrix[i, j] = 3;
            }
        }
    }

    return (scoreMatrix, tracebackMatrix);
}
// Rebuilds the aligned token pairs from a traceback matrix, walking from the bottom-right
// cell back to [0, 0].
//
// Each returned pair is (token from seq1, token from seq2); "-" stands for a gap on that
// side. Matrix codes: 1 pairs both tokens, 2 pairs the seq1 token with a gap, any other
// value pairs the seq2 token with a gap. Once one sequence is exhausted, the remaining
// tokens of the other are paired with gaps without consulting the matrix.
//
// Pairs are collected back-to-front and reversed once at the end, which keeps the walk
// linear in the alignment length instead of shifting the list on every insertion.
static List<Tuple<string, string>> Traceback(List<string> seq1, List<string> seq2, int[,] tracebackMatrix)
{
    List<Tuple<string, string>> alignedTokens = new List<Tuple<string, string>>(seq1.Count + seq2.Count);
    int i = seq1.Count;
    int j = seq2.Count;

    while (i > 0 || j > 0)
    {
        bool bothRemaining = i > 0 && j > 0;

        if (bothRemaining && tracebackMatrix[i, j] == 1)
        {
            alignedTokens.Add(Tuple.Create(seq1[i - 1], seq2[j - 1]));
            i--;
            j--;
        }
        else if ((bothRemaining && tracebackMatrix[i, j] == 2) || (!bothRemaining && i > 0))
        {
            alignedTokens.Add(Tuple.Create(seq1[i - 1], "-"));
            i--;
        }
        else
        {
            alignedTokens.Add(Tuple.Create("-", seq2[j - 1]));
            j--;
        }
    }

    alignedTokens.Reverse();
    return alignedTokens;
}
// Splits text into lower-cased word tokens.
//
// A word is a maximal run of characters that are neither whitespace nor punctuation
// (char.IsWhiteSpace / char.IsPunctuation) - the same boundary rule the highlighting scan
// applies to the raw input, so token N here corresponds to word N there. Tabs, newlines,
// apostrophes, hyphens and the like therefore all end a word, and a lone "-" can never
// become a token (it is reserved as the alignment gap marker).
//
// Lower-casing is culture-invariant. A null or empty text yields an empty list.
static List<string> TokenizeText(string text)
{
    List<string> tokens = new List<string>();
    if (string.IsNullOrEmpty(text))
    {
        return tokens;
    }

    StringBuilder word = new StringBuilder();
    foreach (char c in text.ToLowerInvariant())
    {
        if (char.IsWhiteSpace(c) || char.IsPunctuation(c))
        {
            // Boundary: flush the word collected so far, if any.
            if (word.Length > 0)
            {
                tokens.Add(word.ToString());
                word.Clear();
            }
        }
        else
        {
            word.Append(c);
        }
    }

    if (word.Length > 0)
    {
        tokens.Add(word.ToString());
    }

    return tokens;
}
// Returns the Levenshtein edit distance between s1 and s2: the minimum number of
// single-character insertions, deletions and substitutions that turn one into the other.
// Comparison is per char and case-sensitive.
static int LevenshteinDistance(string s1, string s2)
{
    int rows = s1.Length + 1;
    int cols = s2.Length + 1;

    // table[r, c] = distance between the first r chars of s1 and the first c chars of s2.
    int[,] table = new int[rows, cols];

    for (int r = 0; r < rows; r++)
    {
        for (int c = 0; c < cols; c++)
        {
            if (r == 0)
            {
                // Empty prefix of s1: insert all c characters.
                table[r, c] = c;
                continue;
            }
            if (c == 0)
            {
                // Empty prefix of s2: delete all r characters.
                table[r, c] = r;
                continue;
            }

            int deletion = table[r - 1, c] + 1;
            int insertion = table[r, c - 1] + 1;
            int substitution = table[r - 1, c - 1];
            if (s1[r - 1] != s2[c - 1])
            {
                substitution++;
            }

            table[r, c] = Math.Min(Math.Min(deletion, insertion), substitution);
        }
    }

    return table[rows - 1, cols - 1];
}