Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
fa02983
Select all for POS is default and data structure clears between searches
tmgreenside Dec 9, 2018
8dd0630
All POS tags are presented when entry field 2 is blank
tmgreenside Jan 16, 2019
0387e4d
Working on adding tag for search criteria
tmgreenside Jan 16, 2019
93674e3
Fixed an issue with collecting data with tags for searches
tmgreenside Jan 18, 2019
cbc3fb2
Tag is accepted as second search input
tmgreenside Jan 22, 2019
f9e3c2a
Successfully builds query with offset, but offset still not working
tmgreenside Jan 23, 2019
e9a0bf6
Offset kind of working
tmgreenside Jan 23, 2019
ed624ba
Offset seems to work
tmgreenside Jan 24, 2019
09931b2
Saving progress before trying another offset technique
tmgreenside Jan 30, 2019
019d5ec
Corpus offset should be good now.
tmgreenside Feb 1, 2019
4a15b25
Saving progress, going to rewrite query builder
tmgreenside Feb 7, 2019
88bd4d8
Experiment with query builder works.
tmgreenside Feb 7, 2019
f911010
Working on corpus highlighting
tmgreenside Feb 8, 2019
6c7f6dd
Saving attempt at highlighting, subbing highlight with italics
tmgreenside Feb 10, 2019
48b3788
Corpus ready for submission, minus highlighting
tmgreenside Feb 10, 2019
1103d48
Fixed a logic error
tmgreenside Feb 13, 2019
1c455dc
Changes for Corpus Dr Hunter requested
tmgreenside Feb 19, 2019
2a93bf4
Fixed Corpus bug
tmgreenside Mar 4, 2019
b09ed0c
Added Google button for speech to text
tmgreenside Mar 6, 2019
52d2810
Working on grabbing audio recording front-end
tmgreenside Mar 6, 2019
a88e2de
Added brief instructions for setting up speech to text, looking at di…
tmgreenside Mar 7, 2019
98cd55b
Saving progress on adding audio input to worksheet
tmgreenside Mar 17, 2019
f84ac64
New plan for speech to text: uploading existing recording as approved…
tmgreenside Mar 21, 2019
93f341a
Working on sending file to Google Cloud for transcription
tmgreenside Mar 25, 2019
fde6c44
Working on sending file to Google Cloud for transcription
tmgreenside Mar 25, 2019
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added 3b8e5045-42db-4315-a9da-20ba22f7f162.wav
Binary file not shown.
106 changes: 49 additions & 57 deletions ComSemApp/corpus/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
def corpus_search(request):
tags = Tag.objects.all()
template = loader.get_template('ComSemApp/corpus/corpus_search_new.html')
return HttpResponse(template.render({'tags': tags, 'offsetRange':[i for i in range(-8,8)]}, request))
return HttpResponse(template.render({'tags': tags, 'offsetRange':[i for i in range(-8,8+1)]}, request))

@login_required
def populate_word_tag(request):
val = request.POST.get('val', None)
val = request.POST.get('val', None).lstrip(' ').rstrip(' ')
search_type = request.POST.get('type', None)
output = request.POST.get('output', None)

Expand Down Expand Up @@ -60,13 +60,17 @@ def search_results(request):
sequential_search = request.POST.get('searchType') == '1'
search_criteria = request.POST.get('searchCriteria', None)

if not search_criteria:
if not search_criteria or search_criteria == "":
return HttpResponse('No search criteria provided', status=401)

search_criteria = json.loads(search_criteria)

query = build_query(len(search_criteria) - 1, search_criteria, sequential_search)
print("Query:", query)
for item in search_criteria:
if item['type'] == 'word' and " " in item['val'].rstrip().lstrip():
return HttpResponse('Invalid input: one word only per entry');

query = build_query(search_criteria, sequential_search)

with connection.cursor() as cursor:
expression_ids = []
cursor.execute(query)
Expand All @@ -76,20 +80,6 @@ def search_results(request):
# grab the information we want about the expressions
expressions = Expression.objects.filter(id__in=expression_ids)

# for each expression, retag in order to show where the matching word / tag is.
# TODO
# for expression in expressions:
# tokens = nltk.word_tokenize(expression.expression)
# tagged = nltk.pos_tag(tokens)
# print (tagged)
# for criterion in search_criteria:
# print (criterion)
# if criterion['type'] == 'tag':
# tag = criterion['val']
# for word in tagged:
# if word[1] == tag:
# print ("match")

context = {
'expressions': expressions,
'sequential_search': sequential_search,
Expand All @@ -98,47 +88,49 @@ def search_results(request):
template = loader.get_template('ComSemApp/corpus/search_results.html')
return HttpResponse(template.render(context, request))


# work backwards through the search criteria - we make n - 1 joins (where n = number of search criteria) with n tables that
# select expression ID and position (if sequential search).
def build_query(i, search_criteria, sequential_search):
current_criteria = search_criteria[i]
criteria_type = current_criteria['type']
val = current_criteria['val']
id_list = current_criteria['id_list']

# if val isnt valid, id_list isn't a list of int ...

if i < 0:
# This query builder makes the following assumptions about the search criteria:
# there is at least one word, optionally paired with either a tag or a second
# word, and there may be an offset (only honored for sequential searches).
def build_query(search_criteria, sequential_search):
words = []
tags = []
offset = 0
for item in search_criteria:
if item['type'] == 'word':
words.append(item)
elif item['type'] == 'tag':
tags.append(item)
elif item['type'] == 'offset' and sequential_search == True:
offset = item['val']

if len(words) == 0:
return ""
else:
if(criteria_type == "offset"):
print ("to do")

select_position = ", SW.Position" if sequential_search else ""
from_words = ", ComSemApp_word as W " if criteria_type == "tag" else ""

query = "SELECT SW.expression_id" + select_position + " FROM ComSemApp_sequentialwords AS SW" + from_words

if i > 0:
query += ", (" + build_query(i - 1, search_criteria, sequential_search) + ") as Derived" + str(i)

query += " WHERE "

if criteria_type == "tag":
query += " SW.word_id = W.id AND W.tag_id in (" + ','.join([str(id) for id in id_list]) + ") "
query = "SELECT SW.expression_id"
if sequential_search:
query += ", SW.position"
query += " FROM ComSemApp_sequentialwords as SW"

if len(words) > 1 or len(tags) > 0:
query += ", (SELECT SW2.expression_id"
if sequential_search:
query += ", SW2.position"
query += " from ComSemApp_sequentialwords as SW2"
if len(tags) > 0:
query += ", ComSemApp_word as W where W.tag_id in (" + ','.join([str(id) for id in tags[0]['id_list']])
query += ") and SW2.word_id = W.id"
else:
query += " SW.word_id in (" + ','.join([str(id) for id in id_list]) + ") "

if i > 0:
if sequential_search:
next_position = 1
query += " where SW2.word_id in (" + ','.join([str(id) for id in words[1]['id_list']])
query += ")"
query += ") as derived2"
query += " where SW.word_id in (" + ','.join([str(id) for id in words[0]['id_list']])
query += ")"

# if the next search criteria is an offset, we'll use it here then skip it in the next call.
if search_criteria[i-1]['type'] == 'offset':
next_position += search_criteria[i-1]['val']
if len(words) > 1 or len(tags) > 0:
query += " and SW.expression_id = derived2.expression_id"

query += "AND SW.position = (Derived" + str(i) + ".position + " + str(next_position) + ") "
if offset > 0:
query += " and derived2.position <= (SW.position + " + str(offset) + ") and SW.position < derived2.position"
elif offset < 0:
query += " and SW.position <= (derived2.position + " + str(abs(offset)) + ") and derived2.position < SW.position"

query += "AND SW.expression_id = Derived" + str(i) + ".expression_id "
return query
return query
Binary file added ComSemApp/static/ComSemApp/images/google_logo.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
11 changes: 11 additions & 0 deletions ComSemApp/student/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,16 @@
from ComSemApp.models import *
from ComSemApp.libs.mixins import RoleViewMixin, CourseViewMixin, WorksheetViewMixin, SubmissionViewMixin

# Imports the Google Cloud client library
from google.cloud import speech
from google.cloud.speech import enums
from google.cloud.speech import types

# googleClient = speech.SpeechClient()

def TranscribeAudio(request):
    """Stub: accept ``request`` and return ``None`` without doing any work.

    ``request`` is currently unused and nothing is transcribed yet.

    NOTE(review): presumably intended to become the speech-to-text endpoint
    built on the Google Cloud ``speech`` client imported above -- confirm
    before wiring this up, since the bare ``return`` yields ``None``.
    """

    return

class StudentViewMixin(RoleViewMixin):

Expand Down Expand Up @@ -193,6 +203,7 @@ def get_context_data(self, **kwargs):
context = super(AttemptCreateView, self).get_context_data(**kwargs)
# TODO - expression mixin rather than grabbing expression twice ?
expression_id = self.kwargs.get('expression_id')

expression = get_object_or_404(Expression, id=expression_id, worksheet=self.worksheet)
context['expression'] = expression
return context
Expand Down
106 changes: 86 additions & 20 deletions ComSemApp/templates/ComSemApp/corpus/corpus_search_new.html
Original file line number Diff line number Diff line change
Expand Up @@ -59,40 +59,50 @@ <h4 class="m-b-10 header-title">
<input type="text" class="form-control input-sm" id="word2" name="word2" placeholder="Type another word (optional)">
</div>
<div class="row">
<div class="col-sm-12 col-md-6">
<div class="col-sm-12 col-md-6" >
<h4 class="m-b-10 header-title">
Selector
</h4>
</div>
</div>
<br>
<div class="row">
<div class="row" style="max-height: 400px; overflow-y: scroll;">
<div class="col-12 nicescroll" id="DynamicField2" name="DynamicField2">
<p>Please enter a word.</p>

</div>
</div>
</div>

<div class="col-lg-2">
<br>
<div class="col-lg-3">

<!-- submit button -->
<button class="btn btn-sm btn-outline-success pull-right" type="submit">
<span>Search <i class="ti-angle-right"></i></span>
</button>
<br><br>
<button class="btn btn-sm btn-outline-success pull-right" name="clearForm" id="clearForm">
<span>Clear <i class="ti-angle-left"></i></span>
</button>

<br><br><br>
<br><br>
<!-- user specifies offset for the corpus search -->
<label for="newOffset">Select Offset</label>
<select id="newOffset" name="addOffset">
<label for="newOffset">Select Offset</label><br>
<select id="newOffset" name="newOffset">
{% for i in offsetRange %}
{% if i == 0 %}
<option value="{{ i }}" selected>{{ i }}</option>
<option value="none" selected>none</option>
{% else %}
<option value="{{ i }}" >{{ i }}</option>
{% endif %}
{% endfor %}
</select>
<br><br>
<p><b>Offset</b> determines how many spaces away from each other two words
can be in a sentence. For example, if you want to see "the" followed
by "king," you would set an offset of 1 (or -1 if "king" is entered for
the first word).</p>
<p>The offset indicates "up to and including," so for an offset value of 4,
two words may be shown together between 1 and 4 spaces away. Negative values
mean the second word will come first in the sentences.</p>
</div>


Expand All @@ -116,7 +126,8 @@ <h4 class="m-b-10 header-title">

// Adds each checked word / POS tag pair to the search criteria
function collectData(posTableWord) {
var type = $('#search_type').val()

var type = "word";
var id_list = [];
var val = posTableWord;

Expand All @@ -129,6 +140,24 @@ <h4 class="m-b-10 header-title">
appendCriterion(type, val, id_list);
}

/*
  Gathers the value of every checked input whose name begins with
  "tag_selector_checked" and, if at least one is checked, appends them to
  the search criteria as a single criterion of type "tag" labelled "Tag".
  Used when a POS tag is selected with no accompanying word.
*/
function collectTags() {
    var checkedIds = $("input[name^='tag_selector_checked']")
        .filter(':checked')
        .map(function() {
            return $(this).val();
        })
        .get(); // unwrap the jQuery collection into a plain array

    // Nothing ticked: leave the search criteria untouched.
    if (checkedIds.length === 0) {
        return;
    }

    appendCriterion("tag", "Tag", checkedIds);
}

/*
Add search data to the search criteria, and add it to a hidden element
holding the search data that is a hold-over from Zeke Hunter-Green's version
Expand Down Expand Up @@ -166,7 +195,7 @@ <h4 class="m-b-10 header-title">
*/
function updateTextInput(val) {
document.getElementById('textInput').value=val; // Maybe change to "none" if val is 0?
}
};

/*
This function takes a word entry field and the field to output Ajax results,
Expand All @@ -178,9 +207,15 @@ <h4 class="m-b-10 header-title">
var val = $(wordEntryField).val();
var type = 'word';
if (val == "") { // user enters nothing, clear field and return
$(resultField).empty();
$(resultField).html("<p>Please enter a word.</p>");
return;
if (resultField == "#DynamicField2") {
val = "ALL";
type = 'tag';
}
else {
$(resultField).empty();
$(resultField).html("<p>Please enter a word.</p>");
return;
}
}

$(resultField).empty(); // clear the result field, prep for new results
Expand All @@ -194,8 +229,9 @@ <h4 class="m-b-10 header-title">
},

success: function(data){

$(resultField).html(data);
$('#' + val.toLowerCase() + '_checkAll').prop( "checked", true );
selectAll(val.toLowerCase());
},
fail: function(xhr, textStatus, errorThrown) {
alert("Search Failed");
Expand All @@ -211,6 +247,10 @@ <h4 class="m-b-10 header-title">
*/
$(document).ready(function() {

// By default, a table with all possible tags will be shown when the
// page is loaded.
$(window).on("load", getWordPosTags("#word2", "#DynamicField2"));

// user enters word in left box
$("#word1").on("keydown paste", function() {
if (progressTimeout) {
Expand All @@ -227,29 +267,55 @@ <h4 class="m-b-10 header-title">
progressTimeout = setTimeout(getWordPosTags, 500, "#word2", "#DynamicField2");
});

// user hits the "search" button, display corpus search results
// User hits the "Clear" button, word entries are set to empty and the
// offset is set to "none."
$('#clearForm').click(function() {
$("#word2").val("");
$("#word1").val("");
getWordPosTags("#word1", "#DynamicField1");
getWordPosTags("#word2", "#DynamicField2");
$('#newOffset').val("none");
});

// user hits the "search" button, display corpus search results.
// For compatibility, the criteria must be of the form "word1 offset
// word2"
$("#SubmitSearchForm").submit(function(e){
e.preventDefault();

// Working on adapting this for the result table
searchCriteria = [];

$(this).prop("disabled",true);



if ($("#word1").val() !== "") {
collectData($("#word1").val());
} else {
$(this).prop("disabled",false);
return;
}

var sequential_search;

if ($('#newOffset').val() !== "none") {
appendCriterion('offset', parseInt($('#newOffset').val()), []);
sequential_search = '1';
} else {
sequential_search = '0';
}

if ($("#word2").val() !== "") {
collectData($("#word2").val());
} else {
collectTags();
}

var data = {
'searchCriteria': $('#searchCriteria').val(),
'searchType': $('[name=searchType]:checked').val(),
'searchType': sequential_search,
}
$('#searchResults').show().html("<div class='row float-middle'>Loading...</div>").load("{% url 'corpus:search_results' %}", data );
$('#searchResults').show().html("<div class='row float-middle'>Loading...</div>").load("{% url 'corpus:search_results' %}", data);
$(this).prop("disabled",false);
});
});
Expand Down
6 changes: 3 additions & 3 deletions ComSemApp/templates/ComSemApp/corpus/tag_table.html
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
<tr>
<th class="checkbox-cell">
<div class="checkbox checkbox-success">
<input id="checkAll" type="checkbox">
<label for="checkAll"></label>
<!-- <input id="tag_checkAll" name="tag_checkAll" type="checkbox" onclick='selectAll("tag")'>
<label for="checkAll"></label> -->
</div>
</th>
<th>Tag Name</th>
Expand All @@ -19,7 +19,7 @@
<tr>
<td class="checkbox-cell">
<div class="checkbox checkbox-primary">
<input type="checkbox" name="selector_checked[]" value="{{ tag.id }}" id="result{{ forloop.counter }}">
<input type="checkbox" name="tag_selector_checked[]" value="{{ tag.id }}" id="result{{ forloop.counter }}">
<label for="result{{ forloop.counter }}"></label>
</div>
</td>
Expand Down
Loading