diff --git a/3b8e5045-42db-4315-a9da-20ba22f7f162.wav b/3b8e5045-42db-4315-a9da-20ba22f7f162.wav new file mode 100644 index 00000000..c3035571 Binary files /dev/null and b/3b8e5045-42db-4315-a9da-20ba22f7f162.wav differ diff --git a/ComSemApp/corpus/views.py b/ComSemApp/corpus/views.py index 289afd45..8d08ca4b 100644 --- a/ComSemApp/corpus/views.py +++ b/ComSemApp/corpus/views.py @@ -19,11 +19,11 @@ def corpus_search(request): tags = Tag.objects.all() template = loader.get_template('ComSemApp/corpus/corpus_search_new.html') - return HttpResponse(template.render({'tags': tags, 'offsetRange':[i for i in range(-8,8)]}, request)) + return HttpResponse(template.render({'tags': tags, 'offsetRange':[i for i in range(-8,8+1)]}, request)) @login_required def populate_word_tag(request): - val = request.POST.get('val', None) + val = request.POST.get('val', None).lstrip(' ').rstrip(' ') search_type = request.POST.get('type', None) output = request.POST.get('output', None) @@ -60,13 +60,17 @@ def search_results(request): sequential_search = request.POST.get('searchType') == '1' search_criteria = request.POST.get('searchCriteria', None) - if not search_criteria: + if not search_criteria or search_criteria == "": return HttpResponse('No search criteria provided', status=401) search_criteria = json.loads(search_criteria) - query = build_query(len(search_criteria) - 1, search_criteria, sequential_search) - print("Query:", query) + for item in search_criteria: + if item['type'] == 'word' and " " in item['val'].rstrip().lstrip(): + return HttpResponse('Invalid input: one word only per entry'); + + query = build_query(search_criteria, sequential_search) + with connection.cursor() as cursor: expression_ids = [] cursor.execute(query) @@ -76,20 +80,6 @@ def search_results(request): # grab the information we want about the expressions expressions = Expression.objects.filter(id__in=expression_ids) - # for each expression, retag in order to show where the matching word / tag is. - # TODO - # for expression in expressions: - # tokens = nltk.word_tokenize(expression.expression) - # tagged = nltk.pos_tag(tokens) - # print (tagged) - # for criterion in search_criteria: - # print (criterion) - # if criterion['type'] == 'tag': - # tag = criterion['val'] - # for word in tagged: - # if word[1] == tag: - # print ("match") - context = { 'expressions': expressions, 'sequential_search': sequential_search, @@ -98,47 +88,49 @@ def search_results(request): template = loader.get_template('ComSemApp/corpus/search_results.html') return HttpResponse(template.render(context, request)) - -# work backwards through the search criteria - we make n - 1 joins (where n = number of search criteria) with n tables that -# select expression ID and position (if sequential search). -def build_query(i, search_criteria, sequential_search): - current_criteria = search_criteria[i] - criteria_type = current_criteria['type'] - val = current_criteria['val'] - id_list = current_criteria['id_list'] - - # if val isnt valid, id_list isn't a list of int ... - - if i < 0: +# This query builder makes the following assumptions about the search criteria: +# there is one word, either a tag or a second word, and there may be an offset. +def build_query(search_criteria, sequential_search): + words = [] + tags = [] + offset = 0 + for item in search_criteria: + if item['type'] == 'word': + words.append(item) + elif item['type'] == 'tag': + tags.append(item) + elif item['type'] == 'offset' and sequential_search == True: + offset = item['val'] + + if len(words) == 0: return "" - else: - if(criteria_type == "offset"): - print ("to do") - - select_position = ", SW.Position" if sequential_search else "" - from_words = ", ComSemApp_word as W " if criteria_type == "tag" else "" - - query = "SELECT SW.expression_id" + select_position + " FROM ComSemApp_sequentialwords AS SW" + from_words - if i > 0: - query += ", (" + build_query(i - 1, search_criteria, sequential_search) + ") as Derived" + str(i) - - query += " WHERE " - - if criteria_type == "tag": - query += " SW.word_id = W.id AND W.tag_id in (" + ','.join([str(id) for id in id_list]) + ") " + query = "SELECT SW.expression_id" + if sequential_search: + query += ", SW.position" + query += " FROM ComSemApp_sequentialwords as SW" + + if len(words) > 1 or len(tags) > 0: + query += ", (SELECT SW2.expression_id" + if sequential_search: + query += ", SW2.position" + query += " from ComSemApp_sequentialwords as SW2" + if len(tags) > 0: + query += ", ComSemApp_word as W where W.tag_id in (" + ','.join([str(id) for id in tags[0]['id_list']]) + query += ") and SW2.word_id = W.id" else: - query += " SW.word_id in (" + ','.join([str(id) for id in id_list]) + ") " - - if i > 0: - if sequential_search: - next_position = 1 + query += " where SW2.word_id in (" + ','.join([str(id) for id in words[1]['id_list']]) + query += ")" + query += ") as derived2" + query += " where SW.word_id in (" + ','.join([str(id) for id in words[0]['id_list']]) + query += ")" - # if the next search criteria is an offset, we'll use it here then skip it in the next call. - if search_criteria[i-1]['type'] == 'offset': - next_position += search_criteria[i-1]['val'] + if len(words) > 1 or len(tags) > 0: + query += " and SW.expression_id = derived2.expression_id" - query += "AND SW.position = (Derived" + str(i) + ".position + " + str(next_position) + ") " + if offset > 0: + query += " and derived2.position <= (SW.position + " + str(offset) + ") and SW.position < derived2.position" + elif offset < 0: + query += " and SW.position <= (derived2.position + " + str(abs(offset)) + ") and derived2.position < SW.position" - query += "AND SW.expression_id = Derived" + str(i) + ".expression_id " - return query + return query diff --git a/ComSemApp/static/ComSemApp/images/google_logo.png b/ComSemApp/static/ComSemApp/images/google_logo.png new file mode 100644 index 00000000..8a4c8a88 Binary files /dev/null and b/ComSemApp/static/ComSemApp/images/google_logo.png differ diff --git a/ComSemApp/student/views.py b/ComSemApp/student/views.py index 58085058..d79d7a5a 100644 --- a/ComSemApp/student/views.py +++ b/ComSemApp/student/views.py @@ -17,6 +17,16 @@ from ComSemApp.models import * from ComSemApp.libs.mixins import RoleViewMixin, CourseViewMixin, WorksheetViewMixin, SubmissionViewMixin +# Imports the Google Cloud client library +from google.cloud import speech +from google.cloud.speech import enums +from google.cloud.speech import types + +# googleClient = speech.SpeechClient() + +def TranscribeAudio(request): + + return class StudentViewMixin(RoleViewMixin): @@ -193,6 +203,7 @@ def get_context_data(self, **kwargs): context = super(AttemptCreateView, self).get_context_data(**kwargs) # TODO - expression mixin rather than grabbing expression twice ? expression_id = self.kwargs.get('expression_id') + expression = get_object_or_404(Expression, id=expression_id, worksheet=self.worksheet) context['expression'] = expression return context diff --git a/ComSemApp/templates/ComSemApp/corpus/corpus_search_new.html b/ComSemApp/templates/ComSemApp/corpus/corpus_search_new.html index 7c6a27e2..30bcd673 100644 --- a/ComSemApp/templates/ComSemApp/corpus/corpus_search_new.html +++ b/ComSemApp/templates/ComSemApp/corpus/corpus_search_new.html @@ -59,40 +59,50 @@

-
+

Selector

-
-
+
-

Please enter a word.

+
-
-
+
+

+ -


+

- - {% for i in offsetRange %} {% if i == 0 %} - + {% else %} {% endif %} {% endfor %} +

+

Offset determines how many spaces away from each other two words + can be in a sentence. For example, if you want to see "the" followed + by "king," you would set an offset of 1 (or -1 if "king" is entered for + the first word).

+

The offset indicates "up to and including," so for an offset value of 4, + two words may be shown together between 1 and 4 spaces away. Negative values + mean the second word will come first in the sentences.

@@ -116,7 +126,8 @@

// Adds each checked word / POS tag pair to the search criteria function collectData(posTableWord) { - var type = $('#search_type').val() + + var type = "word"; var id_list = []; var val = posTableWord; @@ -129,6 +140,24 @@

appendCriterion(type, val, id_list); } + // Adds each selected POS tag with no accompanying word to the + // search criteria. + function collectTags() { + var type = "tag"; + var id_list = []; + var val = "Tag"; + + $("input[name^='tag_selector_checked']").each(function() { + if ($(this).is(':checked')) { + id_list.push($(this).val()); + } + }); + + if (id_list.length > 0) { + appendCriterion(type, val, id_list); + } + } + /* Add search data to the search criteria, and add it to a hidden element holding the search data that is a hold-over from Zeke Hunter-Green's version @@ -166,7 +195,7 @@

*/ function updateTextInput(val) { document.getElementById('textInput').value=val; // Maybe change to "none" if val is 0? - } + }; /* This function takes a word entry field and the field to outout Ajax results, @@ -178,9 +207,15 @@

var val = $(wordEntryField).val(); var type = 'word'; if (val == "") { // user enters nothing, clear field and return - $(resultField).empty(); - $(resultField).html("

Please enter a word.

"); - return; + if (resultField == "#DynamicField2") { + val = "ALL"; + type = 'tag'; + } + else { + $(resultField).empty(); + $(resultField).html("

Please enter a word.

"); + return; + } } $(resultField).empty(); // clear the result field, prep for new results @@ -194,8 +229,9 @@

}, success: function(data){ - $(resultField).html(data); + $('#' + val.toLowerCase() + '_checkAll').prop( "checked", true ); + selectAll(val.toLowerCase()); }, fail: function(xhr, textStatus, errorThrown) { alert("Search Failed"); @@ -211,6 +247,10 @@

*/ $(document).ready(function() { + // By default, a table with all possible tags will be shown when the + // page is loaded. + $(window).on("load", getWordPosTags("#word2", "#DynamicField2")); + // user enters word in left box $("#word1").on("keydown paste", function() { if (progressTimeout) { @@ -227,13 +267,28 @@

progressTimeout = setTimeout(getWordPosTags, 500, "#word2", "#DynamicField2"); }); - // user hits the "search" button, display corpus search results + // User hits the "Clear" button, word entries are set to empty and the + // offset is set to "none." + $('#clearForm').click(function() { + $("#word2").val(""); + $("#word1").val(""); + getWordPosTags("#word1", "#DynamicField1"); + getWordPosTags("#word2", "#DynamicField2"); + $('#newOffset').val("none"); + }); + + // user hits the "search" button, display corpus search results. + // For compatibility, the criteria must be of the form "word1 offset + // word2" $("#SubmitSearchForm").submit(function(e){ e.preventDefault(); - // Working on adapting this for the result table + searchCriteria = []; + $(this).prop("disabled",true); + + if ($("#word1").val() !== "") { collectData($("#word1").val()); } else { @@ -241,15 +296,26 @@

return; } + var sequential_search; + + if ($('#newOffset').val() !== "none") { + appendCriterion('offset', parseInt($('#newOffset').val()), []); + sequential_search = '1'; + } else { + sequential_search = '0'; + } + if ($("#word2").val() !== "") { collectData($("#word2").val()); + } else { + collectTags(); } var data = { 'searchCriteria': $('#searchCriteria').val(), - 'searchType': $('[name=searchType]:checked').val(), + 'searchType': sequential_search, } - $('#searchResults').show().html("
Loading...
").load("{% url 'corpus:search_results' %}", data ); + $('#searchResults').show().html("
Loading...
").load("{% url 'corpus:search_results' %}", data); $(this).prop("disabled",false); }); }); diff --git a/ComSemApp/templates/ComSemApp/corpus/tag_table.html b/ComSemApp/templates/ComSemApp/corpus/tag_table.html index 0a610528..0d91a8dc 100644 --- a/ComSemApp/templates/ComSemApp/corpus/tag_table.html +++ b/ComSemApp/templates/ComSemApp/corpus/tag_table.html @@ -5,8 +5,8 @@
- - +
Tag Name @@ -19,7 +19,7 @@
- +
diff --git a/ComSemApp/templates/ComSemApp/corpus/word_table.html b/ComSemApp/templates/ComSemApp/corpus/word_table.html index 80c75bfa..92f02811 100644 --- a/ComSemApp/templates/ComSemApp/corpus/word_table.html +++ b/ComSemApp/templates/ComSemApp/corpus/word_table.html @@ -6,7 +6,7 @@
- +
diff --git a/ComSemApp/templates/ComSemApp/student/attempt_form.html b/ComSemApp/templates/ComSemApp/student/attempt_form.html index a4134b19..62772803 100644 --- a/ComSemApp/templates/ComSemApp/student/attempt_form.html +++ b/ComSemApp/templates/ComSemApp/student/attempt_form.html @@ -1,8 +1,8 @@ {% load static %} - - + + @@ -95,26 +95,33 @@ diff --git a/ComSemApp/templates/ComSemApp/student/create_submission.html b/ComSemApp/templates/ComSemApp/student/create_submission.html index 2c9412ac..88d30541 100644 --- a/ComSemApp/templates/ComSemApp/student/create_submission.html +++ b/ComSemApp/templates/ComSemApp/student/create_submission.html @@ -79,7 +79,7 @@

$('#expressionTableCol').addClass('shortened'); $('#expressionEditorCol').addClass('lengthened'); $(this).slideDown(); - }) + }); } diff --git a/ComSemApp/templates/ComSemApp/student/submissions_table.html b/ComSemApp/templates/ComSemApp/student/submissions_table.html index 85bf8a05..1ddb3691 100644 --- a/ComSemApp/templates/ComSemApp/student/submissions_table.html +++ b/ComSemApp/templates/ComSemApp/student/submissions_table.html @@ -3,7 +3,7 @@ {% if submissions %} - +
diff --git a/GoogleSpeech_Start.sh b/GoogleSpeech_Start.sh new file mode 100644 index 00000000..fd6bc353 --- /dev/null +++ b/GoogleSpeech_Start.sh @@ -0,0 +1 @@ +export GOOGLE_APPLICATION_CREDENTIALS="CommunicationSeminar-7d71fc1430af.json" diff --git a/INSTRUCTIONS.md b/INSTRUCTIONS.md new file mode 100644 index 00000000..1101fe37 --- /dev/null +++ b/INSTRUCTIONS.md @@ -0,0 +1,5 @@ +### Google Cloud Speech to Text + +Follow the installation directions at https://cloud.google.com/speech-to-text/docs/quickstart-client-libraries#client-libraries-install-python. +In short, you will have to install the Google Cloud SDK, add it to the path, use Pip to install the client libraries, and get a file with +an authentication key to send requests to Google Cloud for transcriptions. diff --git a/audio.raw b/audio.raw new file mode 100644 index 00000000..5ebf79d3 Binary files /dev/null and b/audio.raw differ diff --git a/googletest.m4a b/googletest.m4a new file mode 100644 index 00000000..ce967778 Binary files /dev/null and b/googletest.m4a differ diff --git a/googletest.py b/googletest.py new file mode 100644 index 00000000..e3526346 --- /dev/null +++ b/googletest.py @@ -0,0 +1,33 @@ +import io +import os + +# Imports the Google Cloud client library +from google.cloud import speech +from google.cloud.speech import enums +from google.cloud.speech import types + +# Instantiates a client +client = speech.SpeechClient() + +# The name of the audio file to transcribe +file_name = "efs/reformulations/41c87965-0174-4231-8253-db494f9a8a61.ogg" + +# Loads the audio into memory +with io.open(file_name, 'rb') as audio_file: + content = audio_file.read() + +audio = types.RecognitionAudio(content=content) + +# for i in range(8000, 48001, 2000): +for i in [8000, 12000, 16000, 24000, 48000]: + print(i) + config = types.RecognitionConfig( + encoding=enums.RecognitionConfig.AudioEncoding.OGG_OPUS, + sample_rate_hertz=i, + language_code='en-US') + + # Detects speech in the audio file + response = client.recognize(config, audio) + print("Recognizing") + for result in response.results: + print('Transcript: {}'.format(result.alternatives[0].transcript)) diff --git a/googletest2.m4a b/googletest2.m4a new file mode 100644 index 00000000..1cee3d95 Binary files /dev/null and b/googletest2.m4a differ diff --git a/googletest2.py b/googletest2.py new file mode 100644 index 00000000..52ac2627 --- /dev/null +++ b/googletest2.py @@ -0,0 +1,21 @@ +import base64 +import googleapiclient.discovery + +with open("tori_voiceover.mp3", 'rb') as speech: + # Base64 encode the binary audio file for inclusion in the JSON + # request. + speech_content = base64.b64encode(speech.read()) + +# Construct the request +service = googleapiclient.discovery.build('speech', 'v1') +service_request = service.speech().recognize( + body={ + "config": { + "encoding": "LINEAR16", # raw 16-bit signed LE samples + "sampleRateHertz": 16000, # 16 khz + "languageCode": "en-US", # a BCP-47 language tag + }, + "audio": { + "content": speech_content + } + }) diff --git a/googletest3.m4a b/googletest3.m4a new file mode 100644 index 00000000..8e1e8617 Binary files /dev/null and b/googletest3.m4a differ diff --git a/requirements.txt b/requirements.txt index 0fd17c35..690c620b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,3 +6,7 @@ mysqlclient==1.3.12 nltk==3.2.5 pytz==2018.5 six==1.11.0 +google-api-core==1.7.0 +google-auth==1.6.3 +google-cloud-speech==0.36.3 +googleapis-common-protos==1.5.8 diff --git a/static/ComSemApp/js/ComSemRecording-opus.js b/static/ComSemApp/js/ComSemRecording-opus.js index af262565..f7f89aa8 100644 --- a/static/ComSemApp/js/ComSemRecording-opus.js +++ b/static/ComSemApp/js/ComSemRecording-opus.js @@ -1,4 +1,4 @@ -// include in pages that need to record / save / access audio files +// include in pages that need to record / save / access audio files! var audioReformulationBlob; @@ -57,7 +57,9 @@ function initializeRecorder(){ screenLogger('Audio stream is ready.'); }); + // Apparently not being used recorder.addEventListener( "dataAvailable", function(e){ + var dataBlob = new Blob( [e.detail], { type: 'audio/ogg' } ); audioReformulationBlob = dataBlob; // save the current blob var fileName = new Date().toISOString() + ".opus"; @@ -78,7 +80,7 @@ function initializeRecorder(){ li.appendChild(audio); // recordingslist.appendChild(li); - + // changed in order to allow only one recording at a time: $('#recordingslist').html(audio); $('#deleteRecordingButton').attr('disabled', false); diff --git a/tori_voiceover.mp3 b/tori_voiceover.mp3 new file mode 100644 index 00000000..7a9f8280 Binary files /dev/null and b/tori_voiceover.mp3 differ