From 6db3248e454763e1cd48cdecd763ddc7bf023146 Mon Sep 17 00:00:00 2001 From: Sean Duffy Date: Sun, 17 May 2015 21:28:36 +0100 Subject: [PATCH 1/5] Capture answer_id for answers --- askfm.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/askfm.py b/askfm.py index aca4491..a391843 100644 --- a/askfm.py +++ b/askfm.py @@ -137,7 +137,13 @@ def responseSorter(question): like_list = (None) if(like_url != None): like_list = getUsernames(like_url) + + #answer_id + match = re.match('question_box_(\d{12})', question.get('id')) + answer_id = match.group(1) + return_data = { + "answer_id":answer_id, "question_text":question_text, "asked_by_who":asked_by_who, "answer":answer, @@ -147,6 +153,7 @@ def responseSorter(question): "like_count":like_count, "like_list":like_list } + return return_data def getAnswers(username): From f6f587575171105fbe1b8b34af15d6b79a33554d Mon Sep 17 00:00:00 2001 From: Sean Duffy Date: Thu, 21 May 2015 22:33:16 +0100 Subject: [PATCH 2/5] Use text_content() instead of text --- askfm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/askfm.py b/askfm.py index a391843..91099b8 100644 --- a/askfm.py +++ b/askfm.py @@ -90,7 +90,9 @@ def responseSorter(question): for i in question_list: for j in i.getchildren(): if(j.tag == 'span'): - question_text = j.text + text = j.text_content() + if text is not None and text != '': + question_text = text.replace('\n', ' ').replace('\r', ' ') #asked_by_who asked_by = question.find('div/span/a') if(asked_by == None): From 9fb9866168890d305f7dbd0ba0e960d2c287876c Mon Sep 17 00:00:00 2001 From: Sean Duffy Date: Thu, 21 May 2015 22:59:00 +0100 Subject: [PATCH 3/5] Deal with missing answer id --- askfm.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/askfm.py b/askfm.py index 91099b8..1c1dfda 100644 --- a/askfm.py +++ b/askfm.py @@ -141,8 +141,10 @@ def responseSorter(question): like_list = getUsernames(like_url) #answer_id + answer_id = (None) match = re.match('question_box_(\d{12})', question.get('id')) - answer_id = match.group(1) + if match is not None: + answer_id = match.group(1) return_data = { "answer_id":answer_id, From 0a140be811385a635dc9aa971722fb1e34872561 Mon Sep 17 00:00:00 2001 From: Sean Duffy Date: Thu, 21 May 2015 23:01:46 +0100 Subject: [PATCH 4/5] Introduce optional delay --- askfm.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/askfm.py b/askfm.py index 1c1dfda..1046374 100644 --- a/askfm.py +++ b/askfm.py @@ -171,6 +171,9 @@ def getAnswers(username): next_page = -1 while(True): + if delay > 0: + sleep(delay) + token = getToken(tree) time = getTime(tree) if(next_page < 0): @@ -192,7 +195,7 @@ def getAnswers(username): next_page = int(re.search(r'\d+', re.search(r'val\(\d+\)', raw_post_result.text).group(0)).group(0)) return dict_holder -def getUser(username): +def getUser(username, delay=0): tree = getTree(username) if(isUserDeactivated(tree)): return None @@ -206,5 +209,5 @@ def getUser(username): user["user_answer_count"] = getAnswerCount(tree) user["user_like_count"] = getLikeCount(tree) user["user_gift_count"] = getGifts(tree) - user["answers"] = getAnswers(username) + user["answers"] = getAnswers(username, delay) return user From e5bf8a66a6781af7d7769cb77de5f4a90392fc23 Mon Sep 17 00:00:00 2001 From: Sean Duffy Date: Thu, 21 May 2015 23:04:45 +0100 Subject: [PATCH 5/5] Add missing argument to getAnswers --- askfm.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/askfm.py b/askfm.py index 1046374..ad10c73 100644 --- a/askfm.py +++ b/askfm.py @@ -160,7 +160,7 @@ def responseSorter(question): return return_data -def getAnswers(username): +def getAnswers(username, delay): dict_holder = [] tree = getTree(username) for i in tree.xpath("//div[@class='questionBox']"):