From 9c25cf586cfc55e8c7708531fc6a0387c60427b1 Mon Sep 17 00:00:00 2001
From: yzhou <yzhou0728@gmail.com>
Date: Tue, 24 Jan 2012 01:37:31 -0500
Subject: [PATCH 1/2] Search working almost perfectly with postgres/sqlite.
 Missing pagination and the search page is not very pretty at all. Speed
 also leaves something to be desired

---
 search.py                              | 370 +++++++++++++++++++++++++
 templates/search/results.html          |  29 +-
 views/main.py                          |  51 +++-
 views/pages.py                         |   6 +-
 wiki/management/commands/buildindex.py |  23 ++
 wiki/models/searchindex.py             |  39 +++
 6 files changed, 512 insertions(+), 6 deletions(-)
 create mode 100755 search.py
 create mode 100755 wiki/management/commands/buildindex.py
 create mode 100755 wiki/models/searchindex.py

diff --git a/search.py b/search.py
new file mode 100755
index 0000000..67273c5
--- /dev/null
+++ b/search.py
@@ -0,0 +1,370 @@
+import sys
+import subprocess
+import re
+import traceback
+import os
+import hashlib
+from django.db import connection, transaction
+import itertools
+import time
+
+def index_entire_history(file_path, page_id, *args, **kwargs):
+	print "indexing %s - %s" % (file_path.split('/')[2] , file_path.split("/")[5])
+	try:
+		# Indexes every commit of the git repo at file_path, oldest first; returns (commit_count, fail_count)
+		file_path = file_path.strip('/')
+		# not a git repo: nothing to index, but the caller still unpacks a (commit_count, fail_count) pair
+		if not os.path.exists("%s/.git" % file_path):
+			return (0, 0)
+
+		# one commit hash per output line, earliest commit first (--reverse)
+		command = "cd \"%s\" && git log --reverse --format=format:\"%%H\"" % (file_path)
+		lines = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE).communicate()[0].splitlines()
+	except:
+		return (0, 1)
+	commit_count = 0
+	fail_count = 0
+	#index each commit starting from the earliest
+	for (i, line) in enumerate(lines):
+		try:
+			index_commit(file_path, page_id, commit=line)
+
+			#progress bar: redraw a 60-column bar plus percentage on the same terminal line
+			total_width = 60
+			current_width = (i + 1) * total_width / len(lines)
+			sys.stdout.write("\r|" + ('-' * (current_width)).ljust(total_width) + "|" + "%d%%" % ((i + 1) * 100 / len(lines)))
+
+			commit_count += 1
+		except:
+			traceback.print_exc(file=sys.stdout)
+			fail_count += 1  # count every failed commit instead of capping the total at 1
+	print "\nDone!"
+	return (commit_count, fail_count)
+
+#TODO: return total num of results, better ordering (tf-idf?), and COMMENT
+def search(search_query, start):
+	results = []
+	subqueries = get_valid_keywords(search_query)
+	con = connection.cursor()
+	subresults = []
+	pages_hit = {}
+	for q in subqueries:
+		query = "\
+				SELECT page,max(frequency * exp(-3*head)) as score FROM wiki_keyword where keyword = %s\
+				GROUP BY page ORDER BY score DESC LIMIT 20 OFFSET %s\
+				"
+		con.execute(query , (q, start))
+		for (i, row) in enumerate(con.fetchall()):
+			page = row[0]
+			if page not in pages_hit:
+				pages_hit[page] = {"priority":1.0 + (10.0 - i) / 10, "words":[q], "id":page}
+			else:
+				pages_hit[page]["priority"] += 1.0 + (10.0 - i) / 10
+				pages_hit[page]["words"].append(q)
+	pages = sorted(pages_hit.values(), key=lambda k: k["priority"], reverse=True)[:10]
+	commit_queries = []
+	for page in pages:
+		commits_hit = {}
+		lines_hit = {}
+		for word in page["words"]:
+			con.execute("SELECT \"commit\" from wiki_keyword where page = %s and keyword = %s", (page["id"], word))
+			for row in con.fetchall():
+				if row[0] in commits_hit:
+					commits_hit[row[0]]["count"] += 1
+				else:
+					commits_hit[row[0]] = {"commit": row[0], "count": 1}
+			con.execute("SELECT head_line_num from wiki_keywordlocation where head = 0 and page = %s and word = %s", (page["id"], word))
+			for row in con.fetchall():
+				if row[0] in lines_hit:
+					lines_hit[row[0]]["count"] += 1
+				else:
+					lines_hit[row[0]] = {"line": row[0], "count": 1}
+		page["commits"] = sorted(commits_hit.values(), key=lambda k:k["count"], reverse=True)
+		page["lines"] = sorted(lines_hit.values(), key=lambda k:k["count"], reverse=True)[:3]
+	return pages
+
+@transaction.commit_manually()
+def index_commit(file_path, page_id, *args, **kwargs):
+	start = time.clock()
+	#indexes the head by default, but can index past commits
+	commit = kwargs.get("commit", "HEAD")
+
+	conn = kwargs.get("db", connection)
+	file_path = file_path.strip('/')
+	command = "cd \"%s\" && git show --format=format:\"%%H%%n%%ct\" %s" % (file_path , commit)
+	lines = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE).communicate()[0].splitlines()
+
+
+	commit = None
+	date = None
+	current_hunk = None
+
+
+	words_changed = {}
+	words_to_add = []
+
+	#we don't really care about what word was deleted, all the words in the line are deleted
+	lines_deleted = []
+
+	# used to calculate the positions of words in the older commit now in the newest version, with added/removed lines
+	line_offset = []
+
+	#parse the diff to index the new commit
+	for (i, line) in enumerate(lines):
+
+		#first two lines are the commit and the date respectively
+		if i == 0:
+			commit = line
+		if i == 1:
+			date = line
+
+		#a hunk header, we're starting a hunk, parse the header
+		if line[0:2] == "@@":
+			# hunk header is in the format of 
+			# @@ -\d,\d +\d,\d @@ line-content
+			hunk_pattern = re.compile(r"@@ -([\d]+),?([\d]*) \+([\d]+),?([\d]*) @@.*")
+			m = hunk_pattern.match(line)
+			current_hunk = {}
+			current_hunk["oldstart"] = int(m.group(1))
+
+			#sometimes the range of the hunk is omitted because it's 1
+			if len(m.group(2)):
+				current_hunk["oldlen"] = int(m.group(2))
+			else:
+				current_hunk["oldlen"] = 1
+			current_hunk["newstart"] = int(m.group(3))
+			if len(m.group(4)):
+				current_hunk["newlen"] = int(m.group(4))
+			else:
+				current_hunk["newlen"] = 1
+
+			#0 lines read for this current hunk
+			current_hunk["oldread"] = 0
+			current_hunk["newread"] = 0
+
+		#not inside a hunk, and not a hunk header so no need to parse
+		if not current_hunk or len(line) == 0:
+			continue
+
+		#inserted line
+		if line[0] == '+':
+
+			#line number in the current commit
+			line_num = current_hunk["newstart"] + current_hunk["newread"]
+			old_line_num = current_hunk["oldstart"] + current_hunk["oldread"]
+
+			#a start of a + block
+			#sometimes a blank line is put between every line of the hunk for some reason
+			prev_line = lines[i - 1] if len(lines[i - 1]) else lines[i - 2]
+			if prev_line[0] != line[0]:
+				line_offset.append({"line_num":old_line_num, "len": 1})
+			#middle of a + block
+			else:
+				line_offset[-1]["len"] += 1
+
+			keywords = get_valid_keywords(line)
+			if len(keywords) == 0:
+				current_hunk["newread"] += 1
+				continue
+
+			for (word_pos, word) in enumerate(keywords):
+				hash = None
+				if word not in words_changed:
+					hash = "%d-%s-%s" % (page_id, hashlib.md5(word).hexdigest()[:20], commit[:10])
+
+					words_changed[word] = {"count":1, "hash":hash}
+
+				else:
+					hash = words_changed[word]["hash"]
+					words_changed[word]["count"] += 1
+				id = "%d-%d-%s-%d" % (page_id, word_pos, commit[:20], line_num)
+				words_to_add.append({"id":id, "word":word, "hash":hash, "line":line_num, "pos":word_pos})
+
+			current_hunk["newread"] += 1
+
+		#deleted line
+		elif line[0] == '-':
+			#line number in older version
+			line_num = current_hunk["oldstart"] + current_hunk["oldread"]
+
+			#a start of a - block
+			prev_line = lines[i - 1] if len(lines[i - 1]) else lines[i - 2]
+			if prev_line[0] != line[0]:
+				#has to add one because the line is deleted, so every line starting from the line after it will
+				#be shifted
+				line_offset.append({"line_num":line_num + 1, "len":-1})
+			#middle of a - block
+			else:
+				line_offset[-1]["len"] -= 1
+				line_offset[-1]["line_num"] = line_num + 1
+
+			keywords = get_valid_keywords(line)
+			if len(keywords) == 0:
+				current_hunk["oldread"] += 1
+				continue
+
+			for (word_pos, word) in enumerate(keywords):
+				hash = None
+				if word not in words_changed:
+					hash = "%d-%s-%s" % (page_id, hashlib.md5(word).hexdigest()[:20], commit[:10])
+
+					words_changed[word] = {"count":-1, "hash":hash}
+				else:
+					words_changed[word]["count"] -= 1
+
+			lines_deleted.append(line_num)
+
+
+			current_hunk["oldread"] += 1
+
+		#unchanged line
+		elif line[0] == ' ':
+			line_num = current_hunk["newstart"] + current_hunk["newread"]
+			old_line_num = current_hunk["oldstart"] + current_hunk["oldread"]
+
+			current_hunk["newread"] += 1
+			current_hunk["oldread"] += 1
+
+	def split_seq(iterable, size):
+		it = iter(iterable)
+		item = list(itertools.islice(it, size))
+		while item:
+			yield item
+			item = list(itertools.islice(it, size))
+
+	cursor = conn.cursor()
+
+	try:
+		#add the changes to the word base in this commit, no positional information yet
+		insert_commit_changes(words_changed, page_id, commit, date, cursor)
+		for chunk in split_seq(lines_deleted, 100):
+			#the words in the deleted lines are no longer in the head
+			unflag_deleted(chunk, page_id, cursor)
+		#the position of the words in the unmodified lines is pushed to different positions
+		shift_older_lines(line_offset, page_id, cursor)
+	except:
+		transaction.rollback()
+	else:
+		#have to commit here so the new keywords don't collide with the old ones(which are shifted from the commit)
+		transaction.commit()
+		try:
+			#finally add the new words to the index
+			add_new_words(words_to_add, page_id, cursor)
+		except:
+			transaction.rollback()
+		else:
+			transaction.commit()
+
+def add_new_words(words, page, cursor):
+	values = []
+	for word in words:
+		value = (word["id"], word["hash"], word["word"], page, word["line"], word["pos"], word["line"])
+		values.append(value)
+	query = "INSERT INTO wiki_keywordlocation values (%s,%s,%s,%s,%s,%s,0,%s,0)"
+	try:
+		cursor.executemany(query, values)
+	except:
+		traceback.print_exc(file=sys.stdout)
+		raise Exception
+
+def shift_older_lines(offsets, page, cursor):
+	# lowest old line number touched by any offset; used to limit the final copy-back UPDATE
+	lowest_affected = sys.maxint
+	#have to use temp because we want to modify head_line_num based on the old value of head_line_num
+	cursor.execute("UPDATE wiki_keywordlocation SET temp = head_line_num WHERE page = %s and head = 0", [page])
+	for offset in offsets:
+		loc = offset["line_num"]
+		lowest_affected = min(lowest_affected, loc)
+		# signed size of the changed block: positive for inserted lines, negative for deleted ones
+		shift = offset["len"]
+		query = "UPDATE wiki_keywordlocation SET temp = temp + %s WHERE page = %s and head_line_num >= %s and head = 0"
+		try:
+			cursor.execute(query, [shift, page, loc])
+		except:
+			traceback.print_exc(file=sys.stdout)
+			raise Exception
+	cursor.execute("UPDATE wiki_keywordlocation SET head_line_num = temp WHERE page = %s and head_line_num >= %s and head = 0", [page, lowest_affected])
+
+def unflag_deleted(lines, page, cursor):
+	query = "UPDATE wiki_keywordlocation SET head = head +1 WHERE page = " + str(page) + " and head_line_num = %s"
+	try:
+		cursor.executemany(query, [(k,) for k in lines])
+	except:
+		print query
+		traceback.print_exc(file=sys.stdout)
+		raise Exception
+
+
+def insert_commit_changes(words, page, commit, date, cursor):
+	start = time.clock()
+	values = []
+	query = u"UPDATE wiki_keyword SET head = head + 1  WHERE page = " + str(page) + " and keyword = %s"
+	try:
+		cursor.executemany(query, [(k,) for k in words.keys()])
+	except:
+		print query
+		traceback.print_exc(file=sys.stdout)
+		raise Exception
+	for word in words.keys():
+		frequency = words[word]["count"]
+		value = (words[word]["hash"], page, word, commit, word, page, frequency, frequency, date)
+		values.append(value)
+
+	query = u"INSERT INTO wiki_keyword values(%s,%s,%s,%s,COALESCE((SELECT frequency FROM wiki_keyword WHERE keyword = %s and page = %s and head = 1)+%s,%s),%s,0)"
+	try:
+		cursor.executemany(query, values)
+	except:
+		traceback.print_exc(file=sys.stdout)
+		raise Exception
+
+
+ignore_words = ["the", "be", "to", "of", "and", "a",  # words excluded from the index by get_valid_keywords
+			"in", "that", "have", "I", "it", "for",
+			"not", "on", "with", "he", "as", "you", "why",
+			"do", "at", "this", "but", "his", "by", "where",
+			"from", "they", "we", "say", "her", "she", "name",
+			"or", "an", "will", "my", "one", "all",
+			"would", "there", "their", "what", "so",
+			"up", "out", "if", "about", "who", "get",
+			"which", "go", "me", "when", "make", "can",
+			"like", "time", "no", "just", "him", "know",
+			"take", "person", "into", "year", "your",
+			"good", "some", "could", "them", "see", "number",
+			"other", "than", "then", "now", "look", "only",
+			"come", "its", "over", "think", "also", "back", "has",
+			"after", "use", "two", "how", "our", "work", "usually",
+			"first", "well", "way", "even", "new", "want", "ago", "com",
+			"because", "any", "these", "give", "day", "are", "dont", "don",
+			"most", "us", "toc", "iff", "neg", "let", "lor"]
+
+def get_valid_keywords(line):
+	line = unicode(line)
+	#split on delimiters
+	delim = re.compile(r"[\.,:;\[\]\(\)\-_\\/=\+\}\{><\|]")
+	line = delim.sub(" ", line)
+
+	#strip random punctuations and quotes
+	punct = re.compile(r"[\*#\\?@`~!\$%\^&\"\']")
+	line = punct.sub(" ", line)
+
+
+	#split camel case words into individual words
+	#pattern = re.compile('([A-Z][A-Z][a-z])|([a-z][A-Z])')
+	#with_camel = pattern.sub(lambda m: m.group()[:1] + " " + m.group()[1:], with_spacing)
+
+	keywords = line.split()
+
+
+	#eliminate words shorter than 3 letters, and frequently used words
+	def eliminate(word):
+		return word.lower() not in ignore_words and len(word) > 2
+	keywords = map(lambda k:k.lower(), filter(eliminate, keywords))
+	return keywords
+
+if len(sys.argv) > 2:
+	# "index" needs a repo path plus a numeric page id (it is formatted with %d); "search" starts at offset 0
+	if sys.argv[1] == "index" and len(sys.argv) > 3:
+		index_commit(sys.argv[2], int(sys.argv[3]))
+	if sys.argv[1] == "search":
+		search(sys.argv[2], 0)
+
diff --git a/templates/search/results.html b/templates/search/results.html
index 4e21842..a39daa6 100644
--- a/templates/search/results.html
+++ b/templates/search/results.html
@@ -3,8 +3,33 @@
 {% block content %}
 <div class="container">
 	<section>
-		<h1>Search results for {{ query }}</h1>
-		<p>The search feature is still under construction. Got a suggestion for how search should behave? <a href="/about#contact-us">Let us know!</a></p>
+	<form action="/search" method="get" id="search-result">
+	<input type="text" name="query" class="xlarge" value="{{ query }}" />
+	<input class="btn primary" type="submit" value="Search" />
+	</form>
+	Search completed in {{time}} seconds
+	<br />
+	<br />
+	<div class="results">
+	{% for result in results %}
+		<div class="search-result">
+			<h3><a href="{{result.page_url}}">{{result.page_title}}</a></h3>
+			<div>			
+			{{result.preview_text|safe}}
+			</div>
+			<h5>Mentioned in commits
+			{% for commit in result.commits_mentioned%}
+			<a href="{{commit.url}}">{{commit.name}}</a>, 
+			{% endfor %}
+			{% if result.commits_hidden > 0 %}
+			and <a href="#">{{result.commits_hidden}} other{{result.commits_hidden|pluralize}}</a>
+			{% endif %}
+			</h5> 
+		</div>
+		<br />
+		<hr />
+	{% endfor %}
+	</div>
 	</section>
 </div>
 {% endblock %}
diff --git a/views/main.py b/views/main.py
index aef43cd..92eeb71 100755
--- a/views/main.py
+++ b/views/main.py
@@ -6,9 +6,14 @@
 from wiki.utils.users import validate_username
 from wiki.models.pages import Page
 from blog.models import BlogPost
+from wiki.models.searchindex import Result
 from django.http import Http404
 from urls import static_urls
 import os
+import re
+from time import clock
+from search import search as search_module
+from django.utils.html import escape
 
 # welcome is only set to true when called from register()
 # Triggers the display of some sort of welcome message
@@ -193,9 +198,51 @@ def markdown(request):
 
 def search(request):
 	if 'query' in request.GET:
+		start = clock()
+		results = search_module(request.GET["query"], 0)
+		parsed_results = []
+		for result in results:
+			presult = Result()
+			page = Page.objects.get(pk=result["id"])
+			presult.page_url = page.get_absolute_url()
+			presult.page_title = presult.page_url.replace("_", " ").strip("/").replace("/", " / ")
+			line_nums = [k["line"] for k in result["lines"]]
+			content = page.load_content().splitlines()
+			content_length = len(content)
+			preview_line_nums = []
+			if len(line_nums):
+				preview_line_nums = line_nums[:2]
+			else:
+				preview_line_nums = [0]
+
+			# get the surrounding lines for context
+			preview_lines = map(lambda k: escape(content[k - 1]), preview_line_nums)
+			preview = " ...<br />... ".join(preview_lines)
+
+			#not working perfectly; supposed to contract long paragraphs to only show the parts containing the words
+			"""
+			if len(preview) > 200:
+				seg_length = 200 / len(result['words'])
+				p = re.compile("(([^ ]* (.){0,%d})(%s)(.{0,%d} [^ ]*) +)+" % (seg_length, "|".join(result['words']), seg_length), re.IGNORECASE)
+				shortened = []
+				for match in p.finditer(preview):
+					shortened.append(match.group(0))
+				preview = "..." + " ... ".join(shortened) + " ..."
+			"""
+			# highlight found words
+			p = re.compile("|".join(result['words']), re.IGNORECASE)
+			preview = p.sub(lambda m: "<strong>%s</strong>" % m.group(0), preview)
+
+			presult.preview_text = preview
+
+			for commit in result["commits"][:3]:
+				presult.commits_mentioned.append({"name":commit["commit"][:10], "url":(presult.page_url + "/commit/" + commit["commit"])})
+			presult.commits_hidden = len(result["commits"]) - 3 if len(result["commits"]) > 3 else 0
+			parsed_results.append(presult)
 		data = {
-			'title': 'Search results',
-			'query': request.GET['query']
+			'query': request.GET['query'],
+			'results':parsed_results,
+			'time':"%0.4f" % (clock() - start)
 		}
 		return render(request, 'search/results.html', data)
 	else:
diff --git a/views/pages.py b/views/pages.py
index 6e74592..9317fc5 100644
--- a/views/pages.py
+++ b/views/pages.py
@@ -12,6 +12,7 @@
 from wiki.utils.currents import current_term, current_year
 from views.main import register
 from datetime import datetime
+from search import index_commit
 
 def show(request, department, number, page_type, term, year, slug):
 	course = get_object_or_404(Course, department=department, number=int(number))
@@ -104,6 +105,7 @@ def edit(request, department, number, page_type, term, year, slug):
 		username = request.user.username
 		message = request.POST['message']
 		page.save_content(request.POST['content'], message, username)
+		index_commit(page.get_filepath(), page.pk)
 		data = {
 			'course': course,
 			'page': page,
@@ -173,7 +175,7 @@ def create(request, department, number, page_type):
 			except ValueError:
 				pass # defaults to the current year
 			data['current_exam_type'] = request.POST['exam_type'] if 'exam_type' in request.POST else ''
-			data['subject'] =  request.POST['subject'] if 'subject' in request.POST else ''
+			data['subject'] = request.POST['subject'] if 'subject' in request.POST else ''
 
 			data['content'] = request.POST['content']
 			data['message'] = request.POST['message']
@@ -187,7 +189,7 @@ def create(request, department, number, page_type):
 			username = request.user.username
 			email = request.user.email
 			new_page.save_content(request.POST['content'], commit_message, username)
-
+			index_commit(new_page.get_filepath(), new_page.pk)
 			# Add the history item - should be done automatically one day
 			course.add_event(page=new_page, user=request.user, action='created', message=commit_message)
 			data['page'] = new_page
diff --git a/wiki/management/commands/buildindex.py b/wiki/management/commands/buildindex.py
new file mode 100755
index 0000000..61916a7
--- /dev/null
+++ b/wiki/management/commands/buildindex.py
@@ -0,0 +1,23 @@
+from django.core.management.base import BaseCommand, CommandError
+from django.db import connection
+from search import index_entire_history
+from wiki.models.pages import Page
+import time
+class Command(BaseCommand):
+	def handle(self, *args, **options):
+		print "Clearing indices"
+		cursor = connection.cursor()
+		cursor.execute("DELETE FROM wiki_keyword")
+		cursor.execute("DELETE FROM wiki_keywordlocation")
+		print "Starting to index the pages"
+		start = time.clock()
+		success = 0
+		fail = 0
+		pages = Page.objects.all()
+		for page in pages:
+			(yay, no) = index_entire_history(page.get_filepath(), page.pk)
+			success += yay
+			fail += no
+		print "%d failures" % fail
+		print "Successfully indexed %d commits in %d files in %s seconds" % (success, len(pages), time.clock() - start)
+
diff --git a/wiki/models/searchindex.py b/wiki/models/searchindex.py
new file mode 100755
index 0000000..88d8a64
--- /dev/null
+++ b/wiki/models/searchindex.py
@@ -0,0 +1,39 @@
+from django.db import models
+from django.db.models import fields
+
+class Keyword(models.Model):
+	class Meta:
+		app_label = 'wiki'
+		unique_together = (("keyword", "commit", "page"))
+	hash = models.CharField(max_length=40, primary_key=True)
+	page = models.IntegerField()
+	keyword = models.CharField(max_length=120) #reasonable length for longest word
+	commit = models.CharField(max_length=100)
+	frequency = models.IntegerField(default=0)
+	date = models.IntegerField()
+	head = models.IntegerField()
+
+class KeywordLocation(models.Model):
+	class Meta:
+		app_label = 'wiki'
+	# k_id is built in search.py as "<page>-<word pos>-<first 20 chars of commit>-<line>", which can exceed 32 chars
+	k_id = models.CharField(max_length=64, primary_key=True)
+	keyword_hash = models.CharField(max_length=40)
+	word = models.CharField(max_length=120)
+	page = models.IntegerField()
+	line_num = models.IntegerField()
+	pos = models.IntegerField()
+	head = models.IntegerField()
+	head_line_num = models.IntegerField(null=True)
+	temp = models.IntegerField(null=True)
+
+#not actually a model
+class Result():
+
+	def __init__(self):
+		self.page = None
+		self.preview_text = ""
+		self.commits_mentioned = []
+		self.terms = []
+		self.commits_hidden = 0
+

From 3177bd3ac4640a703cf42cc5012c95e3a60daa19 Mon Sep 17 00:00:00 2001
From: hc5 <yzhou0728@gmail.com>
Date: Thu, 26 Jan 2012 14:31:06 -0500
Subject: [PATCH 2/2] minor unicode bug fix

---
 search.py                            | 5 +++--
 wiki/management/__init__.py          | 0
 wiki/management/commands/__init__.py | 0
 wiki/models/__init__.py              | 1 +
 4 files changed, 4 insertions(+), 2 deletions(-)
 create mode 100755 wiki/management/__init__.py
 create mode 100755 wiki/management/commands/__init__.py

diff --git a/search.py b/search.py
index 67273c5..3644ace 100755
--- a/search.py
+++ b/search.py
@@ -1,3 +1,4 @@
+# encoding: utf-8
 import sys
 import subprocess
 import re
@@ -7,7 +8,7 @@
 from django.db import connection, transaction
 import itertools
 import time
-
+from django.utils.encoding import smart_str, smart_unicode
 def index_entire_history(file_path, page_id, *args, **kwargs):
 	print "indexing %s - %s" % (file_path.split('/')[2] , file_path.split("/")[5])
 	try:
@@ -338,7 +339,7 @@ def insert_commit_changes(words, page, commit, date, cursor):
 			"most", "us", "toc", "iff", "neg", "let", "lor"]
 
 def get_valid_keywords(line):
-	line = unicode(line)
+	line = smart_str(line)
 	#split on delimiters
 	delim = re.compile(r"[\.,:;\[\]\(\)\-_\\/=\+\}\{><\|]")
 	line = delim.sub(" ", line)
diff --git a/wiki/management/__init__.py b/wiki/management/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/wiki/management/commands/__init__.py b/wiki/management/commands/__init__.py
new file mode 100755
index 0000000..e69de29
diff --git a/wiki/models/__init__.py b/wiki/models/__init__.py
index 097f6d5..70cf0bc 100755
--- a/wiki/models/__init__.py
+++ b/wiki/models/__init__.py
@@ -6,3 +6,4 @@
 from history import *
 from users import *
 from series import *
+from searchindex import *
\ No newline at end of file