commit 59f61fcbab33c5ea2f1555233fe334e70f350114 Author: Elf M. Sternberg Date: Thu Oct 21 16:21:07 2010 -0700 Initial check-in, after turning into a standalone. diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a81ee1f --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +*.pyc +*.rej +*.orig +*.pyo +*# +.#* +.DS_Store +*~ +*.xcf +build/ diff --git a/AUTHORS b/AUTHORS new file mode 100644 index 0000000..c94b8b7 --- /dev/null +++ b/AUTHORS @@ -0,0 +1 @@ +Elf M. Sternberg diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..d262ea0 --- /dev/null +++ b/LICENSE @@ -0,0 +1,19 @@ +The MIT License + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE AND DATA IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
diff --git a/README.md b/README.md new file mode 100644 index 0000000..29579c8 --- /dev/null +++ b/README.md @@ -0,0 +1,103 @@ +django-mysqlfulltextsearch +============================== + +django-mysqlfulltextsearch is a simple plug-in for Django that +provides access to MySQL's FULLTEXT INDEX feature available in MySQL +5.0 and up. + +Although Django supports the "search" lookup in QuerySet filters +[http://docs.djangoproject.com/en/dev/ref/models/querysets/#search], +the docs specify that you must create the fulltext index yourself. +This variant, inspired by code from a blog entry by Andrew Durdin +[http://www.mercurytide.co.uk/news/article/django-full-text-search/], +includes a return value "relevance," which is the score MySQL awards +to each row returned. This is a win for small sites that do not need +a heavyweight search solution such as Lucene or Xapian. ("relevance" +is supposed to be a configurable dynamic field name, but I haven't +provided a reliable path to change it yet.) + +Along with the updated API, this code provides for index discovery. +If a table has exactly one fulltext index, you can create a +SearchManager without declaring any fields at all, and it will +auto-discover the index on its own. If you specify a tuple of search +fields for which no corresponding index exists, the raised exception +will include a list of valid indices. + + + +Standard Usage: +--------------- + +Create the index. For the model "Book" in the app "books": + +./manage.py dbshell +> CREATE FULLTEXT INDEX book_text_index ON books_book (title, summary); + +Or via South: + +def forwards(self, orm): + db.execute('CREATE FULLTEXT INDEX book_text_index ON books_book (title, summary)') + +Using the index: + +from mysqlfulltextsearch.search_manager import SearchManager +class Book(models.Model): + ... 
+ objects = SearchManager() + +books = Book.objects.search('The Metamorphosis', ('title', 'summary')).order_by('-relevance') + +> books[0].title +"The Metamorphosis" +> books[0].author +"Franz Kafka" +> books[0].relevance +9.4 + +If there is only one index for the table, the fields do not need to be +specified; the SearchQuerySet object can find the index automatically: + +from mysqlfulltextsearch.search_manager import SearchManager +class Book(models.Model): + ... + objects = SearchManager() + +books = Book.objects.search('The Metamorphosis').order_by('-relevance') + + + +Tips: +----- +Generating the index is a relatively heavyweight process. When you +have a few thousand documents, it might be best to load them first, +then generate the index afterward. + + + +To Do: +------ + +-- Easy + +Make the "relevance" dynamic field name configurable. + + +-- Moderate + +Provide means for matching against BOOLEAN, NATURAL LANGUAGE, and +QUERY EXPANSION modes. (Preliminary experiments with this revealed +some... interesting... problems with parameter quotation.) + + +-- Difficult + +Provide means for using a SearchManager to access indices on joined +tables, for example: + + Author.objects.search("The Metamorphosis", "book__title") + +-- Insane + +Provide for a way to have FULLTEXT search indices specified in a +model's Meta class, and have syncdb or South pick up that information +and do the right thing with it. diff --git a/mysqlfulltextsearch/__init__.py b/mysqlfulltextsearch/__init__.py new file mode 100644 index 0000000..566731c --- /dev/null +++ b/mysqlfulltextsearch/__init__.py @@ -0,0 +1,32 @@ +# Copyright 2010 by Elf M. Sternberg. All rights not expressly granted +# herein are reserved. +# +# Created in the United States of America. +# +# This digital media is protected by U.S. and international copyright +# and intellectual property laws. 
Unless otherwise specified, all +# information and screens appearing as part of this digital medium, +# including software, services, documents, text, images, icons, and +# logos design; the selection, assembly, arrangement, and design +# thereof; and the code that enables its presentation, are the sole +# property of Elf M. Sternberg. + +# Permission is hereby granted, free of charge, to any person +# obtaining a copy of this software and associated documentation files +# (the "Software"), to deal in the Software without restriction, +# including without limitation the rights to use, copy, modify, merge, +# publish, distribute, sublicense, and/or sell copies of the Software, +# and to permit persons to whom the Software is furnished to do so, +# subject to the following conditions: + +# The above copyright notice and this permission notice shall be +# included in all copies or substantial portions of the Software. + +# THE SOFTWARE AND DATA IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY +# KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE +# WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. diff --git a/mysqlfulltextsearch/models.py b/mysqlfulltextsearch/models.py new file mode 100644 index 0000000..6b20219 --- /dev/null +++ b/mysqlfulltextsearch/models.py @@ -0,0 +1 @@ +# Create your models here. 
diff --git a/mysqlfulltextsearch/search_manager.py b/mysqlfulltextsearch/search_manager.py
new file mode 100644
index 0000000..283c252
--- /dev/null
+++ b/mysqlfulltextsearch/search_manager.py
@@ -0,0 +1,154 @@
+from functools import wraps
+from django.core.exceptions import FieldError
+from django.db import models, backends
+from django.db import connection
+from MySQLdb import OperationalError
+from MySQLdb.constants.ER import FT_MATCHING_KEY_NOT_FOUND
+
+
+def _get_indices(model):
+    """ Return all of the FULLTEXT indices available for a given
+    Django model.
+
+    The result is a list holding one entry per index; each entry is
+    the list of column names that make up that index."""
+
+    cursor = connection.cursor()
+    cursor.execute('show index from %s where index_type = "FULLTEXT"' %
+                   connection.ops.quote_name(model._meta.db_table))
+    found = {}
+    for item in cursor.fetchall():
+        # SHOW INDEX returns one row per indexed column; position 2
+        # is the index name and position 4 is the column name.
+        (key_name, column_name) = (item[2], item[4])
+        found.setdefault(key_name, []).append(column_name)
+
+    return list(found.values())
+
+
+def _handle_oper(f):
+    """ Specialized wrapper for methods of SearchQuerySet that will
+    inform the user of what indices are available, should the user
+    specify a list of fields for which no FULLTEXT index exists.
+
+    Only the FT_MATCHING_KEY_NOT_FOUND error is translated into a
+    FieldError; any other OperationalError is re-raised untouched."""
+
+    def wrapper(self, *args, **kwargs):
+        try:
+            return f(self, *args, **kwargs)
+        except OperationalError, e:
+            if e.args[0] != FT_MATCHING_KEY_NOT_FOUND:
+                raise
+
+            idc = _get_indices(self.model)
+            message = "No FULLTEXT indices found for this table."
+            if len(idc) > 0:
+                message = ("Index not found. Indices available include: %s" %
+                           str(tuple(idc)))
+            raise FieldError(message)
+
+    return wraps(f)(wrapper)
+
+
+class SearchQuerySet(models.query.QuerySet):
+    """ A QuerySet with a new method, search, and wrappers around the
+    most common operations performed on a query set."""
+
+    def __init__(self, model = None, query = None, using = None,
+                 aggregate_field_name = 'relevance'):
+
+        super(SearchQuerySet, self).__init__(model, query, using)
+        self._aggregate_field_name = aggregate_field_name
+
+
+    def search(self, query, fields = None):
+        """ Return a query set limited to the rows that match query in
+        the FULLTEXT index over fields, with the MATCH expression also
+        selected under the aggregate field name ('relevance' unless
+        overridden in the constructor).
+
+        When fields is empty or omitted, the table must have exactly
+        one FULLTEXT index, which is used; otherwise FieldError is
+        raised."""
+
+        meta = self.model._meta
+
+        if not fields:
+            found = _get_indices(self.model)
+            # Report the empty case separately: "more than one" is
+            # misleading when the table has no FULLTEXT index at all.
+            if len(found) == 0:
+                raise FieldError("No FULLTEXT indices found for this table.")
+            if len(found) != 1:
+                raise FieldError("More than one index found for this table.")
+            # NOTE: these are column names, while get_field() below
+            # takes field names; this assumes the two coincide.
+            fields = found[0]
+
+        columns = [meta.get_field(name, many_to_many=False).column
+                   for name in fields]
+        full_names = ["%s.%s" % (connection.ops.quote_name(meta.db_table),
+                                 connection.ops.quote_name(column))
+                      for column in columns]
+        match_expr = "MATCH(%s) AGAINST (%%s)" % (", ".join(full_names))
+
+        return self.extra(select={self._aggregate_field_name: match_expr},
+                          where=[match_expr],
+                          params=[query],
+                          select_params = [query])
+
+
+    # Python Magic Methods wrapped to provide useful information on exception.
+
+    def __repr__(self):
+        return super(SearchQuerySet, self).__repr__()
+    __repr__ = _handle_oper(__repr__)
+
+
+    def __len__(self):
+        return super(SearchQuerySet, self).__len__()
+    __len__ = _handle_oper(__len__)
+
+
+    def __iter__(self):
+        return super(SearchQuerySet, self).__iter__()
+    __iter__ = _handle_oper(__iter__)
+
+
+    def _result_iter(self):
+        return super(SearchQuerySet, self)._result_iter()
+    _result_iter = _handle_oper(_result_iter)
+
+
+    def __nonzero__(self):
+        return super(SearchQuerySet, self).__nonzero__()
+    __nonzero__ = _handle_oper(__nonzero__)
+
+
+    def __getitem__(self, k):
+        return super(SearchQuerySet, self).__getitem__(k)
+    __getitem__ = _handle_oper(__getitem__)
+
+
+    # This is a private method of QuerySet. It's not guaranteed to even exist
+    # after Django 1.2
+
+    def _fill_cache(self, *args, **kwargs):
+        return super(SearchQuerySet, self)._fill_cache(*args, **kwargs)
+    _fill_cache = _handle_oper(_fill_cache)
+
+
+
+class SearchManager(models.Manager):
+    """ Manager that hands out SearchQuerySet objects and exposes
+    search directly on the manager."""
+
+    def get_query_set(self, fields = None):
+        # fields is not used; it is accepted only so that existing
+        # callers which pass it keep working.
+        return SearchQuerySet(self.model, using = self._db)
+
+
+    def search(self, query, fields = None):
+        return self.get_query_set().search(query, fields)
diff --git a/mysqlfulltextsearch/tests.py b/mysqlfulltextsearch/tests.py
new file mode 100644
index 0000000..2247054
--- /dev/null
+++ b/mysqlfulltextsearch/tests.py
@@ -0,0 +1,23 @@
+"""
+This file demonstrates two different styles of tests (one doctest and one
+unittest). These will both pass when you run "manage.py test".
+
+Replace these with more appropriate tests for your application.
+"""
+
+from django.test import TestCase
+
+class SimpleTest(TestCase):
+    def test_basic_addition(self):
+        """
+        Tests that 1 + 1 always equals 2.
+        """
+        self.failUnlessEqual(1 + 1, 2)
+
+__test__ = {"doctest": """
+Another way to test that 1 + 1 is equal to 2.
+
+>>> 1 + 1 == 2
+True
+"""}
+
diff --git a/mysqlfulltextsearch/views.py b/mysqlfulltextsearch/views.py
new file mode 100644
index 0000000..60f00ef
--- /dev/null
+++ b/mysqlfulltextsearch/views.py
@@ -0,0 +1 @@
+# Create your views here.
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..c313db4
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+Django==1.2
+MySQL-python
diff --git a/setup.py b/setup.py
new file mode 100644
index 0000000..f205110
--- /dev/null
+++ b/setup.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python
+
+from setuptools import setup, find_packages
+
+setup (
+    name='django-mysqlfulltextsearch',
+    version='0.1',
+    description='A full-text search app for Django and MySQL',
+    author='Elf M. Sternberg',
+    author_email='elf.sternberg@gmail.com',
+    url='http://github.com/elfsternberg/django-mysqlfulltextsearch/',
+    license='MIT License',
+    classifiers=[
+        'Development Status :: 3 - Alpha',
+        'Environment :: Plugins',
+        'Framework :: Django',
+        'Intended Audience :: Developers',
+        'License :: OSI Approved :: MIT License',
+        'Programming Language :: Python',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+    packages=find_packages(),
+)