Solr 和 django-haystack 的組合真是強大,全文檢索的效果真是太優了。交給專業的就是沒錯,省得一大堆東西還要自己蠻幹
這樣可以解決使用 RealTimeSearchIndex 時 ManyToManyField 不更新的問題,不過代價是更新資料時會重複 update Solr,造成額外的 overhead;以後應該會有更好的做法
Google Group 的討論 http://groups.google.com/group/django-haystack/browse_thread/thread/a274b5aeb121b1d4/5f4c4f7632aa1dfc?pli=1
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# models.py | |
from django.db import models | |
from django.contrib.auth.models import User | |
import datetime | |
class Ejournal(models.Model):
    """An electronic journal entry owned by a user.

    Authors and categories are stored as many-to-many relations, so they
    are saved separately from the row itself (after ``save()``).
    """

    user = models.ForeignKey(User)
    title = models.CharField(max_length=200)
    # ManyToManyField requires the related model as its first argument; it
    # was missing here. A string reference is used because Author is defined
    # further down in this module.
    author = models.ManyToManyField('Author', blank=True, null=True)
    publisher = models.CharField(default='', blank=True, max_length=200)
    # The callable (not its result) is passed, so the default is evaluated
    # each time an instance is created rather than once at import time.
    publish_date = models.DateTimeField(default=datetime.datetime.now)
    abstract = models.TextField(default='', blank=True)
    categories = models.ManyToManyField('Category', blank=True, null=True)

    def __unicode__(self):
        """Return the journal title as its display name."""
        return self.title
class Category(models.Model):
    """A category that may optionally be nested under a parent category."""

    title = models.CharField(max_length=200)
    # presumably the Chinese-language title -- TODO confirm
    title_zh = models.CharField(max_length=200)
    # Self-referential link; empty for top-level categories.
    parent = models.ForeignKey('self', blank=True, null=True)

    def __unicode__(self):
        """Return "<parent title>---<title>" when a parent exists, else the title."""
        if self.parent:
            return "%s---%s" % (self.parent.title, self.title)
        return self.title
class Author(models.Model):
    """An author, identified only by the name stored in ``title``."""

    title = models.CharField(max_length=200)

    def __unicode__(self):
        """Return the stored title as the display name."""
        return self.title
# search_indexes.py | |
import datetime | |
from haystack.indexes import * | |
from haystack import site | |
from ejournal.models import Ejournal, Category, Author | |
class EjournalIndex(RealTimeSearchIndex):
    """Real-time search index for Ejournal with faceted authors and categories.

    Besides the save handling inherited from RealTimeSearchIndex, this index
    listens to ``m2m_changed`` on both many-to-many relations. Those relations
    are written after the model row is saved, so without the extra signal the
    indexed ``author`` / ``categories`` values would be stale.
    """

    # Main document field, rendered from a template.
    text = CharField(document=True, use_template=True)
    publish_date = DateTimeField(model_attr='publish_date')
    publisher = CharField(model_attr='publisher')
    # NOTE(review): the Ejournal model shown alongside this index declares no
    # `language` attribute -- confirm the real model provides it, otherwise
    # preparing this field is expected to fail.
    language = CharField(model_attr='language')
    categories = MultiValueField(faceted=True)
    author = MultiValueField(faceted=True)

    # m2m_changed actions sent after the relation has actually been modified.
    # The matching pre_* actions are skipped so that one change results in one
    # index update instead of two.
    _M2M_DONE_ACTIONS = ('post_add', 'post_remove', 'post_clear')

    def prepare_categories(self, obj):
        """Return the titles of all categories attached to ``obj``."""
        return [item.title for item in obj.categories.all()]

    def prepare_author(self, obj):
        """Return the titles of all authors attached to ``obj``."""
        return [item.title for item in obj.author.all()]

    def get_queryset(self):
        """Used when the entire index for model is updated."""
        return Ejournal.objects.filter(publish_date__lte=datetime.datetime.now())

    def _setup_save(self, model):
        """Connect the inherited save handling plus both M2M relations."""
        super(EjournalIndex, self)._setup_save(model)
        # Both relations feed indexed fields, so both must trigger a reindex
        # (previously only `author` was connected).
        for through in (Ejournal.author.through, Ejournal.categories.through):
            signals.m2m_changed.connect(self._reindex_on_m2m_change, sender=through)

    def _reindex_on_m2m_change(self, instance, action, reverse=False,
                               pk_set=None, **kwargs):
        """Reindex the Ejournal objects affected by a many-to-many change.

        ``instance``, ``action``, ``reverse`` and ``pk_set`` are the arguments
        Django sends with ``m2m_changed``; the remaining ones are ignored.
        """
        if action not in self._M2M_DONE_ACTIONS:
            return
        if not reverse:
            # Change made from the Ejournal side: `instance` is the Ejournal.
            self.update_object(instance)
            return
        # Change made from the Author/Category side: `instance` is that
        # related object and `pk_set` holds the affected Ejournal keys.
        # A reverse clear sends no pk_set, so nothing can be reindexed then.
        for ejournal in Ejournal.objects.filter(pk__in=pk_set or ()):
            self.update_object(ejournal)


# Register the index so Ejournal objects are handled by EjournalIndex.
site.register(Ejournal, EjournalIndex)
PS: 為什麼選 Solr 而不是 Xapian 或是直接用 Sphinx?考量的原因是 Apache 及 GPL license 的差異,還有成熟度,以及 Solr 有提供 API,算是一個很方便的 Lucene 接口。它不只可以做資料庫的全文檢索,可以應用的範圍相當大,這就給大家自由發揮了