Duplicate items when ordering by Generic Relation in Django

When the optional ordering = ... attribute of a Django Model's Meta class contains a GenericRelation from the Content Types framework, there is no way to eliminate duplicate items being returned, even when using .distinct() (since .order_by(...) is applied by the ORM only after the SQL SELECT DISTINCT clause is built).

This problem is current as of Django 1.2 and has no generic solution as far as I know.

This problem manifests when instances of two or more different Models are attached to instances of the Model with the GenericForeignKey. In the following example, we have Model X, which has a GenericForeignKey, and two Models, Y and Z, which have GenericRelations to X and ordering based on these relations.

from django.contrib.contenttypes.generic import \
    GenericForeignKey, GenericRelation
from django.contrib.contenttypes.models import \
    ContentType
from django.db import models
from django.db.models import CharField
 1 class X(Model):
 2     """
 3     This Model has a unique name and is attached to
 4     another Model through a Generic relation.
 5     """
 6 
 7     # The unique name of this X.
 8     name = CharField(u'name', unique = True,
 9         max_length = 128, null = False)
10 
11     # The Content Type of the generic relation to the
12     # attached content.
13     content_type = models.ForeignKey(ContentType,
14         null = True, blank = True,
15         related_name = 'x_contents')
16 
17     # The ID of the attached content.
18     content_id = models.PositiveIntegerField(
19         null = True, blank = True)
20 
21     # The attached content.
22     content = GenericForeignKey('content_type',
23         'content_id')
24 
25     def __unicode__(self):
26         """
27         The textual representation of this Model is
28         its name.
29         """
30         return self.name
31 
32     class Meta:
33         # order by X's name
34         ordering = ['name', ]
 1 class Y(Model):
 2     """
 3     This Model acts as content for an X, and has a
 4     convenient reverse Generic Relation to X.
 5     """
 6 
 7     # The unique name of this Y.
 8     name = CharField(u'name', null = False,
 9         unique = True, max_length = 128)
10 
11     # The X this Y is attached to.
12     xs = GenericRelation(X,
13         content_type_field = 'content_type',
14         object_id_field = 'content_id')
15 
16     def __unicode__(self):
17         """
18         The textual representation of this Model is
19         its name.
20         """
21         return self.name
22 
23     class Meta:
24         # order by X's name
25         ordering = ['xs__name', ]
 1 class Z(Model):
 2     """
 3     This Model acts as content for an X, and has a
 4     convenient reverse Generic Relation to X.
 5     """
 6 
 7     # The unique name of this Z.
 8     name = CharField(u'name', null = False,
 9         unique = True, max_length = 128)
10 
11     # The X this Z is attached to.
12     xs = GenericRelation(X,
13         content_type_field = 'content_type',
14         object_id_field = 'content_id')
15 
16     def __unicode__(self):
17         """
18         The textual representation of this Model is
19         its name.
20         """
21         return self.name
22 
23     class Meta:
24         # order by X's name
25         ordering = ['xs__name', ]

The following sequence of statements demonstrates this problem:

x1 = X.objects.create(name = 'first x')
x2 = X.objects.create(name = 'second x')
x3 = X.objects.create(name = 'third x')
x4 = X.objects.create(name = 'fourth x')
y1 = Y.objects.create(name = 'first y')
y2 = Y.objects.create(name = 'second y')
z1 = Z.objects.create(name = 'first z')
z2 = Z.objects.create(name = 'second z')
x1.content = y1
x1.save()
x2.content = z1
x2.save()
x3.content = y2
x3.save()
x4.content = z2
x4.save()
X.objects.all()
[<X: first x>, <X: fourth x>, <X: second x>, <X: third x>]
Y.objects.all()
[<Y: first y>, <Y: first y>, <Y: second y>]

Notice that 'first y' appears twice. The SQL COUNT statement will not have any ordering applied, and so returns the correct value:

Y.objects.count()
2

However, when ordering is applied, the duplicate instance is present:

len(Y.objects.all())
3

This problem manifests when instances of two or more different Models are attached to instances of the Model with the GenericForeignKey. In the following example, we have Model X, which has a GenericForeignKey, and two Models, Y and Z, which have GenericRelations to X and ordering based on these relations.

1 from django.db.models import CharField
2 from django.contrib.contenttypes.models import \
3     ContentType
4 from django.contrib.contenttypes.generic import \
5     GenericForeignKey, GenericRelation
 1 class X(Model):
 2     """
 3     This Model has a unique name and is attached to
 4     another Model through a Generic Relation.
 5     """
 6 
 7     # The unique name of this X.
 8     name = CharField(u'name', null = False,
 9         unique = True, max_length = 128)
10 
11     # The Content Type of the generic relation to
12     # the attached content.
13     content_type = models.ForeignKey(ContentType,
14         null = True, blank = True,
15         related_name = 'x_contents')
16 
17     # The ID of the attached content.
18     content_id = models.PositiveIntegerField(
19         null = True, blank = True)
20 
21     # The attached content.
22     content = GenericForeignKey('content_type',
23         'content_id')
24 
25     def __unicode__(self):
26         """
27         The textual representation of this Model is
28         its name.
29         """
30         return self.name
31 
32     class Meta:
33         # order by X's name
34         ordering = ['name', ]
 1 class Y(Model):
 2     """
 3     This Model acts as content for an X, and has a
 4     convenient reverse Generic Relation to X.
 5     """
 6 
 7     # The unique name of this Y.
 8     name = CharField(u'name', null = False,
 9         unique = True, max_length = 128)
10 
11     # The X this Y is attached to.
12     xs = GenericRelation(X,
13         content_type_field = 'content_type',
14         object_id_field = 'content_id')
15 
16     def __unicode__(self):
17         """
18         The textual representation of this Model is
19         its name.
20         """
21         return self.name
22 
23     class Meta:
24         # order by X's name
25         ordering = ['xs__name', ]
 1 class Z(Model):
 2     """
 3     This Model acts as content for an X, and has a
 4     convenient reverse Generic Relation to X.
 5     """
 6 
 7     # The unique name of this Z.
 8     name = CharField(u'name', null = False,
 9         unique = True, max_length = 128)
10 
11     # The X this Z is attached to.
12     xs = GenericRelation(X,
13         content_type_field = 'content_type',
14         object_id_field = 'content_id')
15 
16     def __unicode__(self):
17         """
18         The textual representation of this Model is
19         its name.
20         """
21         return self.name
22 
23     class Meta:
24         # order by X's name
25         ordering = ['xs__name', ]

The following sequence of statements demonstrates this problem:

x1 = X.objects.create(name = 'first x')
x2 = X.objects.create(name = 'second x')
x3 = X.objects.create(name = 'third x')
x4 = X.objects.create(name = 'fourth x')

y1 = Y.objects.create(name = 'first y')
y2 = Y.objects.create(name = 'second y')

z1 = Z.objects.create(name = 'first z')
z2 = Z.objects.create(name = 'second z')

x1.content = y1
x1.save()

x2.content = z1
x2.save()

x3.content = y2
x3.save()

x4.content = z2
x4.save()

X.objects.all()
[<X: first x>, <X: fourth x>, <X: second x>, <X: third x>]

Y.objects.all()
[<Y: first y>, <Y: first y>, <Y: second y>]

Notice that 'first y' appears twice.

Y.objects.count()
2

The SQL COUNT statement will not have any ordering applied, and so returns the correct value.

len(Y.objects.all())
3