Monday, November 13, 2017

Anyone Having Success with Wand to Create Thumbnails from PDF files?

I am using ImageMagick 6.7.7-10 2017-07-31 Q16 on Ubuntu 14.04, through Wand 0.4.4, Python 3.4.3 and Django 1.11. I am trying to create a jpg thumbnail of a pdf file.

On the command line, I can do this with no errors:

convert -thumbnail x300 -background white -alpha remove Lucy.pdf[0] output_thumbnail.jpg


But when I try to use wand on the same image I get this error:

Traceback (most recent call last):
  File "/home/mark/python-projects/memorabilia-project/memorabilia/models.py", line 24, in make_thumb
    pages = Image(blob = b)
  File "/home/mark/.virtualenvs/memorabilia/lib/python3.4/site-packages/wand/image.py", line 2742, in __init__
    self.read(blob=blob, resolution=resolution)
  File "/home/mark/.virtualenvs/memorabilia/lib/python3.4/site-packages/wand/image.py", line 2822, in read
    self.raise_exception()
  File "/home/mark/.virtualenvs/memorabilia/lib/python3.4/site-packages/wand/resource.py", line 222, in raise_exception
    raise e
wand.exceptions.MissingDelegateError: no decode delegate for this image format `' @ error/blob.c/BlobToImage/367
I looked at the delegates.xml file for ImageMagic in /etc, and there are entries for pdf files.

My models.py (slightly abbreviated):

def make_thumb(instance, filename, newname, b):
    file_name, file_extension = os.path.splitext(filename)
    file_extension = file_extension.lower()
    print(os.path.join(settings.MEDIA_ROOT, filename))
    try:
        if file_extension in [".pdf"]:
            pages = Image(blob = b)
            first_page = pages.sequence[0]
            image = Image(first_page)
        else:
            image = Image(blob = b)
        image.transform(resize="x200")
        image.format = "jpg"
        thumb_name, thumb_extension = os.path.splitext(newname)
        thumb_extension = ".jpg"
        thumb_name = thumb_name + thumb_extension
        image.save(filename=os.path.join(settings.MEDIA_ROOT, 'thumbs', thumb_name))
        instance.thumb_storage = thumb_name
        return True
    except:
        instance.thumb_storage = ''
        print(traceback.format_exc())
        return False
        
def unique_file_path(instance, filename):
    """This function calculates the sha256 hash for the file as well
    as a UUID4 string for the file name. The hash is stored in the model
    and the function returns the new path for the file using the UUID4
    file name.
    """
    # Save original file name in model
    instance.original_file_name = filename
    
    # calculate the hash for the file
    h = hashlib.sha256()
    buff_size = 128*1024
    #with open(instance.storage_file_name, 'rb', buffering=0) as f:
    for b in iter(lambda : instance.storage_file_name.read(buff_size), b""):
        h.update(b)
    instance.computed_sha256 = h.hexdigest()
    print ("b="+str(len(b)))
        
    # Get new file name/upload path
    base, ext = splitext(filename)
    newname = "%s%s" % (uuid.uuid4(), ext)
    if not make_thumb(instance, filename, newname, b):
        print('Could not create thumbnail - is the file type valid?')
        
    #return os.path.join('documents', newname)
    return os.path.join('',newname)

# Document               
class Document(models.Model):
    document_id = models.AutoField(primary_key=True)
      
    # Document states
    PRIVATE = 1
    PENDING = 2
    PUBLISHED = 3
    DOCUMENT_STATE = (
        (PRIVATE, 'private'),
        (PENDING, 'pending'),
        (PUBLISHED, 'published'),
    )
    document_state = models.IntegerField(choices=DOCUMENT_STATE, default=PRIVATE)
    
    # Document types
    documentType_id = models.ForeignKey(DocumentType, on_delete=models.CASCADE,)
    
    # Document fields
    title = models.CharField('title', max_length=200)
    storage_file_name = models.FileField('File name', upload_to=unique_file_path)
    thumb_storage = models.CharField(editable=False, max_length=200, blank=True, null=True)
    original_file_name = models.CharField(editable=False, max_length=200)
    computed_sha256 = models.CharField(editable=False, max_length=64)
    created = models.DateTimeField(editable=False)
    updated = models.DateTimeField(editable=False)
    
    class Meta:
        ordering = ['title']

    def __str__(self):
        return "%s" % (self.title)
        
    def save(self, *args, **kwargs):
        if not self.document_id:
            # Change created timestamp only if it's new entry
            self.created = timezone.now() 
        self.updated = timezone.now()
        super(Document, self).save(*args, **kwargs)

@receiver(models.signals.post_delete, sender=Document)
def auto_delete_file_on_delete(sender, instance, **kwargs):
    """
    Deletes file from filesystem
    when corresponding `Document` object is deleted.
    """
    if instance.storage_file_name:
        if os.path.isfile(instance.storage_file_name.path):
            os.remove(instance.storage_file_name.path)

@receiver(models.signals.pre_save, sender=Document)
def auto_delete_file_on_change(sender, instance, **kwargs):
    """
    Deletes old file from filesystem
    when corresponding `Document` object is updated
    with new file.
    """
    if not instance.document_id:
        return False
    
    try:
        old_storage_file_name = Document.objects.get(pk=instance.document_id).storage_file_name
        old_computed_sha256 = Document.objects.get(pk=instance.document_id).computed_sha256
    except Document.DoesNotExist:
        return False 
    
    new_computed_sha256 = instance.computed_sha256
    if old_computed_sha256 == new_computed_sha256:
        if os.path.isfile(old_storage_file_name.path):
            os.remove(old_storage_file_name.path)

Thanks for any suggestions on how to get this conversion to work through wand. I am not married to wand. If there is an easier way to make a thumbnail of the first page of a pdf file, please let me know!

Thanks!

Mark

--
You received this message because you are subscribed to the Google Groups "Django users" group.
To unsubscribe from this group and stop receiving emails from it, send an email to django-users+unsubscribe@googlegroups.com.
To post to this group, send email to django-users@googlegroups.com.
Visit this group at https://groups.google.com/group/django-users.
To view this discussion on the web visit https://groups.google.com/d/msgid/django-users/CAEqej2M-D7L9f_0Bk3gXaKw-xYEJvOkwbfZEfSYoSiPUeduqWQ%40mail.gmail.com.
For more options, visit https://groups.google.com/d/optout.

No comments:

Post a Comment