Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 24 additions & 5 deletions lib/mindee/input/sources/local_input_source.rb
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ module Source
ALLOWED_MIME_TYPES = [
'application/pdf',
'image/heic',
'image/heif',
'image/png',
'image/jpeg',
'image/tiff',
Expand All @@ -37,11 +38,7 @@ class LocalInputSource
def initialize(io_stream, filename, repair_pdf: false)
@io_stream = io_stream
@filename = filename
@file_mimetype = if repair_pdf
Marcel::MimeType.for @io_stream
else
Marcel::MimeType.for @io_stream, name: @filename
end
@file_mimetype = detect_mime_type(repair_pdf)
if ALLOWED_MIME_TYPES.include? @file_mimetype
logger.debug("Loaded new input #{@filename} from #{self.class}")
return
Expand Down Expand Up @@ -197,6 +194,28 @@ def source_text?

Mindee::PDF::PDFTools.source_text?(@io_stream)
end

private

# Resolves the MIME type of the wrapped IO stream.
#
# When +repair_pdf+ is truthy, only the stream is handed to Marcel and the
# filename is not used as a hint. Otherwise a +.heic+ / +.heif+ filename
# extension takes precedence, and Marcel is consulted (with the filename as a
# hint) only when the extension is not a HEIF one.
# NOTE(review): presumably the filename is ignored in repair mode so that a
# damaged PDF is not accepted on its extension alone -- confirm with the caller.
# @param repair_pdf [bool] Whether PDF repair was requested for this input.
# @return [String] The detected MIME type.
def detect_mime_type(repair_pdf)
  if repair_pdf
    # Content-only detection: the filename is deliberately left out.
    Marcel::MimeType.for(@io_stream)
  else
    extension_mimetype = heif_mimetype_from_extension
    extension_mimetype || Marcel::MimeType.for(@io_stream, name: @filename)
  end
end

# Maps the filename's extension to a HEIF-family MIME type.
#
# Only the last extension is considered and the comparison is case-insensitive,
# so +photo.jpg.HEIF+ resolves to +image/heif+. A nil filename is treated as an
# empty string.
# @return [String, nil] +image/heic+ or +image/heif+ for a matching extension, nil otherwise.
def heif_mimetype_from_extension
  extension = File.extname(@filename.to_s).downcase
  {
    '.heic' => 'image/heic',
    '.heif' => 'image/heif',
  }[extension]
end
end

# Replaces non-ASCII characters by their UNICODE escape sequence.
Expand Down
6 changes: 6 additions & 0 deletions sig/mindee/input/sources/local_input_source.rbs
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,12 @@ module Mindee
def write_to_file: (String?) -> void
def compress!: (?quality: Integer, ?max_width: Integer?, ?max_height: Integer?, ?force_source_text: bool, ?disable_source_text: bool) -> Integer
def source_text?: -> bool?

private

# Detects the MIME type of the input; the boolean argument is the repair_pdf flag.
def detect_mime_type: (bool) -> String

# Returns a HEIF-family MIME type derived from the filename extension, or nil when the extension is not .heic/.heif.
def heif_mimetype_from_extension: -> String?
end
def self.convert_to_unicode_escape: (String) -> String
end
Expand Down
2 changes: 1 addition & 1 deletion spec/data
18 changes: 18 additions & 0 deletions spec/input/sources/sources_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,24 @@
expect(input_source.page_count).to eq(1)
expect(input_source.pdf?).to eq(false)
end

# Both fixtures carry a `.heif` extension and must be reported as `image/heif`.
it 'should load a HEIF from a path', :all_deps do
  # Plain `.heif` filename.
  input_source = Mindee::Input::Source::PathInputSource.new(
    File.join(FILE_TYPES_DIR, 'receipt.heif')
  )
  expect(input_source.file_mimetype).to eq('image/heif')
  expect(input_source.filename).to eq('receipt.heif')
  expect(input_source.page_count).to eq(1)
  expect(input_source.pdf?).to eq(false)

  # Double extension: only the final `.heif` may decide the MIME type,
  # not the inner `.jpg`.
  input_source2 = Mindee::Input::Source::PathInputSource.new(
    File.join(FILE_TYPES_DIR, 'receipt.jpg.heif')
  )
  expect(input_source2.file_mimetype).to eq('image/heif')
  expect(input_source2.filename).to eq('receipt.jpg.heif')
  expect(input_source2.page_count).to eq(1)
  expect(input_source2.pdf?).to eq(false)
end
end

context 'A PDF input file', :all_deps do
Expand Down
4 changes: 2 additions & 2 deletions spec/v2/file_operations/crop_operation_integration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def check_findoc_return(findoc_response)

extracted_images.save_all_to_disk(OUTPUT_DIR)

expect(File.size(File.join(OUTPUT_DIR, 'crop_001.jpg'))).to be_between(560_000, 675_000)
expect(File.size(File.join(OUTPUT_DIR, 'crop_002.jpg'))).to be_between(580_000, 680_000)
expect(File.size(File.join(OUTPUT_DIR, 'crop_001.jpg'))).to be_between(560_000, 700_000)
expect(File.size(File.join(OUTPUT_DIR, 'crop_002.jpg'))).to be_between(580_000, 700_000)
end
end
Loading