Get Image Size Without Downloading It In Python
Solution 1:
I found the solution on this site to work well:
import urllib
import ImageFile
def getsizes(uri):
    """Return ``(file_size, image_size)`` for the image at *uri*.

    The response is read in 1024-byte chunks that are fed to an
    incremental ``ImageFile.Parser``; reading stops as soon as the parser
    exposes an image, so usually only the start of the file is fetched.

    file_size  -- the ``content-length`` header converted to int, or None
                  when the header is absent.
    image_size -- ``p.image.size`` once the parser has an image, or None
                  if the stream ended first.
    """
    response = urllib.urlopen(uri)
    try:
        size = response.headers.get("content-length")
        if size:
            size = int(size)
        p = ImageFile.Parser()
        while 1:
            data = response.read(1024)
            if not data:
                break
            p.feed(data)
            if p.image:
                return size, p.image.size
    finally:
        # Runs on the early return as well, so the connection never leaks.
        response.close()
    return size, None
# Example usage; the result is (content-length, image size).
print getsizes("http://www.pythonware.com/images/small-yoyo.gif")
# (10965, (179, 188))
Solution 2:
This is based on ed's answer mixed with other things I found on the web. I ran into the same issue as grotos with .read(24). Download getimageinfo.py from here and download ReseekFile.py from here.
import urllib2

# Pass the open response itself (not the downloaded bytes), so that
# getImageInfo decides how much of the stream to read.
imgdata = urllib2.urlopen(href)
image_type, width, height = getimageinfo.getImageInfo(imgdata)
Modify getimageinfo as such...
import ReseekFile
defgetImageInfo(datastream):
datastream = ReseekFile.ReseekFile(datastream)
data = str(datastream.read(30))
#Skipping to jpeg# handle JPEGselif (size >= 2) and data.startswith('\377\330'):
content_type = 'image/jpeg'
datastream.seek(0)
datastream.read(2)
b = datastream.read(1)
try:
while (b andord(b) != 0xDA):
while (ord(b) != 0xFF): b = datastream.read(1)
while (ord(b) == 0xFF): b = datastream.read(1)
if (ord(b) >= 0xC0andord(b) <= 0xC3):
datastream.read(3)
h, w = struct.unpack(">HH", datastream.read(4))
breakelse:
datastream.read(int(struct.unpack(">H", datastream.read(2))[0])-2)
b = datastream.read(1)
width = int(w)
height = int(h)
except struct.error:
passexcept ValueError:
pass
Solution 3:
This is just a Python 3+ adaptation of an earlier answer here.
from urllib import request as ulreq
from PIL import ImageFile
def getsizes(uri):
    """Return ``(file_size, image_size)`` for the image at *uri*.

    The response is read in 1024-byte chunks that are fed to an
    incremental ``ImageFile.Parser``; reading stops as soon as the parser
    exposes an image, so usually only the start of the file is fetched.

    Returns:
        A 2-tuple. The first item is the ``content-length`` header
        converted to ``int`` (``None`` when the header is absent). The
        second item is ``parser.image.size`` once the parser has an image,
        or ``None`` if the stream ended first.
    """
    # The with-block closes the response on every exit path, including the
    # early return inside the loop.
    with ulreq.urlopen(uri) as response:
        size = response.headers.get("content-length")
        if size:
            size = int(size)
        parser = ImageFile.Parser()
        while True:
            data = response.read(1024)
            if not data:
                break
            parser.feed(data)
            if parser.image:
                return size, parser.image.size
    return size, None
Solution 4:
If you're willing to download the first 24 bytes of each file, then this function (mentioned in johnteslade's answer to the question you mention) will work out the dimensions.
That's probably the least downloading necessary to do the job you want.
import urllib2

# Fetch only the first 24 bytes of the image.
start = urllib2.urlopen(image_url).read(24)
Edit (1):
In the case of JPEG files it seems to need more bytes. You could edit the function so that, instead of reading from a StringIO.StringIO(data), it reads directly from the file handle returned by urlopen. Then it will read exactly as much of the image as it needs to find out the width and height.
Solution 5:
Since the getimageinfo.py mentioned above doesn't work in Python 3, Pillow is used here instead.
Pillow can be found on PyPI and installed with pip: pip install pillow
from io import BytesIO

from PIL import Image
import requests

hrefs = [
    'https://farm4.staticflickr.com/3894/15008518202_b016d7d289_m.jpg',
    'https://farm4.staticflickr.com/3920/15008465772_383e697089_m.jpg',
    'https://farm4.staticflickr.com/3902/14985871946_86abb8c56f_m.jpg',
]

# Last byte offset requested from each image via the HTTP Range header.
RANGE = 5000

for href in hrefs:
    # Ask the server for the leading bytes only; a server that ignores
    # Range will simply send the whole file.
    req = requests.get(
        href,
        headers={
            'User-Agent': 'Mozilla5.0(Google spider)',
            'Range': 'bytes=0-{}'.format(RANGE),
        },
    )
    # Open the partial download from memory and print its size.
    im = Image.open(BytesIO(req.content))
    print(im.size)
Post a Comment for "Get Image Size Without Downloading It In Python"