4

I want to change the label of a GeoTIFF array.

This is what I am doing

import numpy as np
from osgeo import gdal
def changeLabel(arr):
    arr[np.where( arr == 103 )] = 10        # class 1
    arr[np.where( arr == 33 )]  = 20        # class 2
    arr[np.where( arr == 65 )]  = 30        # class 3
    arr[np.where( arr == 1 )]   = 40        # class 4
    arr[np.where( arr == 9 )]   = 50        # class 5
    arr[np.where( arr == 17 )]  = 60        # class 6
    arr[np.where( arr == 129 )] =  0        # water / no data
    return arr

def distanceFile(inFile, outFile):
    driver = gdal.GetDriverByName('GTiff')
    file = gdal.Open(inFile)
    band = file.GetRasterBand(1)
    lista = band.ReadAsArray()
    lista = changeLabel(lista)

    # create new file
    file2 = driver.Create(outFile, file.RasterXSize , file.RasterYSize , 1)
    b = file2.GetRasterBand(1)
    b.SetNoDataValue(0)
    b.SetNoDataValue(1)
    b.WriteArray(lista)

    # spatial ref system
    proj = file.GetProjection()
    georef = file.GetGeoTransform()
    file2.SetProjection(proj)
    file2.SetGeoTransform(georef)
    file2 = None

inFile='inFile.tif'
outFile='outFile.tif'
## Run the function
distanceFile(inFile,outFile)

However while inFile has a size of 150 MB, the outFile size is 6.7 GB. How can I reduce the size of the produced file?

Vince
  • 20,017
  • 15
  • 45
  • 64
emax
  • 269
  • 4
  • 17

1 Answers1

6

You want to use a sensible data type (UInt8 should work for your data) and compress the output. You can do this with the Create statement, and use LZW (or DEFLATE) compression:

file2 = driver.Create(outFile, xsize=file.RasterXSize, 
                     ysize=file.RasterYSize, bands=1,
                     eType=gdal.GDT_Byte,
                     options=['COMPRESS=LZW']
                     )

See more details about compression options here.

Jose
  • 3,332
  • 20
  • 21