/*****************************************************************************
 *   Copyright (c) 2020, Hobu, Inc. (info@hobu.co)                          *
 *                                                                          *
 *   All rights reserved.                                                   *
 *                                                                          *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 3 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                          *
 ****************************************************************************/

#include "Epf.hpp"
#include "EpfTypes.hpp"
#include "FileProcessor.hpp"
#include "Reprocessor.hpp"
#include "Writer.hpp"
#include "../untwine/Common.hpp"
#include "../untwine/Las.hpp"

#include <unordered_set>
#include <filesystem>

#include <pdal/pdal_features.hpp>
#include <pdal/Dimension.hpp>
#include <pdal/Metadata.hpp>
#include <pdal/StageFactory.hpp>
#include <pdal/util/Algorithm.hpp>
#include <pdal/util/Bounds.hpp>
#include <pdal/util/FileUtils.hpp>
#include <pdal/util/ProgramArgs.hpp>

namespace
{

// PDAL's directoryList had a bug, so we've imported a working
// version here so that we can still use older PDAL releases.

#ifndef __APPLE_CC__
std::vector<std::string> directoryList(const std::string& dir)
{
    namespace fs = std::filesystem;

    std::vector<std::string> files;

    try
    {
        fs::directory_iterator it(untwine::toNative(dir));
        fs::directory_iterator end;
        while (it != end)
        {
            files.push_back(untwine::fromNative(it->path()));
            it++;
        }
    }
    catch (fs::filesystem_error&)
    {
        files.clear();
    }
    return files;
}
#else

#include <dirent.h>

// Provide a simple opendir/readdir solution for OSX because directory_iterator is
// not available until OSX 10.15.
std::vector<std::string> directoryList(const std::string& dir)
{
    DIR *dpdf;
    struct dirent *epdf;

    std::vector<std::string> files;
    dpdf = opendir(dir.c_str());
    if (dpdf != NULL)
    {
        while ((epdf = readdir(dpdf)))
            files.push_back(untwine::fromNative(epdf->d_name));
        // Only close the handle if opendir() succeeded; closedir(NULL) is undefined.
        closedir(dpdf);
    }

    return files;
}
#endif

} // unnamed namespace

namespace untwine
{
namespace epf
{

/// Epf

static_assert(MaxBuffers > NumFileProcessors, "MaxBuffers must be greater than NumFileProcessors.");

Epf::Epf(BaseInfo& common) : m_b(common), m_pool(NumFileProcessors)
{}


Epf::~Epf()
{}


void Epf::run(ProgressWriter& progress)
{
    using namespace pdal;

    BOX3D totalBounds;

    if (pdal::FileUtils::fileExists(m_b.opts.tempDir + "/" + MetadataFilename))
        throw FatalError("Output directory already contains EPT data.");

    m_grid.setCubic(m_b.opts.doCube);

    // Create the file infos. As each info is created, the N x N x N grid is expanded to
    // hold all the points. If the number of points seems too large, N is expanded to N + 1.
    // The computed N is often wrong, especially for areas where the points are unusually
    // dense.
    std::vector<FileInfo> fileInfos;
    point_count_t totalPoints = createFileInfo(m_b.opts.inputFiles, m_b.opts.dimNames, fileInfos);

    if (m_b.opts.level != -1)
        m_grid.resetLevel(m_b.opts.level);

    // This is just a debug facility that allows the number of input files to be limited.
    if (fileInfos.size() > m_b.opts.fileLimit)
        fileInfos.resize(m_b.opts.fileLimit);

    // Stick all the dimension names from each input file in a set.
    std::unordered_set<std::string> allDimNames;
    for (const FileInfo& fi : fileInfos)
        for (const FileDimInfo& fdi : fi.dimInfo)
            allDimNames.insert(fdi.name);

    // Register the dimensions, either as the default type, or as double if we don't
    // know what the dimension is.
    PointLayoutPtr layout(new PointLayout());
    for (const std::string& dimName : allDimNames)
    {
        Dimension::Type type;
        try
        {
            type = Dimension::defaultType(Dimension::id(dimName));
        }
        catch (pdal::pdal_error&)
        {
            type = Dimension::Type::Double;
        }
        layout->registerOrAssignDim(dimName, type);
    }
    layout->finalize();

    // Fill in dim info now that the layout is finalized.
    for (FileInfo& fi : fileInfos)
    {
        for (FileDimInfo& di : fi.dimInfo)
        {
            di.dim = layout->findDim(di.name);
            di.type = layout->dimType(di.dim);
            di.offset = layout->dimOffset(di.dim);
        }
    }

    // Make a writer with NumWriters threads.
    m_writer.reset(new Writer(m_b.opts.tempDir, NumWriters, layout->pointSize()));

    // Sort the file infos so the largest files come first. This helps ensure that we
    // don't delay processing the big files that take the longest (uses threads more
    // efficiently).
    std::sort(fileInfos.begin(), fileInfos.end(), [](const FileInfo& f1, const FileInfo& f2)
        { return f1.numPoints > f2.numPoints; });

    progress.setPointIncrementer(totalPoints, 40);
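    // Reading appears to account for roughly the first 40% of reported progress;
    // the explicit setPercent(.4) call below matches this.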

    // Add the files to the processing pool.
    m_pool.trap(true, "Unknown error in FileProcessor");
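    // Exceptions thrown by the pooled tasks are trapped rather than propagated;
    // they're retrieved with clearErrors() after the pool is joined below.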
    for (const FileInfo& fi : fileInfos)
    {
        int pointSize = layout->pointSize();
        m_pool.add([&fi, &progress, pointSize, this]()
        {
            FileProcessor fp(fi, pointSize, m_grid, m_writer.get(), progress);
            fp.run();
        });
    }

    // Wait for all the processors to finish; the pool is restarted with go() below.
    m_pool.join();

    // Tell the writer that it can exit. stop() will block until the writer threads
    // are finished. stop() will throw if an error occurred during writing.
    m_writer->stop();

    // If the FileProcessors had an error, throw.
    std::vector<std::string> errors = m_pool.clearErrors();
    if (errors.size())
        throw FatalError(errors.front());

    m_pool.go();
    progress.setPercent(.4);

    // Get per-voxel point totals from the current writer for voxels holding more than
    // MaxPointsPerNode points. Each of these oversized voxels will be reprocessed.
    //ABELL - would be nice to avoid this copy, but it probably doesn't matter much.
    Totals totals = m_writer->totals(MaxPointsPerNode);

    // Progress for reprocessing goes from .4 to .6.
    progress.setPercent(.4);
    progress.setIncrement(.2 / (std::max)((size_t)1, totals.size()));

    // Make a new writer since we stopped the old one. Could restart, but why bother
    // with the extra code...
    m_writer.reset(new Writer(m_b.opts.tempDir, 4, layout->pointSize()));
    m_pool.trap(true, "Unknown error in Reprocessor");
    for (auto& t : totals)
    {
        VoxelKey key = t.first;
        int numPoints = t.second;
        int pointSize = layout->pointSize();
        std::string tempDir = m_b.opts.tempDir;

        // Create a reprocessor task. Note that the grid is copied by value and
        // its level is recalculated based on the number of points.
        m_pool.add([&progress, key, numPoints, pointSize, tempDir, this]()
        {
            Reprocessor r(key, numPoints, pointSize, tempDir, m_grid, m_writer.get());
            r.run();
            progress.writeIncrement("Reprocessed voxel " + key.toString());
        });
    }
    m_pool.stop();

    // If the Reprocessors had an error, throw.
    errors = m_pool.clearErrors();
    if (errors.size())
        throw FatalError(errors.front());

    m_writer->stop();

    fillMetadata(layout);
}


void Epf::fillMetadata(const pdal::PointLayoutPtr layout)
{
    using namespace pdal;

    // Info to be passed to the sampler.
    m_b.bounds = m_grid.processingBounds();
    m_b.trueBounds = m_grid.conformingBounds();
    if (m_srsFileInfo.valid())
        m_b.srs = m_srsFileInfo.srs;
    m_b.pointSize = 0;

    // Set the pointFormatId based on whether or not color/infrared dimensions exist
    // in the input.
    if (layout->hasDim(Dimension::Id::Infrared))
        m_b.pointFormatId = 8;
    else if (layout->hasDim(Dimension::Id::Red) ||
             layout->hasDim(Dimension::Id::Green) ||
             layout->hasDim(Dimension::Id::Blue))
        m_b.pointFormatId = 7;
    else
        m_b.pointFormatId = 6;
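    // (For reference: in LAS 1.4, PDRF 6 is the base record type, PDRF 7 adds RGB,
    // and PDRF 8 adds RGB plus NIR.)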

    const Dimension::IdList& lasDims = pdrfDims(m_b.pointFormatId);
    for (Dimension::Id id : layout->dims())
    {
        FileDimInfo di;
        di.name = layout->dimName(id);
        di.type = layout->dimType(id);
        di.offset = layout->dimOffset(id);
        di.dim = id;
        di.extraDim = !Utils::contains(lasDims, id);
        m_b.pointSize += pdal::Dimension::size(di.type);
        m_b.dimInfo.push_back(di);
    }

    auto calcScale = [](double scale, double low, double high)
    {
        if (scale > 0)
            return scale;

        // 2 billion is a little less than the int limit. We center the data around 0
        // with the offset, so we're applying the scale to half the range of the data.
        double val = high / 2 - low / 2;
        double power = std::ceil(std::log10(val / 2000000000.0));

        // Set an arbitrary lower limit on the scale of 1e-4.
        return std::pow(10, (std::max)(power, -4.0));
    };
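    // Worked example: for bounds of 0..4,000,000 with no scale given, val is
    // 2,000,000, val / 2e9 is 1e-3, so power is -3 and the scale becomes 0.001,
    // keeping the scaled half-range (2e9) inside the int32 limit.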

    m_b.scale[0] = calcScale(m_b.scale[0], m_b.trueBounds.minx, m_b.trueBounds.maxx);
    m_b.scale[1] = calcScale(m_b.scale[1], m_b.trueBounds.miny, m_b.trueBounds.maxy);
    m_b.scale[2] = calcScale(m_b.scale[2], m_b.trueBounds.minz, m_b.trueBounds.maxz);

    // Find an offset such that (offset - min) / scale is close to an integer. This helps
    // to eliminate warning messages in lasinfo that complain about being unable to write
    // nominal double values precisely using a 32-bit integer.
    // The hope is also that raw input values are written as the same raw values
    // on output. This may not be possible if the input files have different scaling or
    // incompatible offsets.
    auto calcOffset = [](double minval, double maxval, double scale)
    {
        double interval = maxval - minval;
        double spacings = interval / scale;    // Number of quantized values in our range.
        double halfspacings = spacings / 2;    // Half of that number.
        // Truncate to an integral number of spacings and convert back to coordinate units.
        double offset = (int32_t)halfspacings * scale;
        return minval + offset;                // Add the base (min) value.
    };
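    // Worked example: minval = 100, maxval = 200, scale = 0.01 gives spacings = 10000,
    // halfspacings = 5000 and offset = 50, so the returned offset of 150 sits an
    // integral number (5000) of scale steps above minval.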

    m_b.offset[0] = calcOffset(m_b.trueBounds.minx, m_b.trueBounds.maxx, m_b.scale[0]);
    m_b.offset[1] = calcOffset(m_b.trueBounds.miny, m_b.trueBounds.maxy, m_b.scale[1]);
    m_b.offset[2] = calcOffset(m_b.trueBounds.minz, m_b.trueBounds.maxz, m_b.scale[2]);
}


PointCount Epf::createFileInfo(const StringList& input, StringList dimNames,
    std::vector<FileInfo>& fileInfos)
{
    using namespace pdal;

    std::vector<FileInfo> tempFileInfos;
    std::vector<std::string> filenames;
    PointCount totalPoints = 0;

    // If dim names were specified, make sure they're all uppercase and that they
    // include X, Y and Z.
    if (!dimNames.empty())
    {
        for (std::string& d : dimNames)
            d = Utils::toupper(d);
        for (const std::string xyz : { "X", "Y", "Z" })
            if (!Utils::contains(dimNames, xyz))
                dimNames.push_back(xyz);
    }

    // If any of the specified input files is a directory, get the names of the files
    // in the directory and add them.
    for (const std::string& filename : input)
    {
        if (FileUtils::isDirectory(filename))
        {
            std::vector<std::string> dirfiles = directoryList(filename);
            filenames.insert(filenames.end(), dirfiles.begin(), dirfiles.end());
        }
        else
            filenames.push_back(filename);
    }

    std::vector<double> xOffsets;
    std::vector<double> yOffsets;
    std::vector<double> zOffsets;

    // Determine a driver for each file and get a preview; if either step fails, we
    // throw a fatal error. Create a FileInfo object containing the file bounds,
    // dimensions, filename and associated driver. Expand our grid by the bounds
    // and file point count.
    for (std::string& filename : filenames)
    {
        StageFactory factory;
        std::string driver = factory.inferReaderDriver(filename);
        if (driver.empty())
            throw FatalError("Can't infer reader for '" + filename + "'.");
        Stage *s = factory.createStage(driver);
        pdal::Options opts;
        opts.add("filename", filename);
        s->setOptions(opts);

        QuickInfo qi = s->preview();
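        // preview() is a quick metadata scan; for most readers no point data is
        // read, so this loop stays cheap even for very large inputs.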

        if (!qi.valid())
            throw FatalError("Couldn't get quick info for '" + filename + "'.");

        // Get scale values from the reader if they exist, keeping the coarsest
        // (largest) scale seen across the inputs.
        pdal::MetadataNode root = s->getMetadata();
        pdal::MetadataNode m = root.findChild("scale_x");
        if (m.valid())
            m_b.scale[0] = (std::max)(m_b.scale[0], m.value<double>());
        m = root.findChild("scale_y");
        if (m.valid())
            m_b.scale[1] = (std::max)(m_b.scale[1], m.value<double>());
        m = root.findChild("scale_z");
        if (m.valid())
            m_b.scale[2] = (std::max)(m_b.scale[2], m.value<double>());
        m = root.findChild("offset_x");
        if (m.valid())
            xOffsets.push_back(m.value<double>());
        m = root.findChild("offset_y");
        if (m.valid())
            yOffsets.push_back(m.value<double>());
        m = root.findChild("offset_z");
        if (m.valid())
            zOffsets.push_back(m.value<double>());

        FileInfo fi;
        fi.bounds = qi.m_bounds;
        fi.numPoints = qi.m_pointCount;
        fi.filename = filename;
        fi.driver = driver;

        // Accept a dimension name if there are no restrictions or if the name is in
        // the list of desired dimensions.
        for (const std::string& name : qi.m_dimNames)
            if (dimNames.empty() || Utils::contains(dimNames, Utils::toupper(name)))
                fi.dimInfo.push_back(FileDimInfo(name));

        if (m_srsFileInfo.valid() && m_srsFileInfo.srs != qi.m_srs)
            std::cerr << "Files have mismatched SRS values. Using SRS from '" <<
                m_srsFileInfo.filename << "'.\n";
        fi.srs = qi.m_srs;
        tempFileInfos.push_back(fi);
        if (!m_srsFileInfo.valid() && qi.m_srs.valid())
            m_srsFileInfo = fi;

        m_grid.expand(qi.m_bounds, qi.m_pointCount);
        totalPoints += fi.numPoints;
    }

    // If we had offsets from the input files, use the median of each offset list so
    // that a single outlying file can't skew the choice.
    if (xOffsets.size())
    {
        std::sort(xOffsets.begin(), xOffsets.end());
        m_b.offset[0] = xOffsets[xOffsets.size() / 2];
    }
    if (yOffsets.size())
    {
        std::sort(yOffsets.begin(), yOffsets.end());
        m_b.offset[1] = yOffsets[yOffsets.size() / 2];
    }
    if (zOffsets.size())
    {
        std::sort(zOffsets.begin(), zOffsets.end());
        m_b.offset[2] = zOffsets[zOffsets.size() / 2];
    }

    // If we have LAS "start" capability, break the file infos apart into chunks of
    // five million points.
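    // Splitting lets several FileProcessor tasks read disjoint ranges of a single
    // large file concurrently; each chunk records the point index at which its
    // read starts.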
#ifdef PDAL_LAS_START
    PointCount ChunkSize = 5'000'000;
    for (const FileInfo& fi : tempFileInfos)
    {
        if (fi.driver != "readers.las" || fi.numPoints < ChunkSize)
        {
            fileInfos.push_back(fi);
            continue;
        }

        PointCount remaining = fi.numPoints;
        pdal::PointId start = 0;
        while (remaining)
        {
            FileInfo lasFi(fi);
            lasFi.numPoints = (std::min)(ChunkSize, remaining);
            lasFi.start = start;
            fileInfos.push_back(lasFi);

            start += ChunkSize;
            remaining -= lasFi.numPoints;
        }
    }
#else
    fileInfos = std::move(tempFileInfos);
#endif

    return totalPoints;
}

} // namespace epf
} // namespace untwine