diff --git a/external/pdal_wrench/alg.cpp b/external/pdal_wrench/alg.cpp index 1fe15bd9d9d..5f6aa45eabd 100644 --- a/external/pdal_wrench/alg.cpp +++ b/external/pdal_wrench/alg.cpp @@ -18,6 +18,7 @@ #include #include +#include using namespace pdal; @@ -79,7 +80,7 @@ bool runAlg(std::vector args, Alg &alg) bool Alg::parseArgs(std::vector args) -{ +{ pdal::Arg* argInput = nullptr; if (hasSingleInput) { @@ -87,6 +88,7 @@ bool Alg::parseArgs(std::vector args) } (void)programArgs.add("filter,f", "Filter expression for input data", filterExpression); + (void)programArgs.add("bounds", "Filter by rectangle", filterBounds); addArgs(); // impl in derived @@ -112,6 +114,19 @@ bool Alg::parseArgs(std::vector args) return false; } + if (!filterBounds.empty()) + { + try + { + parseBounds(filterBounds); + } + catch (pdal::Bounds::error err) + { + std::cerr << "invalid bounds: " << err.what() << std::endl; + return false; + } + } + if (!checkArgs()) // impl in derived class return false; diff --git a/external/pdal_wrench/alg.hpp b/external/pdal_wrench/alg.hpp index fcdfa9c7fa8..808c7c17512 100644 --- a/external/pdal_wrench/alg.hpp +++ b/external/pdal_wrench/alg.hpp @@ -42,6 +42,8 @@ struct Alg std::string filterExpression; // optional argument to limit input points + std::string filterBounds; // optional clipping rectangle for input (pdal::Bounds) + bool needsSingleCrs = true; // most algs assume that all input files in VPC are in the same CRS, // and only few exceptions (e.g. info) tolerate mixture of multiple CRS diff --git a/external/pdal_wrench/boundary.cpp b/external/pdal_wrench/boundary.cpp index fad8fbc69b1..0a6b80a90ab 100644 --- a/external/pdal_wrench/boundary.cpp +++ b/external/pdal_wrench/boundary.cpp @@ -68,6 +68,32 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, double r Stage& r = manager->makeReader(tile->inputFilenames[0], ""); + Stage *last = &r; + + // filtering + if (!tile->filterBounds.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("bounds", tile->filterBounds)); + + if (readerSupportsBounds(r)) + { + // Reader of the format can do the filtering - use that whenever possible! + r.addOptions(filter_opts); + } + else + { + // Reader can't do the filtering - do it with a filter + last = &manager->makeFilter( "filters.crop", *last, filter_opts); + } + } + if (!tile->filterExpression.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("expression", tile->filterExpression)); + last = &manager->makeFilter( "filters.expression", *last, filter_opts); + } + // TODO: what edge size? (by default samples 5000 points if not specified // TODO: set threshold ? (default at least 16 points to keep the cell) // btw. 
if threshold=0, there are still missing points because of simplification (smooth=True) @@ -78,13 +104,8 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, double r hexbin_opts.add(pdal::Option("edge_size", resolution)); } hexbin_opts.add(pdal::Option("threshold", pointsThreshold)); + (void)manager->makeFilter( "filters.hexbin", *last, hexbin_opts ); - if (!tile->filterExpression.empty()) - { - hexbin_opts.add(pdal::Option("where", tile->filterExpression)); - } - - (void)manager->makeFilter( "filters.hexbin", r, hexbin_opts ); return manager; } @@ -99,14 +120,14 @@ void Boundary::preparePipelines(std::vector>& p for (const VirtualPointCloud::File& f : vpc.files) { - ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(f.filename); pipelines.push_back(pipeline(&tile, resolution, pointsThreshold)); } } else { - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); pipelines.push_back(pipeline(&tile, resolution, pointsThreshold)); } diff --git a/external/pdal_wrench/clip.cpp b/external/pdal_wrench/clip.cpp index 8d9fb0a7cf8..39c07fdf188 100644 --- a/external/pdal_wrench/clip.cpp +++ b/external/pdal_wrench/clip.cpp @@ -118,21 +118,38 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, const pd Stage& r = manager->makeReader( tile->inputFilenames[0], ""); - Stage& f = manager->makeFilter( "filters.crop", r, crop_opts ); + Stage *last = &r; - pdal::Options writer_opts; - writer_opts.add(pdal::Option("forward", "all")); - - Stage& w = manager->makeWriter( tile->outputFilename, "", f, writer_opts); + // filtering + if (!tile->filterBounds.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("bounds", tile->filterBounds)); + if (readerSupportsBounds(r)) + { + // Reader of the format can do the filtering - use that whenever possible! 
+ r.addOptions(filter_opts); + } + else + { + // Reader can't do the filtering - do it with a filter + last = &manager->makeFilter( "filters.crop", *last, filter_opts); + } + } if (!tile->filterExpression.empty()) { Options filter_opts; - filter_opts.add(pdal::Option("where", tile->filterExpression)); - f.addOptions(filter_opts); - w.addOptions(filter_opts); + filter_opts.add(pdal::Option("expression", tile->filterExpression)); + last = &manager->makeFilter( "filters.expression", *last, filter_opts); } + last = &manager->makeFilter( "filters.crop", *last, crop_opts ); + + pdal::Options writer_opts; + writer_opts.add(pdal::Option("forward", "all")); + manager->makeWriter( tile->outputFilename, "", *last, writer_opts); + return manager; } @@ -175,7 +192,7 @@ void Clip::preparePipelines(std::vector>& pipel std::cout << "using " << f.filename << std::endl; } - ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(f.filename); // for input file /x/y/z.las that goes to /tmp/hello.vpc, @@ -190,7 +207,7 @@ void Clip::preparePipelines(std::vector>& pipel } else { - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); tile.outputFilename = outputFile; pipelines.push_back(pipeline(&tile, crop_opts)); diff --git a/external/pdal_wrench/density.cpp b/external/pdal_wrench/density.cpp index 6dd5407edb2..8e70e807ce7 100644 --- a/external/pdal_wrench/density.cpp +++ b/external/pdal_wrench/density.cpp @@ -73,20 +73,53 @@ std::unique_ptr Density::pipeline(ParallelJobInfo *tile) const readers.push_back(&manager->makeReader(f, "")); } - if (tile->mode == ParallelJobInfo::Spatial) + std::vector last = readers; + + // find out what will be the bounding box for this job + // (there could be also no bbox if there's no "bounds" filter and no tiling) + BOX2D filterBox = !tile->filterBounds.empty() ? parseBounds(tile->filterBounds).to2d() : BOX2D(); + BOX2D box = intersectTileBoxWithFilterBox(tile->box, filterBox); + + if (box.valid()) { + // We are going to do filtering of points based on 2D box. 
Ideally we want to do + // the filtering in the reader (if the reader can do it efficiently like copc/ept), + // otherwise we have to add filters.crop stage to filter points after they were read + for (Stage* reader : readers) { - // with COPC files, we can also specify bounds at the reader - // that will only read the required parts of the file - if (reader->getName() == "readers.copc") + if (readerSupportsBounds(*reader)) { + // add "bounds" option to reader pdal::Options copc_opts; copc_opts.add(pdal::Option("threads", 1)); - copc_opts.add(pdal::Option("bounds", box_to_pdal_bounds(tile->box))); + copc_opts.add(pdal::Option("bounds", box_to_pdal_bounds(box))); reader->addOptions(copc_opts); } } + + if (!allReadersSupportBounds(readers) && !tile->filterBounds.empty()) + { + // At least some readers can't do the filtering - do it with a filter + Options filter_opts; + filter_opts.add(pdal::Option("bounds", tile->filterBounds)); + Stage *filterCrop = &manager->makeFilter( "filters.crop", filter_opts); + for (Stage *s : last) + filterCrop->setInput(*s); + last.clear(); + last.push_back(filterCrop); + } + } + + if (!tile->filterExpression.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("expression", tile->filterExpression)); + Stage *filterExpr = &manager->makeFilter( "filters.expression", filter_opts); + for (Stage *s : last) + filterExpr->setInput(*s); + last.clear(); + last.push_back(filterExpr); } pdal::Options writer_opts; @@ -98,9 +131,9 @@ std::unique_ptr Density::pipeline(ParallelJobInfo *tile) const writer_opts.add(pdal::Option("gdalopts", "TILED=YES")); writer_opts.add(pdal::Option("gdalopts", "COMPRESS=DEFLATE")); - if (tile->box.valid()) + if (box.valid()) { - BOX2D box2 = tile->box; + BOX2D box2 = box; // fix tile size - PDAL's writers.gdal adds one pixel (see GDALWriter::createGrid()), // because it probably expects that that the bounds and resolution do not perfectly match box2.maxx -= resolution; @@ -109,20 +142,13 @@ std::unique_ptr Density::pipeline(ParallelJobInfo *tile) const writer_opts.add(pdal::Option("bounds", box_to_pdal_bounds(box2))); } - if (!tile->filterExpression.empty()) - { - writer_opts.add(pdal::Option("where", tile->filterExpression)); - } - // TODO: "writers.gdal: Requested driver 'COG' does not support file creation."" // writer_opts.add(pdal::Option("gdaldriver", "COG")); pdal::StageCreationOptions opts{ tile->outputFilename, "", nullptr, writer_opts, "" }; Stage& w = manager->makeWriter( opts ); - for (Stage *stage : readers) - { - w.setInput(*stage); // connect all readers to the writer - } + for (Stage *stage : last) + w.setInput(*stage); return manager; } @@ -172,7 +198,14 @@ void Density::preparePipelines(std::vector>& pi // to avoid empty areas in resulting rasters tileBox.clip(gridBounds); - ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression); + if (!filterBounds.empty() && !intersectionBox2D(tileBox, parseBounds(filterBounds).to2d()).valid()) + { + if (verbose) + std::cout << "skipping tile " << iy << " " << ix << " -- " << tileBox.toBox() << std::endl; + continue; + } + + ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression, filterBounds); for (const VirtualPointCloud::File & f: vpc.overlappingBox2D(tileBox)) { tile.inputFilenames.push_back(f.filename); @@ -226,7 +259,14 @@ void Density::preparePipelines(std::vector>& pi { BOX2D tileBox = t.boxAt(ix, iy); - ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression); + if (!filterBounds.empty() && 
!intersectionBox2D(tileBox, parseBounds(filterBounds).to2d()).valid()) + { + if (verbose) + std::cout << "skipping tile " << iy << " " << ix << " -- " << tileBox.toBox() << std::endl; + continue; + } + + ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); // create temp output file names @@ -245,7 +285,7 @@ void Density::preparePipelines(std::vector>& pi { // single input LAS/LAZ - no parallelism - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); tile.outputFilename = outputFile; pipelines.push_back(pipeline(&tile)); @@ -256,8 +296,13 @@ void Density::preparePipelines(std::vector>& pi void Density::finalize(std::vector>& pipelines) { - if (pipelines.size() > 1) + if (!tileOutputFiles.empty()) { rasterTilesToCog(tileOutputFiles, outputFile); + + // clean up the temporary directory + fs::path outputParentDir = fs::path(outputFile).parent_path(); + fs::path outputSubdir = outputParentDir / fs::path(outputFile).stem(); + fs::remove_all(outputSubdir); } } diff --git a/external/pdal_wrench/merge.cpp b/external/pdal_wrench/merge.cpp index c7f3b5fda12..18e9c6541c9 100644 --- a/external/pdal_wrench/merge.cpp +++ b/external/pdal_wrench/merge.cpp @@ -55,27 +55,59 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile) std::unique_ptr manager( new PipelineManager ); - pdal::Options options; - options.add(pdal::Option("forward", "all")); - - if (!tile->filterExpression.empty()) - { - options.add(pdal::Option("where", tile->filterExpression)); - } - - Stage& w = manager->makeWriter(tile->outputFilename, "", options); - + std::vector readers; for (const std::string& f : tile->inputFilenames) { - w.setInput(manager->makeReader(f, "")); + readers.push_back(&manager->makeReader(f, "")); } + std::vector last = readers; + + // filtering + if (!tile->filterBounds.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("bounds", tile->filterBounds)); + + if (allReadersSupportBounds(readers)) + { + // Reader of the format can do the filtering - use that whenever possible! 
+ for (Stage *r : readers) + r->addOptions(filter_opts); + } + else + { + // Reader can't do the filtering - do it with a filter + Stage *filterCrop = &manager->makeFilter( "filters.crop", filter_opts); + for (Stage *s : last) + filterCrop->setInput(*s); + last.clear(); + last.push_back(filterCrop); + } + } + if (!tile->filterExpression.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("expression", tile->filterExpression)); + Stage *filterExpr = &manager->makeFilter( "filters.expression", filter_opts); + for (Stage *s : last) + filterExpr->setInput(*s); + last.clear(); + last.push_back(filterExpr); + } + + pdal::Options options; + options.add(pdal::Option("forward", "all")); + Stage* writer = &manager->makeWriter(tile->outputFilename, "", options); + for (Stage *s : last) + writer->setInput(*s); + return manager; } void Merge::preparePipelines(std::vector>& pipelines, const BOX3D &, point_count_t &totalPoints) { - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames = inputFiles; tile.outputFilename = outputFile; diff --git a/external/pdal_wrench/thin.cpp b/external/pdal_wrench/thin.cpp index eef9e0e917e..4afd500f2ec 100644 --- a/external/pdal_wrench/thin.cpp +++ b/external/pdal_wrench/thin.cpp @@ -99,33 +99,48 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, std::str Stage& r = manager->makeReader( tile->inputFilenames[0], ""); - Stage *filterPtr = nullptr; + Stage *last = &r; + + // filtering + if (!tile->filterBounds.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("bounds", tile->filterBounds)); + + if (readerSupportsBounds(r)) + { + // Reader of the format can do the filtering - use that whenever possible! 
+ r.addOptions(filter_opts); + } + else + { + // Reader can't do the filtering - do it with a filter + last = &manager->makeFilter( "filters.crop", *last, filter_opts); + } + } + if (!tile->filterExpression.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("expression", tile->filterExpression)); + last = &manager->makeFilter( "filters.expression", *last, filter_opts); + } if (mode == "every-nth") { pdal::Options decim_opts; decim_opts.add(pdal::Option("step", stepEveryN)); - filterPtr = &manager->makeFilter( "filters.decimation", r, decim_opts ); + last = &manager->makeFilter( "filters.decimation", *last, decim_opts ); } else if (mode == "sample") { pdal::Options sample_opts; sample_opts.add(pdal::Option("cell", stepSample)); - filterPtr = &manager->makeFilter( "filters.sample", r, sample_opts ); + last = &manager->makeFilter( "filters.sample", *last, sample_opts ); } pdal::Options writer_opts; writer_opts.add(pdal::Option("forward", "all")); // TODO: maybe we could use lower scale than the original - - Stage& w = manager->makeWriter( tile->outputFilename, "", *filterPtr, writer_opts); - - if (!tile->filterExpression.empty()) - { - Options filter_opts; - filter_opts.add(pdal::Option("where", tile->filterExpression)); - filterPtr->addOptions(filter_opts); - w.addOptions(filter_opts); - } + manager->makeWriter( tile->outputFilename, "", *last, writer_opts); return manager; } @@ -153,7 +168,7 @@ void Thin::preparePipelines(std::vector>& pipel for (const VirtualPointCloud::File& f : vpc.files) { - ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(f.filename); // for input file /x/y/z.las that goes to /tmp/hello.vpc, @@ -168,7 +183,7 @@ void Thin::preparePipelines(std::vector>& pipel } else { - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); tile.outputFilename = outputFile; pipelines.push_back(pipeline(&tile, mode, stepEveryN, stepSample)); diff --git a/external/pdal_wrench/tile/tile.cpp b/external/pdal_wrench/tile/tile.cpp index 1e9578373eb..6700a9eec44 100644 --- a/external/pdal_wrench/tile/tile.cpp +++ b/external/pdal_wrench/tile/tile.cpp @@ -684,7 +684,7 @@ static void tilingPass2(BaseInfo &m_b, TileGrid &m_grid, FileInfo &m_srsFileInfo { size_t nPointsToRead = std::min(readChunkSize, ptCnt-i); fileReader.read(contentPtr, nPointsToRead*m_b.pointSize); - for (size_t a = 0; a < nPointsToRead; ++a) + for (std::size_t a = 0; a < nPointsToRead; ++a) { char *base = contentPtr + a * m_b.pointSize; for (const untwine::FileDimInfo& fdi : dims) diff --git a/external/pdal_wrench/to_raster.cpp b/external/pdal_wrench/to_raster.cpp index ba48b727c90..ba1e8a7d5a0 100644 --- a/external/pdal_wrench/to_raster.cpp +++ b/external/pdal_wrench/to_raster.cpp @@ -74,7 +74,9 @@ bool ToRaster::checkArgs() } -static std::unique_ptr pipeline(ParallelJobInfo *tile, double resolution, std::string attribute) + + +static std::unique_ptr pipeline(ParallelJobInfo *tile, double resolution, std::string attribute, double collarSize) { std::unique_ptr manager( new PipelineManager ); @@ -84,20 +86,64 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, double r readers.push_back(&manager->makeReader(f, "")); } - if (tile->mode == ParallelJobInfo::Spatial) + std::vector last = readers; + + // find out 
what will be the bounding box for this job
+    // (there could be also no bbox if there's no "bounds" filter and no tiling)
+    BOX2D filterBox = !tile->filterBounds.empty() ? parseBounds(tile->filterBounds).to2d() : BOX2D();
+    BOX2D box = intersectTileBoxWithFilterBox(tile->box, filterBox);
+    BOX2D boxWithCollar;
+
+    // box with collar is used for filtering of data on the input
+    // box without collar is used for output bounds
+
+    if (box.valid())
     {
+        BOX2D filterBoxWithCollar = filterBox;
+        if (filterBoxWithCollar.valid())
+            filterBoxWithCollar.grow(collarSize);
+        boxWithCollar = tile->box;
+        boxWithCollar.grow(collarSize);
+        boxWithCollar = intersectTileBoxWithFilterBox(boxWithCollar, filterBoxWithCollar);
+
+        // We are going to do filtering of points based on 2D box. Ideally we want to do
+        // the filtering in the reader (if the reader can do it efficiently like copc/ept),
+        // otherwise we have to add filters.crop stage to filter points after they were read
         for (Stage* reader : readers)
         {
-            // with COPC files, we can also specify bounds at the reader
-            // that will only read the required parts of the file
-            if (reader->getName() == "readers.copc")
+            if (readerSupportsBounds(*reader))
             {
+                // add "bounds" option to reader
                 pdal::Options copc_opts;
                 copc_opts.add(pdal::Option("threads", 1));
-                copc_opts.add(pdal::Option("bounds", box_to_pdal_bounds(tile->boxWithCollar)));
+                copc_opts.add(pdal::Option("bounds", box_to_pdal_bounds(boxWithCollar)));
                 reader->addOptions(copc_opts);
             }
         }
+
+        if (!allReadersSupportBounds(readers) && !tile->filterBounds.empty())
+        {
+            // At least some readers can't do the filtering - do it with a filter
+            Options filter_opts;
+            filter_opts.add(pdal::Option("bounds", box_to_pdal_bounds(filterBoxWithCollar)));
+            Stage *filterCrop = &manager->makeFilter( "filters.crop", filter_opts);
+            for (Stage *s : last)
+                filterCrop->setInput(*s);
+            last.clear();
+            last.push_back(filterCrop);
+        }
+    }
+
+    if (!tile->filterExpression.empty())
+    {
+        Options filter_opts;
+        filter_opts.add(pdal::Option("expression", tile->filterExpression));
+        Stage *filterExpr = &manager->makeFilter( "filters.expression", filter_opts);
+        for (Stage *s : last)
+            filterExpr->setInput(*s);
+        last.clear();
+        last.push_back(filterExpr);
     }
 
     pdal::Options writer_opts;
@@ -109,9 +155,9 @@ static std::unique_ptr<PipelineManager> pipeline(ParallelJobInfo *tile, double r
     writer_opts.add(pdal::Option("gdalopts", "TILED=YES"));
     writer_opts.add(pdal::Option("gdalopts", "COMPRESS=DEFLATE"));
 
-    if (tile->box.valid())
+    if (box.valid())
     {
-        BOX2D box2 = tile->box;
+        BOX2D box2 = box;
         // fix tile size - PDAL's writers.gdal adds one pixel (see GDALWriter::createGrid()),
         // because it probably expects that the bounds and resolution do not perfectly match
         box2.maxx -= resolution;
@@ -120,20 +166,13 @@ static std::unique_ptr<PipelineManager> pipeline(ParallelJobInfo *tile, double r
         writer_opts.add(pdal::Option("bounds", box_to_pdal_bounds(box2)));
     }
 
-    if (!tile->filterExpression.empty())
-    {
-        writer_opts.add(pdal::Option("where", tile->filterExpression));
-    }
-
     // TODO: "writers.gdal: Requested driver 'COG' does not support file creation.""
     // writer_opts.add(pdal::Option("gdaldriver", "COG"));
 
     pdal::StageCreationOptions opts{ tile->outputFilename, "", nullptr, writer_opts, "" };
     Stage& w = manager->makeWriter( opts );
-    for (Stage *stage : readers)
-    {
-        w.setInput(*stage); // connect all readers to the writer
-    }
+    for (Stage *stage : last)
+        w.setInput(*stage);
 
     return manager;
 }
@@ -184,13 +223,20 @@ void ToRaster::preparePipelines(std::vector<std::unique_ptr<PipelineManager>>& p
             //
to avoid empty areas in resulting rasters tileBox.clip(gridBounds); - ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression); + if (!filterBounds.empty() && !intersectionBox2D(tileBox, parseBounds(filterBounds).to2d()).valid()) + { + if (verbose) + std::cout << "skipping tile " << iy << " " << ix << " -- " << tileBox.toBox() << std::endl; + continue; + } + + ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression, filterBounds); // add collar to avoid edge effects - tile.boxWithCollar = tileBox; - tile.boxWithCollar.grow(collarSize); + BOX2D boxWithCollar = tileBox; + boxWithCollar.grow(collarSize); - for (const VirtualPointCloud::File & f: vpc.overlappingBox2D(tile.boxWithCollar)) + for (const VirtualPointCloud::File & f: vpc.overlappingBox2D(boxWithCollar)) { tile.inputFilenames.push_back(f.filename); totalPoints += f.count; @@ -206,7 +252,7 @@ void ToRaster::preparePipelines(std::vector>& p tileOutputFiles.push_back(tile.outputFilename); - pipelines.push_back(pipeline(&tile, resolution, attribute)); + pipelines.push_back(pipeline(&tile, resolution, attribute, collarSize)); } } } @@ -232,12 +278,19 @@ void ToRaster::preparePipelines(std::vector>& p { BOX2D tileBox = t.boxAt(ix, iy); - ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression); + if (!filterBounds.empty() && !intersectionBox2D(tileBox, parseBounds(filterBounds).to2d()).valid()) + { + if (verbose) + std::cout << "skipping tile " << iy << " " << ix << " -- " << tileBox.toBox() << std::endl; + continue; + } + + ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); // add collar to avoid edge effects - tile.boxWithCollar = tileBox; - tile.boxWithCollar.grow(collarSize); + BOX2D boxWithCollar = tileBox; + boxWithCollar.grow(collarSize); // create temp output file names // for tile (x=2,y=3) that goes to /tmp/hello.tif, @@ -247,7 +300,7 @@ void ToRaster::preparePipelines(std::vector>& p tileOutputFiles.push_back(tile.outputFilename); - pipelines.push_back(pipeline(&tile, resolution, attribute)); + pipelines.push_back(pipeline(&tile, resolution, attribute, collarSize)); } } } @@ -255,10 +308,10 @@ void ToRaster::preparePipelines(std::vector>& p { // single input LAS/LAZ - no parallelism - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); tile.outputFilename = outputFile; - pipelines.push_back(pipeline(&tile, resolution, attribute)); + pipelines.push_back(pipeline(&tile, resolution, attribute, 0)); } } @@ -266,8 +319,13 @@ void ToRaster::preparePipelines(std::vector>& p void ToRaster::finalize(std::vector>& pipelines) { - if (pipelines.size() > 1) + if (!tileOutputFiles.empty()) { rasterTilesToCog(tileOutputFiles, outputFile); + + // clean up the temporary directory + fs::path outputParentDir = fs::path(outputFile).parent_path(); + fs::path outputSubdir = outputParentDir / fs::path(outputFile).stem(); + fs::remove_all(outputSubdir); } } diff --git a/external/pdal_wrench/to_raster_tin.cpp b/external/pdal_wrench/to_raster_tin.cpp index c9b94f2593b..c6253eecb29 100644 --- a/external/pdal_wrench/to_raster_tin.cpp +++ b/external/pdal_wrench/to_raster_tin.cpp @@ -70,7 +70,7 @@ bool ToRasterTin::checkArgs() } -std::unique_ptr pipeline(ParallelJobInfo *tile, double resolution) +std::unique_ptr pipeline(ParallelJobInfo *tile, double resolution, double 
collarSize)
 {
     std::unique_ptr<PipelineManager> manager( new PipelineManager );
@@ -80,44 +80,90 @@ std::unique_ptr<PipelineManager> pipeline(ParallelJobInfo *tile, double resoluti
         readers.push_back(&manager->makeReader(f, ""));
     }
 
-    if (tile->mode == ParallelJobInfo::Spatial)
+    std::vector<Stage*> last = readers;
+
+    // find out what will be the bounding box for this job
+    // (there could be also no bbox if there's no "bounds" filter and no tiling)
+    BOX2D filterBox = !tile->filterBounds.empty() ? parseBounds(tile->filterBounds).to2d() : BOX2D();
+    BOX2D box = intersectTileBoxWithFilterBox(tile->box, filterBox);
+    BOX2D boxWithCollar;
+
+    // box with collar is used for filtering of data on the input
+    // box without collar is used for output bounds
+
+    if (box.valid())
     {
+        BOX2D filterBoxWithCollar = filterBox;
+        if (filterBoxWithCollar.valid())
+            filterBoxWithCollar.grow(collarSize);
+        boxWithCollar = tile->box;
+        boxWithCollar.grow(collarSize);
+        boxWithCollar = intersectTileBoxWithFilterBox(boxWithCollar, filterBoxWithCollar);
+
+        // We are going to do filtering of points based on 2D box. Ideally we want to do
+        // the filtering in the reader (if the reader can do it efficiently like copc/ept),
+        // otherwise we have to add filters.crop stage to filter points after they were read
         for (Stage* reader : readers)
         {
-            // with COPC files, we can also specify bounds at the reader
-            // that will only read the required parts of the file
-            if (reader->getName() == "readers.copc")
+            if (readerSupportsBounds(*reader))
             {
+                // add "bounds" option to reader
                 pdal::Options copc_opts;
                 copc_opts.add(pdal::Option("threads", 1));
-                copc_opts.add(pdal::Option("bounds", box_to_pdal_bounds(tile->boxWithCollar)));
+                copc_opts.add(pdal::Option("bounds", box_to_pdal_bounds(boxWithCollar)));
                 reader->addOptions(copc_opts);
             }
         }
-    }
 
-    Stage &delaunay = manager->makeFilter("filters.delaunay");
-    for (Stage *stage : readers)
-    {
-        delaunay.setInput(*stage); // connect all readers to the writer
+        if (!allReadersSupportBounds(readers) && !tile->filterBounds.empty())
+        {
+            // At least some readers can't do the filtering - do it with a filter
+            Options filter_opts;
+            filter_opts.add(pdal::Option("bounds", box_to_pdal_bounds(filterBoxWithCollar)));
+            Stage *filterCrop = &manager->makeFilter( "filters.crop", filter_opts);
+            for (Stage *s : last)
+                filterCrop->setInput(*s);
+            last.clear();
+            last.push_back(filterCrop);
+        }
     }
 
     if (!tile->filterExpression.empty())
     {
         Options filter_opts;
-        filter_opts.add(pdal::Option("where", tile->filterExpression));
-        delaunay.addOptions(filter_opts);
+        filter_opts.add(pdal::Option("expression", tile->filterExpression));
+        Stage *filterExpr = &manager->makeFilter( "filters.expression", filter_opts);
+        for (Stage *s : last)
+            filterExpr->setInput(*s);
+        last.clear();
+        last.push_back(filterExpr);
     }
 
+    if (readers.size() > 1)
+    {
+        // explicitly merge if there are multiple inputs, otherwise things don't work
+        // (no pixels are written - not sure which downstream stage is to blame)
+        Stage *merge = &manager->makeFilter("filters.merge");
+        for (Stage *stage : last)
+            merge->setInput(*stage);
+        last.clear();
+        last.push_back(merge);
+    }
+
+    Stage &delaunay = manager->makeFilter("filters.delaunay");
+    for (Stage *stage : last)
+        delaunay.setInput(*stage);
+
     pdal::Options faceRaster_opts;
     faceRaster_opts.add(pdal::Option("resolution", resolution));
 
-    if (tile->box.valid())   // if box is not provided, filters.faceraster will calculate it from data
+    if (box.valid())   // if box is not provided, filters.faceraster will calculate it from data
     {
-
faceRaster_opts.add(pdal::Option("origin_x", tile->box.minx)); - faceRaster_opts.add(pdal::Option("origin_y", tile->box.miny)); - faceRaster_opts.add(pdal::Option("width", (tile->box.maxx-tile->box.minx)/resolution)); - faceRaster_opts.add(pdal::Option("height", (tile->box.maxy-tile->box.miny)/resolution)); + faceRaster_opts.add(pdal::Option("origin_x", box.minx)); + faceRaster_opts.add(pdal::Option("origin_y", box.miny)); + faceRaster_opts.add(pdal::Option("width", ceil((box.maxx-box.minx)/resolution))); + faceRaster_opts.add(pdal::Option("height", ceil((box.maxy-box.miny)/resolution))); } Stage &faceRaster = manager->makeFilter("filters.faceraster", delaunay, faceRaster_opts); @@ -177,13 +223,20 @@ void ToRasterTin::preparePipelines(std::vector> // to avoid empty areas in resulting rasters tileBox.clip(gridBounds); - ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression); + if (!filterBounds.empty() && !intersectionBox2D(tileBox, parseBounds(filterBounds).to2d()).valid()) + { + if (verbose) + std::cout << "skipping tile " << iy << " " << ix << " -- " << tileBox.toBox() << std::endl; + continue; + } + + ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression, filterBounds); // add collar to avoid edge effects - tile.boxWithCollar = tileBox; - tile.boxWithCollar.grow(collarSize); + BOX2D boxWithCollar = tileBox; + boxWithCollar.grow(collarSize); - for (const VirtualPointCloud::File & f: vpc.overlappingBox2D(tile.boxWithCollar)) + for (const VirtualPointCloud::File & f: vpc.overlappingBox2D(boxWithCollar)) { tile.inputFilenames.push_back(f.filename); totalPoints += f.count; @@ -199,7 +252,7 @@ void ToRasterTin::preparePipelines(std::vector> tileOutputFiles.push_back(tile.outputFilename); - pipelines.push_back(pipeline(&tile, resolution)); + pipelines.push_back(pipeline(&tile, resolution, collarSize)); } } } @@ -225,12 +278,19 @@ void ToRasterTin::preparePipelines(std::vector> { BOX2D tileBox = t.boxAt(ix, iy); - ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression); + if (!filterBounds.empty() && !intersectionBox2D(tileBox, parseBounds(filterBounds).to2d()).valid()) + { + if (verbose) + std::cout << "skipping tile " << iy << " " << ix << " -- " << tileBox.toBox() << std::endl; + continue; + } + + ParallelJobInfo tile(ParallelJobInfo::Spatial, tileBox, filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); // add collar to avoid edge effects - tile.boxWithCollar = tileBox; - tile.boxWithCollar.grow(collarSize); + BOX2D boxWithCollar = tileBox; + boxWithCollar.grow(collarSize); // create temp output file names // for tile (x=2,y=3) that goes to /tmp/hello.tif, @@ -240,23 +300,28 @@ void ToRasterTin::preparePipelines(std::vector> tileOutputFiles.push_back(tile.outputFilename); - pipelines.push_back(pipeline(&tile, resolution)); + pipelines.push_back(pipeline(&tile, resolution, collarSize)); } } } else { - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); tile.outputFilename = outputFile; - pipelines.push_back(pipeline(&tile, resolution)); + pipelines.push_back(pipeline(&tile, resolution, 0)); } } void ToRasterTin::finalize(std::vector>& pipelines) { - if (pipelines.size() > 1) + if (!tileOutputFiles.empty()) { rasterTilesToCog(tileOutputFiles, outputFile); + + // clean up the temporary directory + fs::path outputParentDir = fs::path(outputFile).parent_path(); + 
fs::path outputSubdir = outputParentDir / fs::path(outputFile).stem(); + fs::remove_all(outputSubdir); } } diff --git a/external/pdal_wrench/to_vector.cpp b/external/pdal_wrench/to_vector.cpp index a1952a6219c..c75a8b387d4 100644 --- a/external/pdal_wrench/to_vector.cpp +++ b/external/pdal_wrench/to_vector.cpp @@ -55,17 +55,37 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, const st Stage& r = manager->makeReader( tile->inputFilenames[0], ""); + Stage *last = &r; + + // filtering + if (!tile->filterBounds.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("bounds", tile->filterBounds)); + + if (readerSupportsBounds(r)) + { + // Reader of the format can do the filtering - use that whenever possible! + r.addOptions(filter_opts); + } + else + { + // Reader can't do the filtering - do it with a filter + last = &manager->makeFilter( "filters.crop", *last, filter_opts); + } + } + if (!tile->filterExpression.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("expression", tile->filterExpression)); + last = &manager->makeFilter( "filters.expression", *last, filter_opts); + } + pdal::Options writer_opts; writer_opts.add(pdal::Option("ogrdriver", "GPKG")); if (!attributes.empty()) writer_opts.add(pdal::Option("attr_dims", join_strings(attributes, ','))); - - if (!tile->filterExpression.empty()) - { - writer_opts.add(pdal::Option("where", tile->filterExpression)); - } - - (void)manager->makeWriter( tile->outputFilename, "writers.ogr", r, writer_opts); + (void)manager->makeWriter( tile->outputFilename, "writers.ogr", *last, writer_opts); return manager; } @@ -87,7 +107,7 @@ void ToVector::preparePipelines(std::vector>& p for (const VirtualPointCloud::File& f : vpc.files) { - ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(f.filename); // for input file /x/y/z.las that goes to /tmp/hello.vpc, @@ -102,7 +122,7 @@ void ToVector::preparePipelines(std::vector>& p } else { - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); tile.outputFilename = outputFile; pipelines.push_back(pipeline(&tile, attributes)); diff --git a/external/pdal_wrench/translate.cpp b/external/pdal_wrench/translate.cpp index 3ab51be69bb..9c6dd16151a 100644 --- a/external/pdal_wrench/translate.cpp +++ b/external/pdal_wrench/translate.cpp @@ -82,6 +82,32 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, std::str Stage& r = manager->makeReader( tile->inputFilenames[0], "", reader_opts); + Stage *last = &r; + + // filtering + if (!tile->filterBounds.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("bounds", tile->filterBounds)); + + if (readerSupportsBounds(r)) + { + // Reader of the format can do the filtering - use that whenever possible! 
+ r.addOptions(filter_opts); + } + else + { + // Reader can't do the filtering - do it with a filter + last = &manager->makeFilter( "filters.crop", *last, filter_opts); + } + } + if (!tile->filterExpression.empty()) + { + Options filter_opts; + filter_opts.add(pdal::Option("expression", tile->filterExpression)); + last = &manager->makeFilter( "filters.expression", *last, filter_opts); + } + // optional reprojection Stage* reproject = nullptr; if (!transformCrs.empty()) @@ -91,12 +117,13 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, std::str if (!transformCoordOp.empty()) { transform_opts.add(pdal::Option("coord_op", transformCoordOp)); - reproject = &manager->makeFilter( "filters.projpipeline", r, transform_opts); + reproject = &manager->makeFilter( "filters.projpipeline", *last, transform_opts); } else { - reproject = &manager->makeFilter( "filters.reprojection", r, transform_opts); + reproject = &manager->makeFilter( "filters.reprojection", *last, transform_opts); } + last = reproject; } pdal::Options writer_opts; @@ -115,17 +142,7 @@ static std::unique_ptr pipeline(ParallelJobInfo *tile, std::str writer_opts.add(pdal::Option("offset_z", "auto")); } - Stage& writerInput = reproject ? *reproject : r; - Stage& w = manager->makeWriter( tile->outputFilename, "", writerInput, writer_opts); - - if (!tile->filterExpression.empty()) - { - Options filter_opts; - filter_opts.add(pdal::Option("where", tile->filterExpression)); - w.addOptions(filter_opts); - if (reproject) - reproject->addOptions(filter_opts); - } + Stage& w = manager->makeWriter( tile->outputFilename, "", *last, writer_opts); return manager; } @@ -153,7 +170,7 @@ void Translate::preparePipelines(std::vector>& for (const VirtualPointCloud::File& f : vpc.files) { - ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::FileBased, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(f.filename); // for input file /x/y/z.las that goes to /tmp/hello.vpc, @@ -168,7 +185,7 @@ void Translate::preparePipelines(std::vector>& } else { - ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression); + ParallelJobInfo tile(ParallelJobInfo::Single, BOX2D(), filterExpression, filterBounds); tile.inputFilenames.push_back(inputFile); tile.outputFilename = outputFile; pipelines.push_back(pipeline(&tile, assignCrs, transformCrs, transformCoordOp)); diff --git a/external/pdal_wrench/utils.cpp b/external/pdal_wrench/utils.cpp index 4b33374e04f..53399c45ec1 100644 --- a/external/pdal_wrench/utils.cpp +++ b/external/pdal_wrench/utils.cpp @@ -12,6 +12,7 @@ #include "utils.hpp" +#include #include #include @@ -196,7 +197,68 @@ bool rasterTilesToCog(const std::vector &inputFiles, const std::str rasterVrtToCog(ds, outputFile); GDALClose(ds); - // TODO: remove VRT + partial tifs after gdal_translate? 
+ std::filesystem::remove(outputVrt); return true; } + +bool readerSupportsBounds(Stage &reader) +{ + // these readers support "bounds" option with a 2D/3D bounding box, and based + // on it, they will do very efficient reading of data and only return what's + // in the given bounding box + return reader.getName() == "readers.copc" || reader.getName() == "readers.ept"; +} + +bool allReadersSupportBounds(const std::vector &readers) +{ + for (Stage *r : readers) + { + if (!readerSupportsBounds(*r)) + return false; + } + return true; +} + +pdal::Bounds parseBounds(const std::string &boundsStr) +{ + // if the input string is not a correct 2D/3D PDAL bounds then parse() + // will throw an exception + pdal::Bounds b; + std::string::size_type pos(0); + b.parse(boundsStr, pos); + return b; +} + +BOX2D intersectionBox2D(const BOX2D &b1, const BOX2D &b2) +{ + BOX2D b; + b.minx = b1.minx > b2.minx ? b1.minx : b2.minx; + b.miny = b1.miny > b2.miny ? b1.miny : b2.miny; + b.maxx = b1.maxx < b2.maxx ? b1.maxx : b2.maxx; + b.maxy = b1.maxy < b2.maxy ? b1.maxy : b2.maxy; + if (b.minx > b.maxx || b.miny > b.maxy) + return BOX2D(); + return b; +} + + +BOX2D intersectTileBoxWithFilterBox(const BOX2D &tileBox, const BOX2D &filterBox) +{ + if (tileBox.valid() && filterBox.valid()) + { + return intersectionBox2D(tileBox, filterBox); + } + else if (tileBox.valid()) + { + return tileBox; + } + else if (filterBox.valid()) + { + return filterBox; + } + else + { + return BOX2D(); // invalid box + } +} diff --git a/external/pdal_wrench/utils.hpp b/external/pdal_wrench/utils.hpp index 6a76889aa0d..dab20ce7da2 100644 --- a/external/pdal_wrench/utils.hpp +++ b/external/pdal_wrench/utils.hpp @@ -77,7 +77,8 @@ struct ParallelJobInfo } mode; ParallelJobInfo(ParallelMode m = Single): mode(m) {} - ParallelJobInfo(ParallelMode m, const BOX2D &b, const std::string fe): mode(m), box(b), filterExpression(fe) {} + ParallelJobInfo(ParallelMode m, const BOX2D &b, const std::string fe, const std::string fb) + : mode(m), box(b), filterExpression(fe), filterBounds(fb) {} // what input point cloud files to read for a job std::vector inputFilenames; @@ -88,20 +89,20 @@ struct ParallelJobInfo // bounding box for this job (for input/output) BOX2D box; - // bounding box for the job with extra collar that some algs may use - // in case they need access to neighboring points at the edges of tiles - BOX2D boxWithCollar; - // PDAL filter expression to apply on all pipelines std::string filterExpression; + // PDAL filter on 2D or 3D bounds to apply on all pipelines + // Format is "([xmin, xmax], [ymin, ymax])" or "([xmin, xmax], [ymin, ymax], [zmin, zmax])" + std::string filterBounds; + // modes of operation: // A. multi input without box (LAS/LAZ) -- per file strategy // - all input files are processed, no filtering on bounding box // B. 
multi input with box (anything) -- tile strategy // - all input files are processed, but with filtering applied // - COPC: filtering inside readers.copc with "bounds" argument - // - LAS/LAZ: filter either using CropFilter after reader -or- "where" + // - LAS/LAZ: filter either using CropFilter after reader -or- "where" // streaming algs: // - multi-las: if not overlapping: mode A @@ -225,6 +226,16 @@ void runPipelineParallel(point_count_t totalPoints, bool isStreaming, std::vecto std::string box_to_pdal_bounds(const BOX2D &box); +pdal::Bounds parseBounds(const std::string &boundsStr); + +bool readerSupportsBounds(Stage &reader); + +bool allReadersSupportBounds(const std::vector &readers); + +BOX2D intersectionBox2D(const BOX2D &b1, const BOX2D &b2); + +BOX2D intersectTileBoxWithFilterBox(const BOX2D &tileBox, const BOX2D &filterBox); + inline bool ends_with(std::string const & value, std::string const & ending) { diff --git a/external/pdal_wrench/vpc.cpp b/external/pdal_wrench/vpc.cpp index 75b70c89aee..a3235a401d9 100644 --- a/external/pdal_wrench/vpc.cpp +++ b/external/pdal_wrench/vpc.cpp @@ -12,16 +12,18 @@ #include #include +#include namespace fs = std::filesystem; #include "vpc.hpp" #include "utils.hpp" +#include +#include #include #include "nlohmann/json.hpp" -#include using json = nlohmann::json; @@ -110,6 +112,14 @@ bool VirtualPointCloud::read(std::string filename) vpcf.crsWkt = f["properties"]["proj:wkt2"]; vpcCrsWkt.insert(vpcf.crsWkt); + // read boundary geometry + nlohmann::json nativeGeometry = f["properties"]["proj:geometry"]; + std::stringstream sstream; + sstream << std::setw(2) << nativeGeometry << std::endl; + std::string wkt = sstream.str(); + pdal::Geometry nativeGeom(sstream.str()); + vpcf.boundaryWkt = nativeGeom.wkt(); + nlohmann::json nativeBbox = f["properties"]["proj:bbox"]; vpcf.bbox = BOX3D( nativeBbox[0].get(), nativeBbox[1].get(), nativeBbox[2].get(), @@ -126,6 +136,33 @@ bool VirtualPointCloud::read(std::string filename) vpcf.schema.push_back(VirtualPointCloud::SchemaItem(schemaItem["name"], schemaItem["type"], schemaItem["size"].get())); } + // read stats + for (auto &statsItem : f["properties"]["pc:statistics"]) + { + vpcf.stats.push_back(VirtualPointCloud::StatsItem( + statsItem["name"], + statsItem["position"], + statsItem["average"], + statsItem["count"], + statsItem["maximum"], + statsItem["minimum"], + statsItem["stddev"], + statsItem["variance"])); + } + + // read overview file (if any, expecting at most one) + // this logic is very basic, we should be probably checking roles of assets + if (f["assets"].contains("overview")) + { + vpcf.overviewFilename = f["assets"]["overview"]["href"]; + + if (vpcf.overviewFilename.substr(0, 2) == "./") + { + // resolve relative path + vpcf.overviewFilename = fs::weakly_canonical(filenameParent / vpcf.overviewFilename).string(); + } + } + files.push_back(vpcf); } @@ -159,20 +196,26 @@ void geometryToJson(const Geometry &geom, const BOX3D &bbox, nlohmann::json &jso bool VirtualPointCloud::write(std::string filename) { - std::ofstream outputJson(filename); + std::string filenameAbsolute = filename; + if (!fs::path(filename).is_absolute()) + { + filenameAbsolute = fs::absolute(filename); + } + + std::ofstream outputJson(filenameAbsolute); if (!outputJson.good()) { - std::cerr << "Failed to create file: " << filename << std::endl; + std::cerr << "Failed to create file: " << filenameAbsolute << std::endl; return false; } - fs::path outputPath = fs::path(filename).parent_path(); + fs::path outputPath = 
fs::path(filenameAbsolute).parent_path(); std::vector jFiles; for ( const File &f : files ) { std::string assetFilename; - if (Utils::startsWith(f.filename, "http://") || Utils::startsWith(f.filename, "https://")) + if (pdal::Utils::isRemote(f.filename)) { // keep remote URLs as they are assetFilename = f.filename; @@ -185,20 +228,21 @@ bool VirtualPointCloud::write(std::string filename) } std::string fileId = fs::path(f.filename).stem().string(); // TODO: we should make sure the ID is unique + pdal::Geometry boundary = !f.boundaryWkt.empty() ? pdal::Geometry(f.boundaryWkt) : pdal::Polygon(f.bbox); + // use bounding box as the geometry - // TODO: use calculated boundary polygon when available nlohmann::json jsonGeometry, jsonBbox; - geometryToJson(pdal::Polygon(f.bbox), f.bbox, jsonGeometry, jsonBbox); + geometryToJson(boundary, f.bbox, jsonGeometry, jsonBbox); // bounding box in WGS 84: reproject if possible, or keep it as is nlohmann::json jsonGeometryWgs84 = jsonGeometry, jsonBboxWgs84 = jsonBbox; if (!f.crsWkt.empty()) { - pdal::Polygon p(f.bbox); - p.setSpatialReference(pdal::SpatialReference(f.crsWkt)); - if (p.transform("EPSG:4326")) + pdal::Geometry boundaryWgs84 = boundary; + boundaryWgs84.setSpatialReference(pdal::SpatialReference(f.crsWkt)); + if (boundaryWgs84.transform("EPSG:4326")) { - geometryToJson(p, p.bounds(), jsonGeometryWgs84, jsonBboxWgs84); + geometryToJson(boundaryWgs84, boundaryWgs84.bounds(), jsonGeometryWgs84, jsonBboxWgs84); } } @@ -225,18 +269,61 @@ bool VirtualPointCloud::write(std::string filename) { "pc:encoding", "?" }, // TODO: https://github.com/stac-extensions/pointcloud/issues/6 { "pc:schemas", schemas }, - // TODO: write pc:statistics if we have it (optional) - // projection extension properties (none are required) { "proj:wkt2", f.crsWkt }, { "proj:geometry", jsonGeometry }, { "proj:bbox", jsonBbox }, }; + + if (!f.stats.empty()) + { + nlohmann::json statsArray = json::array(); + for (const VirtualPointCloud::StatsItem &s : f.stats) + { + nlohmann::json stat = { + { "name", s.name }, + { "position", s.position }, + { "average", s.average }, + { "count", s.count }, + { "maximum", s.maximum }, + { "minimum", s.minimum }, + { "stddev", s.stddev }, + { "variance", s.variance }, + }; + statsArray.push_back(stat); + } + props["pc:statistics"] = statsArray; + } + nlohmann::json links = json::array(); - nlohmann::json asset = { + nlohmann::json dataAsset = { { "href", assetFilename }, + { "roles", json::array({"data"}) }, }; + nlohmann::json assets = { { "data", dataAsset } }; + + if (!f.overviewFilename.empty()) + { + std::string overviewFilename; + if (pdal::Utils::isRemote(f.overviewFilename)) + { + // keep remote URLs as they are + overviewFilename = f.overviewFilename; + } + else + { + // turn local paths to relative + fs::path fRelative = fs::relative(f.overviewFilename, outputPath); + overviewFilename = "./" + fRelative.string(); + } + + nlohmann::json overviewAsset = { + { "href", overviewFilename }, + { "roles", json::array({"overview"}) }, + }; + assets["overview"] = overviewAsset; + } jFiles.push_back( { @@ -253,7 +340,7 @@ bool VirtualPointCloud::write(std::string filename) { "bbox", jsonBboxWgs84 }, { "properties", props }, { "links", links }, - { "assets", { { "data", asset } } }, + { "assets", assets }, }); @@ -309,10 +396,21 @@ void buildVpc(std::vector args) { std::string outputFile; std::vector inputFiles; + bool boundaries = false; + bool stats = false; + bool overview = false; + int max_threads = -1; + bool verbose = false; ProgramArgs 
programArgs; programArgs.add("output,o", "Output virtual point cloud file", outputFile); programArgs.add("files,f", "input files", inputFiles).setPositional(); + programArgs.add("boundary", "Calculate boundary polygons from data", boundaries); + programArgs.add("stats", "Calculate statistics from data", stats); + programArgs.add("overview", "Create overview point cloud from source data", overview); + + pdal::Arg& argThreads = programArgs.add("threads", "Max number of concurrent threads for parallel runs", max_threads); + programArgs.add("verbose", "Print extra debugging output", verbose); try { @@ -333,14 +431,32 @@ void buildVpc(std::vector args) return; } + if (!argThreads.set()) // in such case our value is reset to zero + { + // use number of cores if not specified by the user + max_threads = std::thread::hardware_concurrency(); + if (max_threads == 0) + { + // in case the value can't be detected, use something reasonable... + max_threads = 4; + } + } + // TODO: would be nice to support input directories too (recursive) VirtualPointCloud vpc; for (const std::string &inputFile : inputFiles) { + std::string inputFileAbsolute = inputFile; + if (!pdal::Utils::isRemote(inputFile) && !fs::path(inputFile).is_absolute()) + { + // convert to absolute path using the current path + inputFileAbsolute = fs::absolute(inputFile); + } + MetadataNode layout; - MetadataNode n = getReaderMetadata(inputFile, &layout); + MetadataNode n = getReaderMetadata(inputFileAbsolute, &layout); point_count_t cnt = n.findChild("count").value(); BOX3D bbox( n.findChild("minx").value(), @@ -357,7 +473,7 @@ void buildVpc(std::vector args) int year = n.findChild("creation_year").value(); VirtualPointCloud::File f; - f.filename = inputFile; + f.filename = inputFileAbsolute; f.count = cnt; f.bbox = bbox; f.crsWkt = crsWkt; @@ -374,6 +490,142 @@ void buildVpc(std::vector args) vpc.files.push_back(f); } + // + + std::string overviewFilenameBase, overviewFilenameCopc; + std::vector overviewTempFiles; + int overviewCounter = 0; + if (overview) + { + // for /tmp/hello.vpc we will use /tmp/hello-overview.laz as overview file + fs::path outputParentDir = fs::path(outputFile).parent_path(); + fs::path outputStem = outputParentDir / fs::path(outputFile).stem(); + overviewFilenameBase = std::string(outputStem); + overviewFilenameCopc = std::string(outputStem) + "-overview.copc.laz"; + } + + if (boundaries || stats || overview) + { + std::map hexbinFilters, statsFilters; + std::vector> pipelines; + + for (VirtualPointCloud::File &f : vpc.files) + { + std::unique_ptr manager( new PipelineManager ); + + Stage* last = &manager->makeReader(f.filename, ""); + if (boundaries) + { + pdal::Options hexbin_opts; + // TODO: any options? + last = &manager->makeFilter( "filters.hexbin", *last, hexbin_opts ); + hexbinFilters[f.filename] = last; + } + + if (stats) + { + pdal::Options stats_opts; + // TODO: any options? + last = &manager->makeFilter( "filters.stats", *last, stats_opts ); + statsFilters[f.filename] = last; + } + + if (overview) + { + // TODO: configurable method and step size? 
+ pdal::Options decim_opts; + decim_opts.add(pdal::Option("step", 1000)); + last = &manager->makeFilter( "filters.decimation", *last, decim_opts ); + + std::string overviewOutput = overviewFilenameBase + "-overview-tmp-" + std::to_string(++overviewCounter) + ".las"; + overviewTempFiles.push_back(overviewOutput); + + pdal::Options writer_opts; + writer_opts.add(pdal::Option("forward", "all")); // TODO: maybe we could use lower scale than the original + manager->makeWriter(overviewOutput, "", *last, writer_opts); + } + + pipelines.push_back(std::move(manager)); + } + + runPipelineParallel(vpc.totalPoints(), true, pipelines, max_threads, verbose); + + if (overview) + { + // When doing overviews, this is the second stage where we index overview point cloud. + // We do it separately because writers.copc is not streamable. We could also use + // untwine instead of writers.copc... + + std::unique_ptr manager( new PipelineManager ); + std::vector> pipelinesCopcOverview; + + // TODO: I am not really sure why we need a merge filter, but without it + // I am only getting points in output COPC from the last reader. Example + // from the documentation suggests the merge filter should not be needed: + // https://pdal.io/en/latest/stages/writers.copc.html + + Stage &merge = manager->makeFilter("filters.merge"); + + pdal::Options writer_opts; + //writer_opts.add(pdal::Option("forward", "all")); + Stage& writer = manager->makeWriter(overviewFilenameCopc, "writers.copc", merge, writer_opts); + + for (const std::string &overviewTempFile : overviewTempFiles) + { + Stage& reader = manager->makeReader(overviewTempFile, ""); + merge.setInput(reader); + } + + if (verbose) + { + std::cout << "Indexing overview point cloud..." << std::endl; + } + pipelinesCopcOverview.push_back(std::move(manager)); + runPipelineParallel(vpc.totalPoints()/1000, false, pipelinesCopcOverview, max_threads, verbose); + + // delete tmp overviews + for (const std::string &overviewTempFile : overviewTempFiles) + { + std::filesystem::remove(overviewTempFile); + } + } + + for (VirtualPointCloud::File &f : vpc.files) + { + if (boundaries) + { + pdal::Stage *hexbinFilter = hexbinFilters[f.filename]; + std::string b = hexbinFilter->getMetadata().findChild("boundary").value(); + f.boundaryWkt = b; + } + if (stats) + { + pdal::Stage *statsFilter = statsFilters[f.filename]; + MetadataNode m = statsFilter->getMetadata(); + std::vector children = m.children("statistic"); + for (const MetadataNode &n : children) + { + VirtualPointCloud::StatsItem s( + n.findChild("name").value(), + n.findChild("position").value(), + n.findChild("average").value(), + n.findChild("count").value(), + n.findChild("maximum").value(), + n.findChild("minimum").value(), + n.findChild("stddev").value(), + n.findChild("variance").value()); + f.stats.push_back(s); + } + } + if (overview) + { + f.overviewFilename = overviewFilenameCopc; + } + } + } + + // + vpc.dump(); vpc.write(outputFile); diff --git a/external/pdal_wrench/vpc.hpp b/external/pdal_wrench/vpc.hpp index 723b6467550..3c35633bd9e 100644 --- a/external/pdal_wrench/vpc.hpp +++ b/external/pdal_wrench/vpc.hpp @@ -15,6 +15,7 @@ #include #include +#include #include #include @@ -26,6 +27,7 @@ void buildVpc(std::vector args); struct VirtualPointCloud { + //! Schema for a single attribute struct SchemaItem { SchemaItem(const std::string &n, const std::string &t, int s): name(n), type(t), size(s) {} @@ -35,14 +37,36 @@ struct VirtualPointCloud int size; }; + //! 
Stats for a single attribute
+    struct StatsItem
+    {
+        StatsItem(const std::string &n, uint32_t p, double a, point_count_t c, double max, double min, double st, double vr)
+            : name(n), position(p), average(a), count(c), maximum(max), minimum(min), stddev(st), variance(vr) {}
+
+        std::string name;
+        uint32_t position;
+        double average;
+        point_count_t count;
+        double maximum;
+        double minimum;
+        double stddev;
+        double variance;
+    };
+
     struct File
     {
         std::string filename;
         point_count_t count;
+        std::string boundaryWkt;  // not pdal::Geometry because of https://github.com/PDAL/PDAL/issues/4016
         BOX3D bbox;
         std::string crsWkt;
         std::string datetime;   // RFC 3339 encoded date/time - e.g. 2023-01-01T12:00:00Z
         std::vector<SchemaItem> schema;   // we're not using it, just for STAC export
+        std::vector<StatsItem> stats;
+
+        // support for overview point clouds - currently we assume a file refers to at most a single overview file
+        // (when building VPC with overviews, we create one overview file for all source data)
+        std::string overviewFilename;
     };
 
     std::vector<File> files;