diff --git a/DeepSkyStacker.VS2022.sln b/DeepSkyStacker.VS2022.sln index ab26b9a8..16719770 100644 --- a/DeepSkyStacker.VS2022.sln +++ b/DeepSkyStacker.VS2022.sln @@ -10,18 +10,20 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DeepSkyStackerCL", "DeepSky {1747F255-9CB9-472B-8FEE-9E0BBFBAD49D} = {1747F255-9CB9-472B-8FEE-9E0BBFBAD49D} {A2F500C6-6903-4C2D-906D-CE86B99BA50D} = {A2F500C6-6903-4C2D-906D-CE86B99BA50D} {A71D2131-F425-381F-8A9A-29D60132A046} = {A71D2131-F425-381F-8A9A-29D60132A046} + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1} = {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DeepSkyStackerLive", "DeepSkyStackerLive\DeepSkyStackerLive.vcxproj", "{6290D6C2-900F-496C-A522-D5C8BF807066}" ProjectSection(ProjectDependencies) = postProject - {D5FB2402-A821-4474-91E7-07F0DD5866F0} = {D5FB2402-A821-4474-91E7-07F0DD5866F0} {A2F500C6-6903-4C2D-906D-CE86B99BA50D} = {A2F500C6-6903-4C2D-906D-CE86B99BA50D} {A71D2131-F425-381F-8A9A-29D60132A046} = {A71D2131-F425-381F-8A9A-29D60132A046} + {D5FB2402-A821-4474-91E7-07F0DD5866F0} = {D5FB2402-A821-4474-91E7-07F0DD5866F0} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DeepSkyStacker", "DeepSkyStacker\DeepSkyStacker.vcxproj", "{90721C24-BC43-450A-ADED-3417280D075B}" ProjectSection(ProjectDependencies) = postProject {A71D2131-F425-381F-8A9A-29D60132A046} = {A71D2131-F425-381F-8A9A-29D60132A046} + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1} = {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1} EndProjectSection EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "libjpg", "libjpg\libjpg.vcxproj", "{A2F500C6-6903-4C2D-906D-CE86B99BA50D}" @@ -37,8 +39,11 @@ EndProject Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DeepSkyStackerTest", "DeepSkyStackerTest\DeepSkyStackerTest.vcxproj", "{487E5070-BF81-4DEF-BE9F-510EEACE627B}" ProjectSection(ProjectDependencies) = postProject {1747F255-9CB9-472B-8FEE-9E0BBFBAD49D} = 
{1747F255-9CB9-472B-8FEE-9E0BBFBAD49D} + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1} = {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "DeepSkyStackerKernel", "DeepSkyStackerKernel\DeepSkyStackerKernel.vcxproj", "{CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|x64 = Debug|x64 @@ -77,6 +82,10 @@ Global {487E5070-BF81-4DEF-BE9F-510EEACE627B}.Debug|x64.Build.0 = Debug|x64 {487E5070-BF81-4DEF-BE9F-510EEACE627B}.Release|x64.ActiveCfg = Release|x64 {487E5070-BF81-4DEF-BE9F-510EEACE627B}.Release|x64.Build.0 = Release|x64 + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1}.Debug|x64.ActiveCfg = Debug|x64 + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1}.Debug|x64.Build.0 = Debug|x64 + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1}.Release|x64.ActiveCfg = Release|x64 + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1}.Release|x64.Build.0 = Release|x64 EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/DeepSkyStacker/BackgroundOptions.cpp b/DeepSkyStacker/BackgroundOptions.cpp index e0cabe37..426c6b07 100644 --- a/DeepSkyStacker/BackgroundOptions.cpp +++ b/DeepSkyStacker/BackgroundOptions.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #include "BackgroundOptions.h" #include "ui/ui_BackgroundOptions.h" -#include "../DeepSkyStackerTest/AvxAccumulateTest.h" +#include "StackingTasks.h" #include "Workspace.h" diff --git a/DeepSkyStacker/DSS-versionhelpers.h b/DeepSkyStacker/DSS-versionhelpers.h index 2c00e0be..4c0003d8 100644 --- a/DeepSkyStacker/DSS-versionhelpers.h +++ b/DeepSkyStacker/DSS-versionhelpers.h @@ -8,7 +8,7 @@ * Copyright (c) Microsoft Corp. All rights reserved. 
* * * ******************************************************************/ -#include +#include "DSS-winapifamily.h" #include #ifdef _MSC_VER diff --git a/DeepSkyStacker/DeepSkyStacker.vcxproj b/DeepSkyStacker/DeepSkyStacker.vcxproj index e2015b2a..8e656d2f 100644 --- a/DeepSkyStacker/DeepSkyStacker.vcxproj +++ b/DeepSkyStacker/DeepSkyStacker.vcxproj @@ -87,7 +87,7 @@ - .\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;.\;..\;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;../libraw;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + .\..\DeepSkyStacker;.\..\DeepSkyStackerKernel;.\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;..\libraw;$(Boost_1_80_0);%(AdditionalIncludeDirectories) _UNICODE;UNICODE;NOMINMAX;LIBRAW_NODLL;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;NDEBUG;_CRT_SECURE_NO_DEPRECATE;USE_LIBTIFF_STATIC;%(PreprocessorDefinitions) MultiThreadedDLL $(OutDir)$(TargetName).pdb @@ -158,7 +158,7 @@ $(QtToolsPath)\windeployqt $(TargetPath) - .\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;.\;..\;../Libraw;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + .\..\DeepSkyStacker;.\..\DeepSkyStackerKernel;.\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;..\Libraw;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) _UNICODE;UNICODE;NOMINMAX;LIBRAW_NODLL;WIN32;QT_CORE_LIB;QT_GUI_LIB;QT_WIDGETS_LIB;_DEBUG;_CRT_SECURE_NO_DEPRECATE;USE_LIBTIFF_STATIC;Z_TRACE_DEVELOP;%(PreprocessorDefinitions) false Default @@ -267,121 +267,46 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) Use Use - Use Use - - - - - - - - - - - Use Use + - - - - - - - - - - Use Use - - - - - - - - Use - Use - + - - - - - Use - Use - Level3 Level3 - - Use - Use - - + - - Use - Use - - - - - - Use Use - - - - - - - Use - stdafx.h - Use - stdafx.h - - - Use - Use - input - %(Filename).moc - input - %(Filename).moc - Use Use - - - - - - Use Use 
@@ -408,12 +333,10 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) Use Use - Use Use - Use Use @@ -423,26 +346,22 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) Use - - + - input %(Filename).moc input %(Filename).moc - Use Use - Use Use @@ -451,15 +370,13 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) Use Use - Create Create - + - @@ -478,7 +395,6 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) - @@ -494,106 +410,34 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + - - - - - - - - - - - - - - - - - + - - - - - - - - - - - @@ -604,10 +448,7 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) - - - @@ -617,29 +458,20 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) - - + - - - - - - - - + - @@ -758,6 +590,9 @@ $(QtToolsPath)\windeployqt --pdb $(TargetPath) + + {cb7b75f1-08f4-4c8d-a7ef-2aa33e9a67f1} + {a2f500c6-6903-4c2d-906d-ce86b99ba50d} diff --git a/DeepSkyStacker/DeepStackerDlg.cpp b/DeepSkyStacker/DeepStackerDlg.cpp deleted file mode 100644 index 72409e73..00000000 --- a/DeepSkyStacker/DeepStackerDlg.cpp +++ /dev/null @@ -1,629 +0,0 @@ -// DeepStackerDlg.cpp : implementation file -// -#include "stdafx.h" -#include "DeepSkyStacker.h" -#include "DeepStackerDlg.h" -#include "DSS-versionhelpers.h" -#include "ExplorerBar.h" -#include "StackingDlg.h" - -/* ------------------------------------------------------------------- */ - -static bool GetDefaultSettingsFileName(CString & strFile) -{ - CString strBase; - TCHAR szFileName[1+_MAX_PATH]; - TCHAR szDrive[1+_MAX_DRIVE]; - TCHAR szDir[1+_MAX_DIR]; - - GetModuleFileName(nullptr, szFileName, sizeof(szFileName)/sizeof(TCHAR)); - strBase = szFileName; - _tsplitpath(strBase, szDrive, szDir, nullptr, nullptr); - - strFile = szDrive; - strFile += szDir; - strFile += "DSSSettings.DSSSettings"; - - return true; -}; - -/* ------------------------------------------------------------------- */ - -#pragma pack(push, HDSETTINGS, 2) 
- -constexpr std::uint32_t HDSSETTINGS_MAGIC = 0x7ABC6F10U; - -typedef struct tagHDSETTINGSHEADER -{ - std::uint32_t dwMagic; // Magic number (always HDSSETTINGS_MAGIC) - std::uint32_t dwHeaderSize; // Always sizeof(HDSETTINGSHEADER); - int lNrSettings; // Number of settings - std::uint32_t dwFlags; // Flags - char Reserved[32]; // Reserved (set to 0) -} HDSETTINGSHEADER; - -#pragma pack(pop, HDSETTINGS) - -/* ------------------------------------------------------------------- */ - -bool CDSSSettings::Load(LPCTSTR szFile) -{ - bool bResult = false; - CString strFile = szFile; - FILE * hFile = nullptr; - - if (!strFile.GetLength()) - GetDefaultSettingsFileName(strFile); - - hFile = _tfopen(strFile, _T("rb")); - if (hFile) - { - HDSETTINGSHEADER Header; - - fread(&Header, sizeof(Header), 1, hFile); - if ((Header.dwMagic == HDSSETTINGS_MAGIC) && (Header.dwHeaderSize == sizeof(Header))) - { - m_lSettings.clear(); - for (int i = 0; i < Header.lNrSettings; i++) - { - CDSSSetting cds; - cds.Load(hFile); - m_lSettings.push_back(cds); - }; - - bResult = true; - m_lSettings.sort(); - }; - - fclose(hFile); - }; - - m_bLoaded = true; - - return bResult; -}; - -/* ------------------------------------------------------------------- */ - -bool CDSSSettings::Save(LPCTSTR szFile) -{ - bool bResult = false; - CString strFile = szFile; - FILE * hFile = nullptr; - - if (!strFile.GetLength()) - GetDefaultSettingsFileName(strFile); - - hFile = _tfopen(strFile, _T("wb")); - if (hFile) - { - m_lSettings.sort(); - - HDSETTINGSHEADER Header; - - memset(&Header, 0, sizeof(Header)); - - Header.dwMagic = HDSSETTINGS_MAGIC; - Header.dwHeaderSize = sizeof(Header); - Header.lNrSettings = static_cast(m_lSettings.size()); - - fwrite(&Header, sizeof(Header), 1, hFile); - for (auto it = m_lSettings.begin(); it != m_lSettings.end(); ++it) - it->Save(hFile); - - fclose(hFile); - bResult = true; - }; - - return bResult; -}; - -/* ------------------------------------------------------------------- */ -/* 
------------------------------------------------------------------- */ - -///////////////////////////////////////////////////////////////////////////// -// CDeepStackerDlg dialog - -UINT WM_TASKBAR_BUTTON_CREATED = ::RegisterWindowMessage(_T("TaskbarButtonCreated")); - -CDeepStackerDlg::CDeepStackerDlg(CWnd* pParent /*=nullptr*/) - : CDialog(CDeepStackerDlg::IDD, pParent), - CurrentTab{ 0 }, - widget{ nullptr }, - splitter{ nullptr }, - explorerBar{ nullptr }, - stackedWidget{ nullptr }, - stackingDlg{ nullptr }, - winHost{ nullptr }, - processingDlg{ CProcessingDlg(this) }, - m_taskbarList{ nullptr } -{ - //{{AFX_DATA_INIT(CDeepStackerDlg) - // NOTE: the ClassWizard will add member initialization here - //}}AFX_DATA_INIT - Create(CDeepStackerDlg::IDD, pParent); - m_progress = false; -} - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::DoDataExchange(CDataExchange* pDX) -{ - CDialog::DoDataExchange(pDX); - //{{AFX_DATA_MAP(CDeepStackerDlg) - //}}AFX_DATA_MAP - DDX_Control(pDX, IDC_BARSTATIC, m_BarStatic); -} - -/* ------------------------------------------------------------------- */ - -BEGIN_MESSAGE_MAP(CDeepStackerDlg, CDialog) - //{{AFX_MSG_MAP(CDeepStackerDlg) - ON_WM_SIZE() - //}}AFX_MSG_MAP - ON_WM_CLOSE() - ON_MESSAGE(WM_HELP, OnHTMLHelp) - ON_BN_CLICKED(IDCANCEL, &CDeepStackerDlg::OnBnClickedCancel) - ON_WM_DROPFILES() - ON_WM_ERASEBKGND() - ON_REGISTERED_MESSAGE(WM_TASKBAR_BUTTON_CREATED, &CDeepStackerDlg::OnTaskbarButtonCreated) - ON_MESSAGE(WM_PROGRESS_INIT, &CDeepStackerDlg::OnProgressInit) - ON_MESSAGE(WM_PROGRESS_UPDATE, &CDeepStackerDlg::OnProgressUpdate) - ON_MESSAGE(WM_PROGRESS_STOP, &CDeepStackerDlg::OnProgressStop) -END_MESSAGE_MAP() - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::UpdateTab() -{ - switch (CurrentTab) - { - case IDD_REGISTERING : - case IDD_STACKING : - //stackedWidget->setVisible(true); - //stackedWidget->setCurrentIndex(0); 
- processingDlg.ShowWindow(SW_HIDE); - stackingDlg->setVisible(true); - stackingDlg->update(); -// m_dlgLibrary.ShowWindow(SW_HIDE); - break; - //case IDD_LIBRARY : - // stackingDlg.ShowWindow(SW_HIDE); - // processingDlg.ShowWindow(SW_HIDE); - // m_dlgLibrary.ShowWindow(SW_SHOW); - // break; - case IDD_PROCESSING : - //stackedWidget->setCurrentIndex(1); - //stackingDlg->setVisible(false); - //winHost->update(); - //stackedWidget->setVisible(false); - stackingDlg->setVisible(false); - processingDlg.ShowWindow(SW_SHOW); -// m_dlgLibrary.ShowWindow(SW_HIDE); - break; - }; - explorerBar->update(); - -}; - -/* ------------------------------------------------------------------- */ - -BOOL CDeepStackerDlg::OnEraseBkgnd(CDC * pDC) -{ - return 1; -}; - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::UpdateSizes() -{ - // Resize the tab control - CRect rcDlg; - QRect rect; - - GetClientRect(&rcDlg); - - if (stackingDlg && explorerBar) - { - //auto screen = QGuiApplication::screenAt(QPoint(rcDlg.left, rcDlg.top)); - //auto devicePixelRatio = screen->devicePixelRatio(); - int width = explorerBar->width(); - - rect = QRect(rcDlg.left, rcDlg.top, rcDlg.Width(), rcDlg.Height()); - if (IDD_PROCESSING == CurrentTab) - { - rect.setWidth(width); - } - widget->setGeometry(rect); - - rect.setWidth(width); - explorerBar->setGeometry(rect); - - width += 5; - rect.setLeft(width); - rect.setWidth(rcDlg.Width() - width); - rect.setHeight(rcDlg.Height()); - stackingDlg->setGeometry(rect); - - //rect = stackedWidget->rect(); - //QPoint pos = stackedWidget->pos(); - - //CRect rcProcessing = CRect(rect.x(), rect.y(), rect.width(), rect.height()); - - //if (stackingDlg.m_hWnd) - // stackingDlg.MoveWindow(&rcDlg); - if (IDD_PROCESSING == CurrentTab && - processingDlg.m_hWnd) - { - - // processingDlg.MoveWindow(&rcProcessing); - processingDlg.SetWindowPos(&CWnd::wndTopMost, rect.x(), rect.y(), rect.width(), rect.height(), - SWP_SHOWWINDOW); - } 
- else - { - processingDlg.SetWindowPos(&CWnd::wndTopMost, rect.x(), rect.y(), rect.width(), rect.height(), - SWP_HIDEWINDOW); - } - //if (m_dlgLibrary.m_hWnd) - // m_dlgLibrary.MoveWindow(&rcDlg); - //if (m_ExplorerBar.m_hWnd) - // m_ExplorerBar.MoveWindow(&rcExplorerBar); - //widget->setGeometry(rcDlg); - }; -}; - - -/* ------------------------------------------------------------------- */ -///////////////////////////////////////////////////////////////////////////// -// CDeepStackerDlg message handlers - -void CDeepStackerDlg::ChangeTab(std::uint32_t dwTabID) -{ - if (dwTabID == IDD_REGISTERING) - dwTabID = IDD_STACKING; -//#ifdef DSSBETA -// if (dwTabID == IDD_STACKING && (GetAsyncKeyState(VK_CONTROL) & 0x8000)) -// dwTabID = IDD_LIBRARY; -//#endif - CurrentTab = dwTabID; - UpdateTab(); -}; - -/* ------------------------------------------------------------------- */ - -BOOL CDeepStackerDlg::OnInitDialog() -{ - ZFUNCTRACE_RUNTIME(); - ZTRACE_RUNTIME("Initializing Main Dialog"); - CDialog::OnInitDialog(); - ZTRACE_RUNTIME("Initializing Main Dialog - ok"); - - ZTRACE_RUNTIME("Restoring Window Position"); - RestoreWindowPosition(this, "Position"); - ZTRACE_RUNTIME("Restoring Window Position - ok"); - - CRect rect; - GetWindowRect(&rect); - - widget = new QWinWidget(this); - widget->setObjectName("winWidget"); - - QHBoxLayout* horizontalLayout { new QHBoxLayout(widget) }; - horizontalLayout->setObjectName("horizontalLayout"); - //widget->setLayout(horizontalLayout); - //ZTRACE_RUNTIME("Creating Horizontal Splitter"); - //splitter = new QSplitter(Qt::Horizontal, widget); - //splitter->setObjectName("splitter"); - - ZTRACE_RUNTIME("Creating Explorer Bar (Left Panel)"); - explorerBar = new ExplorerBar(widget); - explorerBar->setObjectName("explorerBar"); - //horizontalLayout->addWidget(explorerBar); - - //ZTRACE_RUNTIME("Creating stackedWidget"); - //stackedWidget = new QStackedWidget(splitter); - //stackedWidget->setObjectName("stackedWidget"); - 
//splitter->addWidget(stackedWidget); - - ZTRACE_RUNTIME("Creating Stacking Panel"); - stackingDlg = new DSS::StackingDlg(widget); - stackingDlg->setObjectName("stackingDlg"); - //horizontalLayout->addWidget(stackingDlg); - - //ZTRACE_RUNTIME("Adding Stacking Panel to stackedWidget"); - //stackedWidget->addWidget(stackingDlg); - - //winHost = new QWinHost(stackedWidget); - //winHost->setObjectName("winHost"); - //stackedWidget->addWidget(winHost); - - ZTRACE_RUNTIME("Creating Processing Panel"); - processingDlg.Create(IDD_PROCESSING, this); - - //winHost->setWindow(processingDlg.m_hWnd); - - //splitter->setStretchFactor(1, 1); // Want Stacking part to take any spare space. - - //horizontalLayout->addWidget(splitter); - widget->setSizePolicy(QSizePolicy::Expanding, QSizePolicy::Expanding); - - //stackedWidget->show(); - //splitter->show(); - widget->show(); - - CString strMask; - CString strTitle; - - // - // The call to CWnd::DragAcceptFiles() was moved here from DeepSkyStacker.cpp because it can only be called once - // the HWND for the dialog is valid (not nullptr). - // - // This HWND is only valid once CDialog::OnInitDialog() above has been called. 
- // - this->DragAcceptFiles(true); - - GetWindowText(strMask); - strTitle.Format(strMask, _T(VERSION_DEEPSKYSTACKER)); - SetWindowText(strTitle); - m_strBaseTitle = strTitle; - - SetIcon(AfxGetApp()->LoadIcon(IDI_APP), true); - stackingDlg->setStartingFileList(m_strStartFileList); - - - //m_dlgLibrary.Create(IDD_LIBRARY, this); - - - CurrentTab = IDD_REGISTERING; - ZTRACE_RUNTIME("Updating All Panels"); - UpdateTab(); - ZTRACE_RUNTIME("Updating All Panels - ok"); - ZTRACE_RUNTIME("Updating Sizes"); - UpdateSizes(); - ZTRACE_RUNTIME("Updating Sizes - ok"); - - ShowWindow(true); - return true; // return true unless you set the focus to a control - // EXCEPTION: OCX Property Pages should return false -} - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::SetCurrentFileInTitle(LPCTSTR szFile) -{ - CString strFileName = szFile; - if (strFileName.GetLength()) - { - TCHAR szFileName[1+_MAX_FNAME]; - TCHAR szExt[1+_MAX_EXT]; - - _tsplitpath(szFile, nullptr, nullptr, szFileName, szExt); - - CString strTitle; - - strTitle.Format(_T("%s - %s%s"), (LPCTSTR)m_strBaseTitle, szFileName, szExt); - SetWindowText(strTitle); - } - else - SetWindowText(m_strBaseTitle); -}; - - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::OnDropFiles(HDROP hDropInfo) -{ - //if (hDropInfo && stackingDlg.m_hWnd) - //{ - // SetForegroundWindow(); - // BringWindowToTop(); - // SetActiveWindow(); - // stackingDlg.DropFiles(hDropInfo); - //}; -}; - -LRESULT CDeepStackerDlg::OnTaskbarButtonCreated(WPARAM /*wParam*/, LPARAM /*lParam*/) -{ - if (IsWindows7OrGreater()) - { - HRESULT hr = ::CoCreateInstance(CLSID_TaskbarList, nullptr, CLSCTX_INPROC_SERVER, IID_ITaskbarList3, reinterpret_cast(&m_taskbarList)); - - if (FAILED(hr)) - return 0; - - hr = m_taskbarList->HrInit(); - - m_taskbarList->SetProgressState(m_hWnd, TBPF_NORMAL); - } - return 0; -} - -LRESULT CDeepStackerDlg::OnProgressInit(WPARAM 
/*wParam*/, LPARAM /*lParam*/) -{ - if (IsWindows7OrGreater()) - { - m_taskbarList->SetProgressState(m_hWnd, TBPF_NORMAL); - - m_progress = true; - } - - - return 0; -} - -LRESULT CDeepStackerDlg::OnProgressUpdate(WPARAM wParam, LPARAM lParam) -{ - if (IsWindows7OrGreater()) - { - // do not update if progress wasn't started manually - if (m_progress) - m_taskbarList->SetProgressValue(m_hWnd, wParam, lParam); - } - - return 0; -} - -LRESULT CDeepStackerDlg::OnProgressStop(WPARAM /*wParam*/, LPARAM /*lParam*/) -{ - if (IsWindows7OrGreater()) - { - m_taskbarList->SetProgressState(m_hWnd, TBPF_NOPROGRESS); - - m_progress = false; - } - - return 0; -} - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::OnSize(UINT nType, int cx, int cy) -{ - CDialog::OnSize(nType, cx, cy); - - UpdateSizes(); - // Resize all dialogs -} - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::OnClose() -{ - if (// stackingDlg.SaveOnClose() && - processingDlg.SaveOnClose()) - { - SaveWindowPosition(this, "Position"); - - CDialog::OnClose(); - }; -} - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::OnBnClickedCancel() -{ - if (!(GetKeyState(VK_ESCAPE) & 0x8000)) - OnCancel(); -} - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::OnOK() -{ - -}; - -/* ------------------------------------------------------------------- */ - -LRESULT CDeepStackerDlg::OnHTMLHelp(WPARAM, LPARAM) -{ - OnHelp(); - return 1; -}; - -/* ------------------------------------------------------------------- */ - -void CDeepStackerDlg::OnHelp() -{ - //if (m_ExplorerBar.m_hWnd) - explorerBar->onHelp(); -}; - -/* ------------------------------------------------------------------- */ -/* ------------------------------------------------------------------- */ - -void SaveWindowPosition(CWnd * pWnd, LPCSTR szRegistryPath) -{ - 
ZFUNCTRACE_RUNTIME(); - std::uint32_t dwMaximized = 0; - std::uint32_t dwTop = 0; - std::uint32_t dwLeft = 0; - std::uint32_t dwWidth = 0; - std::uint32_t dwHeight = 0; - - QSettings settings; - - WINDOWPLACEMENT wp; - - memset(&wp, 0, sizeof(wp)); - wp.length = sizeof(wp); - - pWnd->GetWindowPlacement(&wp); - dwMaximized = (wp.showCmd == SW_SHOWMAXIMIZED); - dwLeft = wp.rcNormalPosition.left; - dwTop = wp.rcNormalPosition.top; - - dwWidth = wp.rcNormalPosition.right-wp.rcNormalPosition.left; - dwHeight = wp.rcNormalPosition.bottom-wp.rcNormalPosition.top; - - ZTRACE_RUNTIME("Saving window position to: %s", szRegistryPath); - QString regBase(szRegistryPath); - QString key = regBase + "/Maximized"; - settings.setValue(key, (uint)dwMaximized); - - key = regBase + "/Top"; - settings.setValue(key, (uint)dwTop); - - key = regBase + "/Left"; - settings.setValue(key, (uint)dwLeft); - - key = regBase + "/Width"; - settings.setValue(key, (uint)dwWidth); - - key = regBase + "/Height"; - settings.setValue(key, (uint)dwHeight); - -}; - -/* ------------------------------------------------------------------- */ - -void RestoreWindowPosition(CWnd * pWnd, LPCSTR szRegistryPath, bool bCenter) -{ - ZFUNCTRACE_RUNTIME(); - std::uint32_t dwMaximized = 0; - std::uint32_t dwTop = 0; - std::uint32_t dwLeft = 0; - std::uint32_t dwWidth = 0; - std::uint32_t dwHeight = 0; - - QSettings settings; - - ZTRACE_RUNTIME("Restoring window position from: %s", szRegistryPath); - - QString regBase(szRegistryPath); - QString key = regBase + "/Maximized"; - dwMaximized = settings.value(key).toUInt(); - - key = regBase + "/Top"; - dwTop = settings.value(key).toUInt(); - - key = regBase + "/Left"; - dwLeft = settings.value(key).toUInt(); - - key = regBase + "/Width"; - dwWidth = settings.value(key).toUInt(); - - key = regBase += "/Height"; - dwHeight = settings.value(key).toUInt(); - - if (dwTop && dwLeft && dwWidth && dwHeight) - { - WINDOWPLACEMENT wp; - - memset(&wp, 0, sizeof(wp)); - wp.length = 
sizeof(wp); - wp.flags = 0; - wp.showCmd = dwMaximized ? SW_SHOWMAXIMIZED : SW_SHOWNORMAL; - wp.rcNormalPosition.left = dwLeft; - wp.rcNormalPosition.top = dwTop; - wp.rcNormalPosition.right = wp.rcNormalPosition.left+dwWidth; - wp.rcNormalPosition.bottom = wp.rcNormalPosition.top+dwHeight; - - pWnd->SetWindowPlacement(&wp); - if (bCenter) - pWnd->CenterWindow(); - }; -}; - -/* ------------------------------------------------------------------- */ diff --git a/DeepSkyStacker/DeepStackerDlg.h b/DeepSkyStacker/DeepStackerDlg.h deleted file mode 100644 index 845eb18f..00000000 --- a/DeepSkyStacker/DeepStackerDlg.h +++ /dev/null @@ -1,386 +0,0 @@ -#pragma once -// DeepStackerDlg.h : header file -// - -#include "DeepStack.h" -#include "DeepSkyStacker.h" - -class CDSSSetting -{ -public : - CString m_strName; - CBezierAdjust m_BezierAdjust; - CRGBHistogramAdjust m_HistoAdjust; - -private : - void CopyFrom(const CDSSSetting & cds) - { - m_strName = cds.m_strName; - m_BezierAdjust = cds.m_BezierAdjust; - m_HistoAdjust = cds.m_HistoAdjust; - }; - -public : - CDSSSetting() {}; - virtual ~CDSSSetting() {}; - - CDSSSetting(const CDSSSetting & cds) - { - CopyFrom(cds); - }; - - CDSSSetting & operator = (const CDSSSetting & cds) - { - CopyFrom(cds); - return (*this); - }; - - bool operator < (const CDSSSetting & cds) const - { - int nCompare; - nCompare = m_strName.CompareNoCase(cds.m_strName); - - if (nCompare < 0) - return true; - else - return false; - }; - - bool Load(FILE * hFile) - { - int lNameSize; - TCHAR szName[2000] = { _T('\0') }; - - fread(&lNameSize, sizeof(lNameSize), 1, hFile); - fread(szName, sizeof(TCHAR), lNameSize, hFile); - m_strName = szName; - return m_BezierAdjust.Load(hFile) && m_HistoAdjust.Load(hFile); - }; - - bool Save(FILE * hFile) - { - int lNameSize = m_strName.GetLength() + 1; - fwrite(&lNameSize, sizeof(lNameSize), 1, hFile); - fwrite((LPCTSTR)m_strName, sizeof(TCHAR), lNameSize, hFile); - - return m_BezierAdjust.Save(hFile) && 
m_HistoAdjust.Save(hFile); - }; -}; - -typedef std::list DSSSETTINGLIST; -typedef DSSSETTINGLIST::iterator DSSSETTINGITERATOR; - -class CDSSSettings -{ -private : - std::list m_lSettings; - bool m_bLoaded; - -public : - CDSSSettings() - { - m_bLoaded = false; - }; - virtual ~CDSSSettings() {}; - - bool IsLoaded() - { - return m_bLoaded; - }; - bool Load(LPCTSTR szFile = nullptr); - bool Save(LPCTSTR szFile = nullptr); - - int Count() - { - return static_cast(m_lSettings.size()); - }; - - bool GetItem(int lIndice, CDSSSetting & cds) - { - bool bResult = false; - - if (lIndice < m_lSettings.size()) - { - DSSSETTINGITERATOR it; - - it = m_lSettings.begin(); - while (lIndice) - { - it++; - lIndice--; - }; - - cds = (*it); - bResult = true; - }; - - return bResult; - }; - - bool Add(const CDSSSetting & cds) - { - m_lSettings.push_back(cds); - return true; - }; - - bool Remove(int lIndice) - { - bool bResult = false; - - if (lIndice < m_lSettings.size()) - { - DSSSETTINGITERATOR it; - - it = m_lSettings.begin(); - while (lIndice) - { - it++; - lIndice--; - }; - - m_lSettings.erase(it); - bResult = true; - }; - - return bResult; - }; -}; -class QSplitter; -class QStackedWidget; - - -#include "ExplorerBar.h" -#include "StackingDlg.h" - -#include "ProcessingDlg.h" -//#include "LibraryDlg.h" - -#include "afxwin.h" - -#include "qwinwidget.h" -#include "qwinhost.h" - - - -///////////////////////////////////////////////////////////////////////////// -// CDeepStackerDlg dialog - -enum DeepStackerDlgMessages -{ - WM_PROGRESS_INIT = WM_USER + 10000, - WM_PROGRESS_UPDATE, - WM_PROGRESS_STOP, -}; - -class CDeepStackerDlg : public CDialog -{ -private : - QWinWidget* widget; - QSplitter* splitter; - ExplorerBar* explorerBar; - QStackedWidget* stackedWidget; - DSS::StackingDlg* stackingDlg; - QWinHost* winHost; - - CProcessingDlg processingDlg; - //CLibraryDlg m_dlgLibrary; - - CDeepStack m_DeepStack; - CDSSSettings m_Settings; - std::uint32_t CurrentTab; - CString m_strStartFileList; 
- CString m_strBaseTitle; - ITaskbarList3* m_taskbarList; - bool m_progress; - -// Construction -public: - CDeepStackerDlg(CWnd* pParent = nullptr); // standard constructor - - ~CDeepStackerDlg() - { - if (explorerBar) - delete explorerBar; - if (stackingDlg) - delete stackingDlg; - if (widget) - delete widget; - }; - - void ChangeTab(std::uint32_t dwTabID); - std::uint32_t GetCurrentTab() - { - return CurrentTab; - }; - - void SetStartingFileList(LPCTSTR szStartFileList) - { - m_strStartFileList = szStartFileList; - }; - - inline void disableSubDialogs() - { - stackingDlg->setEnabled(false); - processingDlg.EnableWindow(false); - //m_dlgLibrary.EnableWindow(false); - explorerBar->setEnabled(false); - }; - - inline void enableSubDialogs() - { - stackingDlg->setEnabled(true); - processingDlg.EnableWindow(true); - //m_dlgLibrary.EnableWindow(true); - explorerBar->setEnabled(true); - }; - -// Dialog Data - //{{AFX_DATA(CDeepStackerDlg) - enum { IDD = IDD_DEEPSTACKER }; - CStatic m_BarStatic; - //}}AFX_DATA - - CDeepStack & GetDeepStack() - { - return m_DeepStack; - }; - - CDSSSettings & GetDSSSettings() - { - if (!m_Settings.IsLoaded()) - m_Settings.Load(); - - return m_Settings; - }; - - DSS::StackingDlg & GetStackingDlg() - { - return *stackingDlg; - }; - - CProcessingDlg & GetProcessingDlg() - { - return processingDlg; - }; - - - ExplorerBar & GetExplorerBar() - { - return *explorerBar; - }; - - void SetCurrentFileInTitle(LPCTSTR szFileName); - -// Overrides - // ClassWizard generated virtual function overrides - //{{AFX_VIRTUAL(CDeepStackerDlg) - protected: - virtual void DoDataExchange(CDataExchange* pDX); // DDX/DDV support - //}}AFX_VIRTUAL - -// Implementation -protected: - - // Generated message map functions - //{{AFX_MSG(CDeepStackerDlg) - virtual BOOL OnInitDialog(); - afx_msg void OnSize(UINT nType, int cx, int cy); - afx_msg BOOL OnEraseBkgnd(CDC * pDC); - //}}AFX_MSG - afx_msg LRESULT OnHTMLHelp(WPARAM, LPARAM); - afx_msg LRESULT 
OnOpenStartFileList(WPARAM, LPARAM); - - DECLARE_MESSAGE_MAP() - -private : - void UpdateTab(); - void UpdateSizes(); - afx_msg void OnOK(); - afx_msg void OnClose(); - afx_msg void OnBnClickedCancel(); - afx_msg void OnHelp(); - - afx_msg void OnDropFiles(HDROP hDropInfo); - afx_msg LRESULT OnTaskbarButtonCreated(WPARAM wParam, LPARAM lParam); - afx_msg LRESULT OnProgressInit(WPARAM wParam, LPARAM lParam); - afx_msg LRESULT OnProgressUpdate(WPARAM wParam, LPARAM lParam); - afx_msg LRESULT OnProgressStop(WPARAM wParam, LPARAM lParam); -}; - -/* ------------------------------------------------------------------- */ - -inline CDeepStackerDlg * GetDeepStackerDlg(CWnd * pDialog) -{ - if (pDialog) - { - CWnd * pParent = pDialog->GetParent(); - CDeepStackerDlg * pDlg = dynamic_cast(pParent); - - if (!pDlg) - pDlg = dynamic_cast(GetDSSApp()->m_pMainDlg); - - return pDlg; - } - else - { - CWnd * pWnd = GetDSSApp()->m_pMainDlg; - CDeepStackerDlg * pDlg = dynamic_cast(pWnd); - - return pDlg; - }; -}; - -/* ------------------------------------------------------------------- */ - -inline CDeepStack & GetDeepStack(CWnd * pDialog) -{ - CDeepStackerDlg * pDlg = GetDeepStackerDlg(pDialog); - - return pDlg->GetDeepStack(); -}; - -/* ------------------------------------------------------------------- */ - -inline CDSSSettings & GetDSSSettings(CWnd * pDialog) -{ - CDeepStackerDlg * pDlg = GetDeepStackerDlg(pDialog); - - return pDlg->GetDSSSettings(); -}; - -/* ------------------------------------------------------------------- */ - -inline DSS::StackingDlg & GetStackingDlg(CWnd * pDialog) -{ - CDeepStackerDlg * pDlg = GetDeepStackerDlg(pDialog); - - return pDlg->GetStackingDlg(); -}; - -/* ------------------------------------------------------------------- */ - -inline CProcessingDlg & GetProcessingDlg(CWnd * pDialog) -{ - CDeepStackerDlg * pDlg = GetDeepStackerDlg(pDialog); - - return pDlg->GetProcessingDlg(); -}; - - -/* 
------------------------------------------------------------------- */ - -inline void SetCurrentFileInTitle(LPCTSTR szFileName) -{ - CDeepStackerDlg * pDlg = GetDeepStackerDlg(nullptr); - - if (pDlg) - pDlg->SetCurrentFileInTitle(szFileName); -}; - -/* ------------------------------------------------------------------- */ - -void SaveWindowPosition(CWnd * pWnd, LPCSTR szRegistryPath); -void RestoreWindowPosition(CWnd * pWnd, LPCSTR szRegistryPath, bool bCenter = false); - -//{{AFX_INSERT_LOCATION}} -// Microsoft Visual C++ will insert additional declarations immediately before the previous line. \ No newline at end of file diff --git a/DeepSkyStacker/RGBTab.cpp b/DeepSkyStacker/RGBTab.cpp index 440e6522..a041be3e 100644 --- a/DeepSkyStacker/RGBTab.cpp +++ b/DeepSkyStacker/RGBTab.cpp @@ -260,26 +260,13 @@ static void PopulateHistoAdjustMenu(CMenu & menu) menu.CreatePopupMenu(); - HistoAdjustTypeText(HAT_LINEAR, strText); - menu.AppendMenu(MF_STRING, HAT_LINEAR, strText); - - HistoAdjustTypeText(HAT_CUBEROOT, strText); - menu.AppendMenu(MF_STRING, HAT_CUBEROOT, strText); - - HistoAdjustTypeText(HAT_SQUAREROOT, strText); - menu.AppendMenu(MF_STRING, HAT_SQUAREROOT, strText); - - HistoAdjustTypeText(HAT_LOG, strText); - menu.AppendMenu(MF_STRING, HAT_LOG, strText); - - HistoAdjustTypeText(HAT_LOGLOG, strText); - menu.AppendMenu(MF_STRING, HAT_LOGLOG, strText); - - HistoAdjustTypeText(HAT_LOGSQUAREROOT, strText); - menu.AppendMenu(MF_STRING, HAT_LOGSQUAREROOT, strText); - - HistoAdjustTypeText(HAT_ASINH, strText); - menu.AppendMenu(MF_STRING, HAT_ASINH, strText); + menu.AppendMenu(MF_STRING, HAT_LINEAR, HistoAdjustTypeText(HAT_LINEAR).toStdWString().c_str()); + menu.AppendMenu(MF_STRING, HAT_CUBEROOT, HistoAdjustTypeText(HAT_CUBEROOT).toStdWString().c_str()); + menu.AppendMenu(MF_STRING, HAT_SQUAREROOT, HistoAdjustTypeText(HAT_SQUAREROOT).toStdWString().c_str()); + menu.AppendMenu(MF_STRING, HAT_LOG, HistoAdjustTypeText(HAT_LOG).toStdWString().c_str()); + 
menu.AppendMenu(MF_STRING, HAT_LOGLOG, HistoAdjustTypeText(HAT_LOGLOG).toStdWString().c_str()); + menu.AppendMenu(MF_STRING, HAT_LOGSQUAREROOT, HistoAdjustTypeText(HAT_LOGSQUAREROOT).toStdWString().c_str()); + menu.AppendMenu(MF_STRING, HAT_ASINH, HistoAdjustTypeText(HAT_ASINH).toStdWString().c_str()); } void CRGBTab::OnRedHat() diff --git a/DeepSkyStacker/RGBTab.h b/DeepSkyStacker/RGBTab.h index fa1d1380..93d9cc47 100644 --- a/DeepSkyStacker/RGBTab.h +++ b/DeepSkyStacker/RGBTab.h @@ -3,6 +3,7 @@ #include "Histogram.h" #include "BtnST.h" #include "gradientctrl.h" +#include "resource.h" ///////////////////////////////////////////////////////////////////////////// // CRGBTab dialog diff --git a/DeepSkyStacker/SettingsDlg.cpp b/DeepSkyStacker/SettingsDlg.cpp index 1763dac1..8c24ff39 100644 --- a/DeepSkyStacker/SettingsDlg.cpp +++ b/DeepSkyStacker/SettingsDlg.cpp @@ -5,6 +5,7 @@ #include "deepskystacker.h" #include "SettingsDlg.h" #include "dss_settings.h" +#include "resource.h" ///////////////////////////////////////////////////////////////////////////// // CSettingsDlg dialog diff --git a/DeepSkyStacker/SettingsDlg.h b/DeepSkyStacker/SettingsDlg.h index e2087fcb..675c370d 100644 --- a/DeepSkyStacker/SettingsDlg.h +++ b/DeepSkyStacker/SettingsDlg.h @@ -1,5 +1,6 @@ #pragma once #include "dss_settings.h" +#include "resource.h" ///////////////////////////////////////////////////////////////////////////// // CSettingsDlg dialog diff --git a/DeepSkyStacker/Version.rc b/DeepSkyStacker/Version.rc index 8f5f0114..fc594dfe 100644 --- a/DeepSkyStacker/Version.rc +++ b/DeepSkyStacker/Version.rc @@ -3,7 +3,7 @@ // Version // -#include "DSSVersion.h" +#include "./../DeepSkyStackerKernel/DSSVersion.h" VS_VERSION_INFO VERSIONINFO FILEVERSION DSSVER_MAJOR,DSSVER_MINOR,DSSVER_SUB,DSSVER_BUILD diff --git a/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj b/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj index a0c38b57..83810be4 100644 --- a/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj +++ 
b/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj @@ -88,7 +88,7 @@ false EnableFastChecks MultiThreadedDebugDLL - .\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;C:\Program Files (x86)\Visual Leak Detector\include;.\;..\;..\DeepSkyStacker;../Libraw;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + .\..\DeepSkyStackerKernel;.\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;C:\Program Files (x86)\Visual Leak Detector\include;.\;..\;..\DeepSkyStacker;../Libraw;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) $(OutDir)$(TargetName).pdb stdcpp20 true @@ -129,7 +129,7 @@ X64 - .\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;.\;..\;..\DeepSkyStacker;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;../libraw;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + .\..\DeepSkyStackerKernel;.\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;.\;..\;..\DeepSkyStacker;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;../libraw;$(Boost_1_80_0);%(AdditionalIncludeDirectories) _UNICODE;UNICODE;NOMINMAX;LIBRAW_NODLL;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;USE_LIBTIFF_STATIC;%(PreprocessorDefinitions) MultiThreadedDLL $(OutDir)$(TargetName).pdb @@ -171,65 +171,7 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Use - Use - - - Use - Use - - - - - - - - - - - - - - - input %(Filename).moc @@ -242,69 +184,14 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -323,6 +210,9 @@ + + {cb7b75f1-08f4-4c8d-a7ef-2aa33e9a67f1} + {a2f500c6-6903-4c2d-906d-ce86b99ba50d} diff --git a/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj.filters b/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj.filters index 89d7ae2d..db98e87c 100644 --- a/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj.filters +++ b/DeepSkyStackerCL/DeepSkyStackerCL.vcxproj.filters @@ -13,12 +13,6 @@ {67DA6AB6-F800-4c08-8B7A-83BB121AAD01} 
rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav - - {7a33a916-abde-4e02-8cfc-986c77615128} - - - {44a3746c-269b-4d46-913f-ddbd39bb1268} - @@ -27,346 +21,28 @@ Source Files - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - Source Files - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Source Files - Header Files - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Source Files - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - Header Files - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel - - - Kernel + + Header Files - - Kernel + + Header Files - + Header Files - - Resource Files - Resource Files @@ -659,11 +335,8 @@ - - Kernel - - Source Files + Header Files \ No newline at end of file diff --git a/DeepSkyStacker/AHDDemosaicing.cpp b/DeepSkyStackerKernel/AHDDemosaicing.cpp similarity index 100% rename from DeepSkyStacker/AHDDemosaicing.cpp rename to 
DeepSkyStackerKernel/AHDDemosaicing.cpp diff --git a/DeepSkyStacker/AHDDemosaicing.h b/DeepSkyStackerKernel/AHDDemosaicing.h similarity index 100% rename from DeepSkyStacker/AHDDemosaicing.h rename to DeepSkyStackerKernel/AHDDemosaicing.h diff --git a/DeepSkyStacker/BackgroundCalibration.cpp b/DeepSkyStackerKernel/BackgroundCalibration.cpp similarity index 99% rename from DeepSkyStacker/BackgroundCalibration.cpp rename to DeepSkyStackerKernel/BackgroundCalibration.cpp index 48a7cb32..18300b68 100644 --- a/DeepSkyStacker/BackgroundCalibration.cpp +++ b/DeepSkyStackerKernel/BackgroundCalibration.cpp @@ -1,5 +1,5 @@ #include -#include "resource.h" +//#include "resource.h" #include "BitmapBase.h" #include "BackgroundCalibration.h" #include "DSSProgress.h" diff --git a/DeepSkyStacker/BackgroundCalibration.h b/DeepSkyStackerKernel/BackgroundCalibration.h similarity index 100% rename from DeepSkyStacker/BackgroundCalibration.h rename to DeepSkyStackerKernel/BackgroundCalibration.h diff --git a/DeepSkyStacker/Bayer.cpp b/DeepSkyStackerKernel/Bayer.cpp similarity index 100% rename from DeepSkyStacker/Bayer.cpp rename to DeepSkyStackerKernel/Bayer.cpp diff --git a/DeepSkyStacker/Bayer.h b/DeepSkyStackerKernel/Bayer.h similarity index 100% rename from DeepSkyStacker/Bayer.h rename to DeepSkyStackerKernel/Bayer.h diff --git a/DeepSkyStacker/BezierAdjust.h b/DeepSkyStackerKernel/BezierAdjust.h similarity index 100% rename from DeepSkyStacker/BezierAdjust.h rename to DeepSkyStackerKernel/BezierAdjust.h diff --git a/DeepSkyStacker/BilinearParameters.cpp b/DeepSkyStackerKernel/BilinearParameters.cpp similarity index 100% rename from DeepSkyStacker/BilinearParameters.cpp rename to DeepSkyStackerKernel/BilinearParameters.cpp diff --git a/DeepSkyStacker/BilinearParameters.h b/DeepSkyStackerKernel/BilinearParameters.h similarity index 100% rename from DeepSkyStacker/BilinearParameters.h rename to DeepSkyStackerKernel/BilinearParameters.h diff --git a/DeepSkyStacker/BitMapFiller.cpp 
b/DeepSkyStackerKernel/BitMapFiller.cpp similarity index 97% rename from DeepSkyStacker/BitMapFiller.cpp rename to DeepSkyStackerKernel/BitMapFiller.cpp index 0f268d1d..58895200 100644 --- a/DeepSkyStacker/BitMapFiller.cpp +++ b/DeepSkyStackerKernel/BitMapFiller.cpp @@ -1,210 +1,210 @@ -#include "stdafx.h" -#include "BitMapFiller.h" -#include "avx_bitmap_filler.h" -#include "avx_support.h" -#include "DSSProgress.h" -#include "ZExcept.h" - -using namespace DSS; - -// ---------------------------------- -// BitmapFillerInterface -// ---------------------------------- - -std::unique_ptr BitmapFillerInterface::makeBitmapFiller(CMemoryBitmap* pBitmap, ProgressBase* pProgress, const double redWb, const double greenWb, const double blueWb) -{ - if (AvxSupport::checkSimdAvailability()) - return std::make_unique(pBitmap, pProgress, redWb, greenWb, blueWb); - else - return std::make_unique(pBitmap, pProgress, redWb, greenWb, blueWb); -} - -bool BitmapFillerInterface::isThreadSafe() const { return false; } - - -// ---------------------------------- -// BitmapFillerBase -// ---------------------------------- - -BitmapFillerBase::BitmapFillerBase(CMemoryBitmap* pB, ProgressBase* pP, const double redWb, const double greenWb, const double blueWb) : - pBitmap{ pB }, - pProgress{ pP }, - redScale{ static_cast(redWb) }, - greenScale{ static_cast(greenWb) }, - blueScale{ static_cast(blueWb) }, - - cfaType{ CFATYPE_NONE }, - isGray{ true }, - width{ 0 }, - height{ 0 }, - bytesPerChannel{ 0 }, - redBuffer{}, - greenBuffer{}, - blueBuffer{}, - cfaFactors{ 1.0f, 1.0f, 1.0f, 1.0f } -{} - -void BitmapFillerBase::SetCFAType(CFATYPE cfaTp) -{ - this->cfaType = cfaTp; - if (auto* pGray16Bitmap = dynamic_cast(pBitmap)) - pGray16Bitmap->SetCFAType(cfaTp); - setCfaFactors(); -} - -void BitmapFillerBase::setCfaFactors() -{ - const auto setFactors = [this](const float f0, const float f1, const float f2, const float f3) -> void - { - this->cfaFactors.assign({ f0, f1, f2, f3 }); - }; - - switch 
(cfaType) - { - case CFATYPE_BGGR: return setFactors(blueScale, greenScale, greenScale, redScale); - case CFATYPE_GRBG: return setFactors(greenScale, redScale, blueScale, greenScale); - case CFATYPE_GBRG: return setFactors(greenScale, blueScale, redScale, greenScale); - case CFATYPE_RGGB: return setFactors(redScale, greenScale, greenScale, blueScale); - } -}; - -bool BitmapFillerBase::isRgbBayerPattern() const -{ - switch (this->cfaType) - { - case CFATYPE_BGGR: - case CFATYPE_GRBG: - case CFATYPE_GBRG: - case CFATYPE_RGGB: return true; - } - return false; -} - -void BitmapFillerBase::setGrey(bool grey) -{ - this->isGray = grey; -} - -void BitmapFillerBase::setWidth(int w) -{ - this->width = w; -} - -void BitmapFillerBase::setHeight(int h) -{ - this->height = h; - if (pProgress != nullptr) - pProgress->Start2(pBitmap->Height()); -} - -void BitmapFillerBase::setMaxColors(int maxcolors) -{ - this->bytesPerChannel = maxcolors > 255 ? 2 : 1; -} - - -// --------------------------------- -// Non-AVX Bitmap Filler -// --------------------------------- - -NonAvxBitmapFiller::NonAvxBitmapFiller(CMemoryBitmap* pB, ProgressBase* pP, const double redWb, const double greenWb, const double blueWb) : - BitmapFillerBase{ pB, pP, redWb, greenWb, blueWb } -{} - -bool NonAvxBitmapFiller::isThreadSafe() const { return true; } - -std::unique_ptr NonAvxBitmapFiller::clone() -{ - return std::make_unique(*this); -} - -size_t NonAvxBitmapFiller::Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) -{ - ZASSERTSTATE(0 != this->width); - ZASSERTSTATE(0 != this->height); - ZASSERTSTATE(0 != this->bytesPerChannel); - ZASSERTSTATE((nrPixels % static_cast(this->width)) == 0); - - if (this->isGray) - { - ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel); - // constexpr size_t vectorLen = 16; - redBuffer.resize(nrPixels); - - if (this->bytesPerChannel == 1) - { - const std::uint8_t* const pData = static_cast(source); - for (size_t i = 0; i < 
nrPixels; ++i) - redBuffer[i] = static_cast(static_cast(pData[i]) << 8); - } - else - { - const std::uint16_t* const pData = static_cast(source); - for (size_t i = 0; i < nrPixels; ++i) - redBuffer[i] = static_cast(_byteswap_ushort(pData[i])); // Load an convert to little endian - } - - if (this->isRgbBayerPattern()) - { - const size_t y = 2 * (rowIndex % 2); // 0, 2, 0, 2, ... - const float adjustFactors[2] = { this->cfaFactors[y], this->cfaFactors[y + 1] }; // {0, 1} or {2, 3}, depending on the line number. - for (size_t i = 0; i < nrPixels; ++i) - redBuffer[i] = adjustColor(redBuffer[i], adjustFactors[i % 2]); - } - - auto* pGray16Bitmap = dynamic_cast(pBitmap); - ZASSERTSTATE(pGray16Bitmap != nullptr); - std::uint16_t* const pOut = pGray16Bitmap->m_vPixels.data() + rowIndex * nrPixels; - for (size_t i = 0; i < nrPixels; ++i) - pOut[i] = static_cast(redBuffer[i]); - } - else - { - ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel * 3); - redBuffer.resize(nrPixels); - greenBuffer.resize(nrPixels); - blueBuffer.resize(nrPixels); - - if (this->bytesPerChannel == 1) - { - const std::uint8_t* pData = static_cast(source); - for (size_t i = 0; i < nrPixels; ++i, pData += 3) - { - redBuffer[i] = static_cast(static_cast(pData[0]) << 8); - greenBuffer[i] = static_cast(static_cast(pData[1]) << 8); - blueBuffer[i] = static_cast(static_cast(pData[2]) << 8); - } - } - else - { - const std::uint16_t* pData = static_cast(source); - for (size_t i = 0; i < nrPixels; ++i, pData += 3) - { - redBuffer[i] = static_cast(_byteswap_ushort(pData[0])); - greenBuffer[i] = static_cast(_byteswap_ushort(pData[1])); - blueBuffer[i] = static_cast(_byteswap_ushort(pData[2])); - } - } - - std::for_each(redBuffer.begin(), redBuffer.end(), [this](float& v) { v = adjustColor(v, this->redScale); }); - std::for_each(greenBuffer.begin(), greenBuffer.end(), [this](float& v) { v = adjustColor(v, this->greenScale); }); - std::for_each(blueBuffer.begin(), blueBuffer.end(), [this](float& v) { v = 
adjustColor(v, this->blueScale); }); - - auto* pColor16Bitmap = dynamic_cast(pBitmap); - ZASSERTSTATE(pColor16Bitmap != nullptr); - std::uint16_t* const pOutRed = pColor16Bitmap->m_Red.m_vPixels.data() + rowIndex * nrPixels; - std::uint16_t* const pOutGreen = pColor16Bitmap->m_Green.m_vPixels.data() + rowIndex * nrPixels; - std::uint16_t* const pOutBlue = pColor16Bitmap->m_Blue.m_vPixels.data() + rowIndex * nrPixels; - for (size_t i = 0; i < nrPixels; ++i) - { - pOutRed[i] = static_cast(redBuffer[i]); - pOutGreen[i] = static_cast(greenBuffer[i]); - pOutBlue[i] = static_cast(blueBuffer[i]); - } - } - - //if (((rowIndex + 1) % 32) == 0 && this->pProgress != nullptr) - // this->pProgress->Progress2(static_cast(rowIndex + 1)); - - return nrPixels; -} +#include "stdafx.h" +#include "BitMapFiller.h" +#include "avx_bitmap_filler.h" +#include "avx_support.h" +#include "DSSProgress.h" +#include "ZExcept.h" + +using namespace DSS; + +// ---------------------------------- +// BitmapFillerInterface +// ---------------------------------- + +std::unique_ptr BitmapFillerInterface::makeBitmapFiller(CMemoryBitmap* pBitmap, ProgressBase* pProgress, const double redWb, const double greenWb, const double blueWb) +{ + if (AvxSupport::checkSimdAvailability()) + return std::make_unique(pBitmap, pProgress, redWb, greenWb, blueWb); + else + return std::make_unique(pBitmap, pProgress, redWb, greenWb, blueWb); +} + +bool BitmapFillerInterface::isThreadSafe() const { return false; } + + +// ---------------------------------- +// BitmapFillerBase +// ---------------------------------- + +BitmapFillerBase::BitmapFillerBase(CMemoryBitmap* pB, ProgressBase* pP, const double redWb, const double greenWb, const double blueWb) : + pBitmap{ pB }, + pProgress{ pP }, + redScale{ static_cast(redWb) }, + greenScale{ static_cast(greenWb) }, + blueScale{ static_cast(blueWb) }, + + cfaType{ CFATYPE_NONE }, + isGray{ true }, + width{ 0 }, + height{ 0 }, + bytesPerChannel{ 0 }, + redBuffer{}, + greenBuffer{}, 
+ blueBuffer{}, + cfaFactors{ 1.0f, 1.0f, 1.0f, 1.0f } +{} + +void BitmapFillerBase::SetCFAType(CFATYPE cfaTp) +{ + this->cfaType = cfaTp; + if (auto* pGray16Bitmap = dynamic_cast(pBitmap)) + pGray16Bitmap->SetCFAType(cfaTp); + setCfaFactors(); +} + +void BitmapFillerBase::setCfaFactors() +{ + const auto setFactors = [this](const float f0, const float f1, const float f2, const float f3) -> void + { + this->cfaFactors.assign({ f0, f1, f2, f3 }); + }; + + switch (cfaType) + { + case CFATYPE_BGGR: return setFactors(blueScale, greenScale, greenScale, redScale); + case CFATYPE_GRBG: return setFactors(greenScale, redScale, blueScale, greenScale); + case CFATYPE_GBRG: return setFactors(greenScale, blueScale, redScale, greenScale); + case CFATYPE_RGGB: return setFactors(redScale, greenScale, greenScale, blueScale); + } +}; + +bool BitmapFillerBase::isRgbBayerPattern() const +{ + switch (this->cfaType) + { + case CFATYPE_BGGR: + case CFATYPE_GRBG: + case CFATYPE_GBRG: + case CFATYPE_RGGB: return true; + } + return false; +} + +void BitmapFillerBase::setGrey(bool grey) +{ + this->isGray = grey; +} + +void BitmapFillerBase::setWidth(int w) +{ + this->width = w; +} + +void BitmapFillerBase::setHeight(int h) +{ + this->height = h; + if (pProgress != nullptr) + pProgress->Start2(pBitmap->Height()); +} + +void BitmapFillerBase::setMaxColors(int maxcolors) +{ + this->bytesPerChannel = maxcolors > 255 ? 
2 : 1; +} + + +// --------------------------------- +// Non-AVX Bitmap Filler +// --------------------------------- + +NonAvxBitmapFiller::NonAvxBitmapFiller(CMemoryBitmap* pB, ProgressBase* pP, const double redWb, const double greenWb, const double blueWb) : + BitmapFillerBase{ pB, pP, redWb, greenWb, blueWb } +{} + +bool NonAvxBitmapFiller::isThreadSafe() const { return true; } + +std::unique_ptr NonAvxBitmapFiller::clone() +{ + return std::make_unique(*this); +} + +size_t NonAvxBitmapFiller::Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) +{ + ZASSERTSTATE(0 != this->width); + ZASSERTSTATE(0 != this->height); + ZASSERTSTATE(0 != this->bytesPerChannel); + ZASSERTSTATE((nrPixels % static_cast(this->width)) == 0); + + if (this->isGray) + { + ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel); + // constexpr size_t vectorLen = 16; + redBuffer.resize(nrPixels); + + if (this->bytesPerChannel == 1) + { + const std::uint8_t* const pData = static_cast(source); + for (size_t i = 0; i < nrPixels; ++i) + redBuffer[i] = static_cast(static_cast(pData[i]) << 8); + } + else + { + const std::uint16_t* const pData = static_cast(source); + for (size_t i = 0; i < nrPixels; ++i) + redBuffer[i] = static_cast(_byteswap_ushort(pData[i])); // Load an convert to little endian + } + + if (this->isRgbBayerPattern()) + { + const size_t y = 2 * (rowIndex % 2); // 0, 2, 0, 2, ... + const float adjustFactors[2] = { this->cfaFactors[y], this->cfaFactors[y + 1] }; // {0, 1} or {2, 3}, depending on the line number. 
+ for (size_t i = 0; i < nrPixels; ++i) + redBuffer[i] = adjustColor(redBuffer[i], adjustFactors[i % 2]); + } + + auto* pGray16Bitmap = dynamic_cast(pBitmap); + ZASSERTSTATE(pGray16Bitmap != nullptr); + std::uint16_t* const pOut = pGray16Bitmap->m_vPixels.data() + rowIndex * nrPixels; + for (size_t i = 0; i < nrPixels; ++i) + pOut[i] = static_cast(redBuffer[i]); + } + else + { + ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel * 3); + redBuffer.resize(nrPixels); + greenBuffer.resize(nrPixels); + blueBuffer.resize(nrPixels); + + if (this->bytesPerChannel == 1) + { + const std::uint8_t* pData = static_cast(source); + for (size_t i = 0; i < nrPixels; ++i, pData += 3) + { + redBuffer[i] = static_cast(static_cast(pData[0]) << 8); + greenBuffer[i] = static_cast(static_cast(pData[1]) << 8); + blueBuffer[i] = static_cast(static_cast(pData[2]) << 8); + } + } + else + { + const std::uint16_t* pData = static_cast(source); + for (size_t i = 0; i < nrPixels; ++i, pData += 3) + { + redBuffer[i] = static_cast(_byteswap_ushort(pData[0])); + greenBuffer[i] = static_cast(_byteswap_ushort(pData[1])); + blueBuffer[i] = static_cast(_byteswap_ushort(pData[2])); + } + } + + std::for_each(redBuffer.begin(), redBuffer.end(), [this](float& v) { v = adjustColor(v, this->redScale); }); + std::for_each(greenBuffer.begin(), greenBuffer.end(), [this](float& v) { v = adjustColor(v, this->greenScale); }); + std::for_each(blueBuffer.begin(), blueBuffer.end(), [this](float& v) { v = adjustColor(v, this->blueScale); }); + + auto* pColor16Bitmap = dynamic_cast(pBitmap); + ZASSERTSTATE(pColor16Bitmap != nullptr); + std::uint16_t* const pOutRed = pColor16Bitmap->m_Red.m_vPixels.data() + rowIndex * nrPixels; + std::uint16_t* const pOutGreen = pColor16Bitmap->m_Green.m_vPixels.data() + rowIndex * nrPixels; + std::uint16_t* const pOutBlue = pColor16Bitmap->m_Blue.m_vPixels.data() + rowIndex * nrPixels; + for (size_t i = 0; i < nrPixels; ++i) + { + pOutRed[i] = static_cast(redBuffer[i]); + pOutGreen[i] = 
static_cast(greenBuffer[i]); + pOutBlue[i] = static_cast(blueBuffer[i]); + } + } + + //if (((rowIndex + 1) % 32) == 0 && this->pProgress != nullptr) + // this->pProgress->Progress2(static_cast(rowIndex + 1)); + + return nrPixels; +} diff --git a/DeepSkyStacker/BitMapFiller.h b/DeepSkyStackerKernel/BitMapFiller.h similarity index 96% rename from DeepSkyStacker/BitMapFiller.h rename to DeepSkyStackerKernel/BitMapFiller.h index 07961bb7..e66fc3a6 100644 --- a/DeepSkyStacker/BitMapFiller.h +++ b/DeepSkyStackerKernel/BitMapFiller.h @@ -1,59 +1,59 @@ -#pragma once -#include "cfa.h" - -namespace DSS { class ProgressBase; } -class CMemoryBitmap; - -class BitmapFillerInterface -{ -public: - static std::unique_ptr makeBitmapFiller(CMemoryBitmap* pBitmap, DSS::ProgressBase* pProgress, const double redWb, const double greenWb, const double blueWb); - virtual ~BitmapFillerInterface() {} - - virtual bool isThreadSafe() const; - virtual std::unique_ptr clone() = 0; - - virtual void SetCFAType(CFATYPE CFAType) = 0; - virtual void setGrey(bool grey) = 0; - virtual void setWidth(int width) = 0; - virtual void setHeight(int height) = 0; - virtual void setMaxColors(int maxcolors) = 0; - virtual size_t Write(const void* source, const size_t size, const size_t count, const size_t rowIndex) = 0; -}; - -class BitmapFillerBase : public BitmapFillerInterface -{ -protected: - DSS::ProgressBase* pProgress; - CMemoryBitmap* pBitmap; - const float redScale; - const float greenScale; - const float blueScale; - CFATYPE cfaType; - bool isGray; - int width; - int height; - int bytesPerChannel; - std::vector redBuffer; - std::vector greenBuffer; - std::vector blueBuffer; - std::vector cfaFactors; -public: - BitmapFillerBase(CMemoryBitmap* pB, DSS::ProgressBase* pP, const double redWb, const double greenWb, const double blueWb); - virtual ~BitmapFillerBase() {} - - virtual void SetCFAType(CFATYPE cfaType) override; - virtual void setGrey(bool grey) override; - virtual void setWidth(int width) 
override; - virtual void setHeight(int height) override; - virtual void setMaxColors(int maxcolors) override; -protected: - void setCfaFactors(); - bool isRgbBayerPattern() const; - - inline static float adjustColor(const float color, const float adjustFactor) - { - constexpr float Maximum = static_cast(std::numeric_limits::max() - 1); - return std::min(color * adjustFactor, Maximum); - }; -}; +#pragma once +#include "cfa.h" + +namespace DSS { class ProgressBase; } +class CMemoryBitmap; + +class BitmapFillerInterface +{ +public: + static std::unique_ptr makeBitmapFiller(CMemoryBitmap* pBitmap, DSS::ProgressBase* pProgress, const double redWb, const double greenWb, const double blueWb); + virtual ~BitmapFillerInterface() {} + + virtual bool isThreadSafe() const; + virtual std::unique_ptr clone() = 0; + + virtual void SetCFAType(CFATYPE CFAType) = 0; + virtual void setGrey(bool grey) = 0; + virtual void setWidth(int width) = 0; + virtual void setHeight(int height) = 0; + virtual void setMaxColors(int maxcolors) = 0; + virtual size_t Write(const void* source, const size_t size, const size_t count, const size_t rowIndex) = 0; +}; + +class BitmapFillerBase : public BitmapFillerInterface +{ +protected: + DSS::ProgressBase* pProgress; + CMemoryBitmap* pBitmap; + const float redScale; + const float greenScale; + const float blueScale; + CFATYPE cfaType; + bool isGray; + int width; + int height; + int bytesPerChannel; + std::vector redBuffer; + std::vector greenBuffer; + std::vector blueBuffer; + std::vector cfaFactors; +public: + BitmapFillerBase(CMemoryBitmap* pB, DSS::ProgressBase* pP, const double redWb, const double greenWb, const double blueWb); + virtual ~BitmapFillerBase() {} + + virtual void SetCFAType(CFATYPE cfaType) override; + virtual void setGrey(bool grey) override; + virtual void setWidth(int width) override; + virtual void setHeight(int height) override; + virtual void setMaxColors(int maxcolors) override; +protected: + void setCfaFactors(); + bool 
isRgbBayerPattern() const; + + inline static float adjustColor(const float color, const float adjustFactor) + { + constexpr float Maximum = static_cast(std::numeric_limits::max() - 1); + return std::min(color * adjustFactor, Maximum); + }; +}; diff --git a/DeepSkyStacker/BitmapBase.cpp b/DeepSkyStackerKernel/BitmapBase.cpp similarity index 96% rename from DeepSkyStacker/BitmapBase.cpp rename to DeepSkyStackerKernel/BitmapBase.cpp index f025d625..2708115a 100644 --- a/DeepSkyStacker/BitmapBase.cpp +++ b/DeepSkyStackerKernel/BitmapBase.cpp @@ -1,66 +1,66 @@ -#include "stdafx.h" -#include "Ztrace.h" -#include "BitmapBase.h" -#include "BitmapCharacteristics.h" -#include "GrayBitmap.h" -#include "ColorBitmap.h" - -std::shared_ptr CreateBitmap(const CBitmapCharacteristics& bc) -{ - ZFUNCTRACE_RUNTIME(); - - if (bc.m_lNrChannels == 1) - { - if (bc.m_lBitsPerPixel == 8) - { - ZTRACE_RUNTIME("Creating 8 Gray bit memory bitmap"); - return std::make_shared(); - } - else if (bc.m_lBitsPerPixel == 16) - { - ZTRACE_RUNTIME("Creating 16 Gray bit memory bitmap"); - return std::make_shared(); - } - else if (bc.m_lBitsPerPixel == 32) - { - if (bc.m_bFloat) - { - ZTRACE_RUNTIME("Creating 32 float Gray bit memory bitmap"); - return std::make_shared(); - } - else - { - ZTRACE_RUNTIME("Creating 32 Gray bit memory bitmap"); - return std::make_shared(); - } - } - } - else if (bc.m_lNrChannels == 3) - { - if (bc.m_lBitsPerPixel == 8) - { - ZTRACE_RUNTIME("Creating 8 RGB bit memory bitmap"); - return std::make_shared(); - } - else if (bc.m_lBitsPerPixel == 16) - { - ZTRACE_RUNTIME("Creating 16 RGB bit memory bitmap"); - return std::make_shared(); - } - else if (bc.m_lBitsPerPixel == 32) - { - if (bc.m_bFloat) - { - ZTRACE_RUNTIME("Creating 32 float RGB bit memory bitmap"); - return std::make_shared(); - } - else - { - ZTRACE_RUNTIME("Creating 32 RGB bit memory bitmap"); - return std::make_shared(); - } - } - } - - return std::shared_ptr{}; -} +#include "stdafx.h" +#include "Ztrace.h" 
+#include "BitmapBase.h" +#include "BitmapCharacteristics.h" +#include "GrayBitmap.h" +#include "ColorBitmap.h" + +std::shared_ptr CreateBitmap(const CBitmapCharacteristics& bc) +{ + ZFUNCTRACE_RUNTIME(); + + if (bc.m_lNrChannels == 1) + { + if (bc.m_lBitsPerPixel == 8) + { + ZTRACE_RUNTIME("Creating 8 Gray bit memory bitmap"); + return std::make_shared(); + } + else if (bc.m_lBitsPerPixel == 16) + { + ZTRACE_RUNTIME("Creating 16 Gray bit memory bitmap"); + return std::make_shared(); + } + else if (bc.m_lBitsPerPixel == 32) + { + if (bc.m_bFloat) + { + ZTRACE_RUNTIME("Creating 32 float Gray bit memory bitmap"); + return std::make_shared(); + } + else + { + ZTRACE_RUNTIME("Creating 32 Gray bit memory bitmap"); + return std::make_shared(); + } + } + } + else if (bc.m_lNrChannels == 3) + { + if (bc.m_lBitsPerPixel == 8) + { + ZTRACE_RUNTIME("Creating 8 RGB bit memory bitmap"); + return std::make_shared(); + } + else if (bc.m_lBitsPerPixel == 16) + { + ZTRACE_RUNTIME("Creating 16 RGB bit memory bitmap"); + return std::make_shared(); + } + else if (bc.m_lBitsPerPixel == 32) + { + if (bc.m_bFloat) + { + ZTRACE_RUNTIME("Creating 32 float RGB bit memory bitmap"); + return std::make_shared(); + } + else + { + ZTRACE_RUNTIME("Creating 32 RGB bit memory bitmap"); + return std::make_shared(); + } + } + } + + return std::shared_ptr{}; +} diff --git a/DeepSkyStacker/BitmapBase.h b/DeepSkyStackerKernel/BitmapBase.h similarity index 96% rename from DeepSkyStacker/BitmapBase.h rename to DeepSkyStackerKernel/BitmapBase.h index 86ffd5aa..611635d2 100644 --- a/DeepSkyStacker/BitmapBase.h +++ b/DeepSkyStackerKernel/BitmapBase.h @@ -1,6 +1,6 @@ -#pragma once - -class CMemoryBitmap; -class CBitmapCharacteristics; - -std::shared_ptr CreateBitmap(const CBitmapCharacteristics& bc); +#pragma once + +class CMemoryBitmap; +class CBitmapCharacteristics; + +std::shared_ptr CreateBitmap(const CBitmapCharacteristics& bc); diff --git a/DeepSkyStacker/BitmapCharacteristics.h 
b/DeepSkyStackerKernel/BitmapCharacteristics.h similarity index 100% rename from DeepSkyStacker/BitmapCharacteristics.h rename to DeepSkyStackerKernel/BitmapCharacteristics.h diff --git a/DeepSkyStacker/BitmapConstants.h b/DeepSkyStackerKernel/BitmapConstants.h similarity index 100% rename from DeepSkyStacker/BitmapConstants.h rename to DeepSkyStackerKernel/BitmapConstants.h diff --git a/DeepSkyStacker/BitmapExt.cpp b/DeepSkyStackerKernel/BitmapExt.cpp similarity index 100% rename from DeepSkyStacker/BitmapExt.cpp rename to DeepSkyStackerKernel/BitmapExt.cpp diff --git a/DeepSkyStacker/BitmapExt.h b/DeepSkyStackerKernel/BitmapExt.h similarity index 100% rename from DeepSkyStacker/BitmapExt.h rename to DeepSkyStackerKernel/BitmapExt.h diff --git a/DeepSkyStacker/BitmapExtraInfo.h b/DeepSkyStackerKernel/BitmapExtraInfo.h similarity index 100% rename from DeepSkyStacker/BitmapExtraInfo.h rename to DeepSkyStackerKernel/BitmapExtraInfo.h diff --git a/DeepSkyStacker/BitmapInfo.cpp b/DeepSkyStackerKernel/BitmapInfo.cpp similarity index 100% rename from DeepSkyStacker/BitmapInfo.cpp rename to DeepSkyStackerKernel/BitmapInfo.cpp diff --git a/DeepSkyStacker/BitmapInfo.h b/DeepSkyStackerKernel/BitmapInfo.h similarity index 100% rename from DeepSkyStacker/BitmapInfo.h rename to DeepSkyStackerKernel/BitmapInfo.h diff --git a/DeepSkyStacker/BitmapIterator.h b/DeepSkyStackerKernel/BitmapIterator.h similarity index 96% rename from DeepSkyStacker/BitmapIterator.h rename to DeepSkyStackerKernel/BitmapIterator.h index 814dd46c..40ac90eb 100644 --- a/DeepSkyStacker/BitmapIterator.h +++ b/DeepSkyStackerKernel/BitmapIterator.h @@ -1,88 +1,88 @@ -#pragma once -class CMemoryBitmap; -template -class BitmapIt -{ - static_assert(std::is_same_v || std::is_same_v || std::is_same_v> || std::is_same_v>); - static_assert(std::is_same_v || std::is_same_v); -protected: - T bitmap; - Ptr pRed; - Ptr pGreen; - Ptr pBlue; -private: - size_t elementSize; -public: - BitmapIt(T p) : - bitmap{ p } - { - 
Reset(0, 0); - } - - BitmapIt(const BitmapIt&) = delete; - - void Reset(const size_t x, const size_t y) - { - bitmap->InitIterator(pRed, pGreen, pBlue, elementSize, x, y); - } - - void GetPixel(double& r, double& g, double& b) const - { - const auto [vr, vg, vb] = bitmap->ConvertValue3(this->pRed, this->pGreen, this->pBlue); - r = vr; - g = vg; - b = vb; - } - double GetPixel() const - { - return bitmap->ConvertValue1(this->pRed, this->pGreen, this->pBlue); - } - - void SetPixel(const double r, const double g, const double b) - { - static_assert(std::is_same_v); - bitmap->ReceiveValue(this->pRed, this->pGreen, this->pBlue, r, g, b); - } - void SetPixel(const double gray) - { - static_assert(std::is_same_v); - bitmap->ReceiveValue(this->pRed, this->pGreen, this->pBlue, gray); - } - - void operator++() - { - if constexpr (std::is_same_v) - { - pRed = static_cast(pRed) + this->elementSize; - pGreen = static_cast(pGreen) + this->elementSize; - pBlue = static_cast(pBlue) + this->elementSize; - } - else - { - pRed = static_cast(pRed) + this->elementSize; - pGreen = static_cast(pGreen) + this->elementSize; - pBlue = static_cast(pBlue) + this->elementSize; - } - } - void operator+=(const size_t n) - { - if constexpr (std::is_same_v) - { - pRed = static_cast(pRed) + n * this->elementSize; - pGreen = static_cast(pGreen) + n * this->elementSize; - pBlue = static_cast(pBlue) + n * this->elementSize; - } - else - { - pRed = static_cast(pRed) + n * this->elementSize; - pGreen = static_cast(pGreen) + n * this->elementSize; - pBlue = static_cast(pBlue) + n * this->elementSize; - } - } -}; - -template -using BitmapIteratorConst = BitmapIt; - -template -using BitmapIterator = BitmapIt; +#pragma once +class CMemoryBitmap; +template +class BitmapIt +{ + static_assert(std::is_same_v || std::is_same_v || std::is_same_v> || std::is_same_v>); + static_assert(std::is_same_v || std::is_same_v); +protected: + T bitmap; + Ptr pRed; + Ptr pGreen; + Ptr pBlue; +private: + size_t elementSize; 
+public: + BitmapIt(T p) : + bitmap{ p } + { + Reset(0, 0); + } + + BitmapIt(const BitmapIt&) = delete; + + void Reset(const size_t x, const size_t y) + { + bitmap->InitIterator(pRed, pGreen, pBlue, elementSize, x, y); + } + + void GetPixel(double& r, double& g, double& b) const + { + const auto [vr, vg, vb] = bitmap->ConvertValue3(this->pRed, this->pGreen, this->pBlue); + r = vr; + g = vg; + b = vb; + } + double GetPixel() const + { + return bitmap->ConvertValue1(this->pRed, this->pGreen, this->pBlue); + } + + void SetPixel(const double r, const double g, const double b) + { + static_assert(std::is_same_v); + bitmap->ReceiveValue(this->pRed, this->pGreen, this->pBlue, r, g, b); + } + void SetPixel(const double gray) + { + static_assert(std::is_same_v); + bitmap->ReceiveValue(this->pRed, this->pGreen, this->pBlue, gray); + } + + void operator++() + { + if constexpr (std::is_same_v) + { + pRed = static_cast(pRed) + this->elementSize; + pGreen = static_cast(pGreen) + this->elementSize; + pBlue = static_cast(pBlue) + this->elementSize; + } + else + { + pRed = static_cast(pRed) + this->elementSize; + pGreen = static_cast(pGreen) + this->elementSize; + pBlue = static_cast(pBlue) + this->elementSize; + } + } + void operator+=(const size_t n) + { + if constexpr (std::is_same_v) + { + pRed = static_cast(pRed) + n * this->elementSize; + pGreen = static_cast(pGreen) + n * this->elementSize; + pBlue = static_cast(pBlue) + n * this->elementSize; + } + else + { + pRed = static_cast(pRed) + n * this->elementSize; + pGreen = static_cast(pGreen) + n * this->elementSize; + pBlue = static_cast(pBlue) + n * this->elementSize; + } + } +}; + +template +using BitmapIteratorConst = BitmapIt; + +template +using BitmapIterator = BitmapIt; diff --git a/DeepSkyStacker/BitmapPartFile.h b/DeepSkyStackerKernel/BitmapPartFile.h similarity index 100% rename from DeepSkyStacker/BitmapPartFile.h rename to DeepSkyStackerKernel/BitmapPartFile.h diff --git a/DeepSkyStacker/CFABitmapInfo.h 
b/DeepSkyStackerKernel/CFABitmapInfo.h similarity index 100% rename from DeepSkyStacker/CFABitmapInfo.h rename to DeepSkyStackerKernel/CFABitmapInfo.h diff --git a/DeepSkyStacker/ChannelAlign.cpp b/DeepSkyStackerKernel/ChannelAlign.cpp similarity index 100% rename from DeepSkyStacker/ChannelAlign.cpp rename to DeepSkyStackerKernel/ChannelAlign.cpp diff --git a/DeepSkyStacker/ChannelAlign.h b/DeepSkyStackerKernel/ChannelAlign.h similarity index 100% rename from DeepSkyStacker/ChannelAlign.h rename to DeepSkyStackerKernel/ChannelAlign.h diff --git a/DeepSkyStacker/ColorBitmap.cpp b/DeepSkyStackerKernel/ColorBitmap.cpp similarity index 100% rename from DeepSkyStacker/ColorBitmap.cpp rename to DeepSkyStackerKernel/ColorBitmap.cpp diff --git a/DeepSkyStacker/ColorBitmap.h b/DeepSkyStackerKernel/ColorBitmap.h similarity index 100% rename from DeepSkyStacker/ColorBitmap.h rename to DeepSkyStackerKernel/ColorBitmap.h diff --git a/DeepSkyStacker/ColorHelpers.cpp b/DeepSkyStackerKernel/ColorHelpers.cpp similarity index 100% rename from DeepSkyStacker/ColorHelpers.cpp rename to DeepSkyStackerKernel/ColorHelpers.cpp diff --git a/DeepSkyStacker/ColorHelpers.h b/DeepSkyStackerKernel/ColorHelpers.h similarity index 100% rename from DeepSkyStacker/ColorHelpers.h rename to DeepSkyStackerKernel/ColorHelpers.h diff --git a/DeepSkyStacker/ColorMultiBitmap.cpp b/DeepSkyStackerKernel/ColorMultiBitmap.cpp similarity index 100% rename from DeepSkyStacker/ColorMultiBitmap.cpp rename to DeepSkyStackerKernel/ColorMultiBitmap.cpp diff --git a/DeepSkyStacker/ColorMultiBitmap.h b/DeepSkyStackerKernel/ColorMultiBitmap.h similarity index 100% rename from DeepSkyStacker/ColorMultiBitmap.h rename to DeepSkyStackerKernel/ColorMultiBitmap.h diff --git a/DeepSkyStacker/ColorRef.h b/DeepSkyStackerKernel/ColorRef.h similarity index 100% rename from DeepSkyStacker/ColorRef.h rename to DeepSkyStackerKernel/ColorRef.h diff --git a/DeepSkyStacker/CosmeticEngine.cpp b/DeepSkyStackerKernel/CosmeticEngine.cpp 
similarity index 100% rename from DeepSkyStacker/CosmeticEngine.cpp rename to DeepSkyStackerKernel/CosmeticEngine.cpp diff --git a/DeepSkyStacker/CosmeticEngine.h b/DeepSkyStackerKernel/CosmeticEngine.h similarity index 100% rename from DeepSkyStacker/CosmeticEngine.h rename to DeepSkyStackerKernel/CosmeticEngine.h diff --git a/DeepSkyStacker/DSSCommon.h b/DeepSkyStackerKernel/DSSCommon.h similarity index 100% rename from DeepSkyStacker/DSSCommon.h rename to DeepSkyStackerKernel/DSSCommon.h diff --git a/DeepSkyStacker/DSSProgress.cpp b/DeepSkyStackerKernel/DSSProgress.cpp similarity index 100% rename from DeepSkyStacker/DSSProgress.cpp rename to DeepSkyStackerKernel/DSSProgress.cpp diff --git a/DeepSkyStacker/DSSProgress.h b/DeepSkyStackerKernel/DSSProgress.h similarity index 100% rename from DeepSkyStacker/DSSProgress.h rename to DeepSkyStackerKernel/DSSProgress.h diff --git a/DeepSkyStacker/DSSTools.cpp b/DeepSkyStackerKernel/DSSTools.cpp similarity index 100% rename from DeepSkyStacker/DSSTools.cpp rename to DeepSkyStackerKernel/DSSTools.cpp diff --git a/DeepSkyStacker/DSSTools.h b/DeepSkyStackerKernel/DSSTools.h similarity index 100% rename from DeepSkyStacker/DSSTools.h rename to DeepSkyStackerKernel/DSSTools.h diff --git a/DeepSkyStacker/DSSVersion.h b/DeepSkyStackerKernel/DSSVersion.h similarity index 100% rename from DeepSkyStacker/DSSVersion.h rename to DeepSkyStackerKernel/DSSVersion.h diff --git a/DeepSkyStacker/DarkFrame.cpp b/DeepSkyStackerKernel/DarkFrame.cpp similarity index 100% rename from DeepSkyStacker/DarkFrame.cpp rename to DeepSkyStackerKernel/DarkFrame.cpp diff --git a/DeepSkyStacker/DarkFrame.h b/DeepSkyStackerKernel/DarkFrame.h similarity index 100% rename from DeepSkyStacker/DarkFrame.h rename to DeepSkyStackerKernel/DarkFrame.h diff --git a/DeepSkyStacker/DeBloom.cpp b/DeepSkyStackerKernel/DeBloom.cpp similarity index 100% rename from DeepSkyStacker/DeBloom.cpp rename to DeepSkyStackerKernel/DeBloom.cpp diff --git 
a/DeepSkyStacker/DeBloom.h b/DeepSkyStackerKernel/DeBloom.h similarity index 100% rename from DeepSkyStacker/DeBloom.h rename to DeepSkyStackerKernel/DeBloom.h diff --git a/DeepSkyStackerKernel/DeepSkyStackerKernel.vcxproj b/DeepSkyStackerKernel/DeepSkyStackerKernel.vcxproj new file mode 100644 index 00000000..1d9127f4 --- /dev/null +++ b/DeepSkyStackerKernel/DeepSkyStackerKernel.vcxproj @@ -0,0 +1,350 @@ + + + + + Debug + x64 + + + Release + x64 + + + + {CB7B75F1-08F4-4C8D-A7EF-2AA33E9A67F1} + DeepSkyStackerKernel + Win32Proj + QtVS_v304 + $(MSBuildProjectDirectory)\QtMsBuild + 10.0 + + + + StaticLibrary + v143 + MultiByte + true + Dynamic + + + StaticLibrary + v143 + MultiByte + Dynamic + + + + + + + 6.4.0_msvc2019_64 + core + debug + + + 6.4.0_msvc2019_64 + core + release + + + + + + + + + + + + + + + + + + + <_ProjectFileVersion>15.0.27413.0 + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + ..\libs\Win64\$(Configuration)Libs;$(VC_LibraryPath_x64);$(WindowsSdk_71A_LibraryPath_x64);$(LibraryPath) + $(QTDIR)\include;$(QTDIR)\include\QtCore;$(VC_IncludePath);$(WindowsSDK_IncludePath); + + + + $(SolutionDir)$(Platform)\$(Configuration)\ + $(Platform)\$(Configuration)\ + ..\libs\Win64\$(Configuration)Libs;$(VC_LibraryPath_x64);$(WindowsSdk_71A_LibraryPath_x64);$(LibraryPath) + $(QTDIR)\include;$(QTDIR)\include\QtCore;$(VC_IncludePath);$(WindowsSDK_IncludePath); + + + + + X64 + + + _UNICODE;UNICODE;NOMINMAX;LIBRAW_NODLL;WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;%(PreprocessorDefinitions) + false + EnableFastChecks + MultiThreadedDebugDLL + .\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;C:\Program Files (x86)\Visual Leak Detector\include;..\Libraw;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + $(OutDir)$(TargetName).pdb + stdcpp20 + true + -Zc:__cplusplus /openmp:experimental /wd4828 /wd4652 /wd4702 /wd4244 %(AdditionalOptions) + Level3 + Level4 + false + + + 
DSS_COMMANDLINE;%(PreprocessorDefinitions) + + + true + Console + false + + MachineX64 + gdi32.lib;gdiplus.lib;zlibstatic.lib;%(AdditionalDependencies) + MSVCRT + Default + + + .\GeneratedFiles\ + + + .\GeneratedFiles\%(RelativeDir) + + + .\GeneratedFiles\%(RelativeDir) + + + lrelease + %(RelativeDir) + + + + + X64 + + + .\GeneratedFiles\$(ConfigurationName);.\GeneratedFiles;..\ZClass;..\tools;..\LibTIFF;..\CFitsIO;..\Zlib;..\libraw;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + _UNICODE;UNICODE;NOMINMAX;LIBRAW_NODLL;WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_DEPRECATE;USE_LIBTIFF_STATIC;%(PreprocessorDefinitions) + MultiThreadedDLL + $(OutDir)$(TargetName).pdb + false + stdcpp20 + true + -Zc:__cplusplus /openmp:experimental /wd4828 /wd4652 /wd4702 /wd4244 %(AdditionalOptions) + Level3 + Level4 + false + false + + + DSS_COMMANDLINE;%(PreprocessorDefinitions) + + + gdi32.lib;gdiplus.lib;zlibstatic.lib;%(AdditionalDependencies) + + + Console + true + false + + MachineX64 + Default + + + .\GeneratedFiles\ + + + .\GeneratedFiles\%(RelativeDir) + + + .\GeneratedFiles\%(RelativeDir) + + + lrelease + %(RelativeDir) + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Use + Use + + + Use + Use + + + + + + + + + + + + + + + + + + + + + + + Create + Create + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + {a2f500c6-6903-4c2d-906d-ce86b99ba50d} + + + {a71d2131-f425-381f-8a9a-29d60132a046} + + + {d5fb2402-a821-4474-91e7-07f0dd5866f0} + + + {1747f255-9cb9-472b-8fee-9e0bbfbad49d} + + + + + + + + + \ No newline at end of file diff --git a/DeepSkyStackerKernel/DeepSkyStackerKernel.vcxproj.filters b/DeepSkyStackerKernel/DeepSkyStackerKernel.vcxproj.filters new file mode 100644 index 00000000..8eab1b1e --- /dev/null +++ b/DeepSkyStackerKernel/DeepSkyStackerKernel.vcxproj.filters @@ -0,0 +1,451 @@ + + + + + 
{4FC737F1-C7A5-4376-A066-2A32D7EEA2FF} + cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx + + + {93995380-89BD-4b04-88EB-625FBEAAEBFB} + h;hpp;hxx;hm;inl;inc;xsd + + + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header 
Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + Header Files + + + + + Header Files + + + Header Files + + + \ No newline at end of file diff --git a/DeepSkyStacker/DeepStack.cpp b/DeepSkyStackerKernel/DeepStack.cpp similarity index 100% rename from DeepSkyStacker/DeepStack.cpp rename to DeepSkyStackerKernel/DeepStack.cpp diff --git a/DeepSkyStacker/DeepStack.h b/DeepSkyStackerKernel/DeepStack.h similarity index 97% rename from DeepSkyStacker/DeepStack.h rename to DeepSkyStackerKernel/DeepStack.h index 9bd78af1..89979a2c 100644 --- a/DeepSkyStacker/DeepStack.h +++ b/DeepSkyStackerKernel/DeepStack.h @@ -68,7 +68,7 @@ public : m_StackedBitmap.SetBezierAdjust(BezierAdjust); m_StackedBitmap.SetHistogramAdjust(HistogramAdjust); - return m_StackedBitmap.GetBitmap(m_Bitmap, &rcProcess); + return m_StackedBitmap.GetHBitmap(m_Bitmap, &rcProcess); }; CStackedBitmap& GetStackedBitmap() diff --git a/DeepSkyStacker/DynamicStats.h b/DeepSkyStackerKernel/DynamicStats.h similarity index 100% rename from DeepSkyStacker/DynamicStats.h rename to DeepSkyStackerKernel/DynamicStats.h diff --git a/DeepSkyStacker/EntropyInfo.cpp b/DeepSkyStackerKernel/EntropyInfo.cpp similarity index 100% rename from 
DeepSkyStacker/EntropyInfo.cpp rename to DeepSkyStackerKernel/EntropyInfo.cpp diff --git a/DeepSkyStacker/EntropyInfo.h b/DeepSkyStackerKernel/EntropyInfo.h similarity index 100% rename from DeepSkyStacker/EntropyInfo.h rename to DeepSkyStackerKernel/EntropyInfo.h diff --git a/DeepSkyStacker/ExtraInfo.h b/DeepSkyStackerKernel/ExtraInfo.h similarity index 100% rename from DeepSkyStacker/ExtraInfo.h rename to DeepSkyStackerKernel/ExtraInfo.h diff --git a/DeepSkyStacker/FITSUtil.cpp b/DeepSkyStackerKernel/FITSUtil.cpp similarity index 99% rename from DeepSkyStacker/FITSUtil.cpp rename to DeepSkyStackerKernel/FITSUtil.cpp index bcb4bd72..59d3166a 100644 --- a/DeepSkyStacker/FITSUtil.cpp +++ b/DeepSkyStackerKernel/FITSUtil.cpp @@ -1,5 +1,5 @@ #include -#include "resource.h" +//#include "resource.h" #include "Workspace.h" #include "FITSUtil.h" #include "..\CFitsio\fitsio.h" @@ -912,12 +912,14 @@ bool CFITSReadInMemoryBitmap::OnOpen() static bool eightBitWarningIssued = false; if (!eightBitWarningIssued) { - CString errorMessage; - errorMessage.Format(IDS_8BIT_FITS_NODEBAYER); + //CString errorMessage; + //errorMessage.Format(IDS_8BIT_FITS_NODEBAYER); + const QString errorMessage(QCoreApplication::translate("FitsUtils", "DeepSkyStacker will not de-Bayer 8 bit images", "IDS_8BIT_FITS_NODEBAYER")); + #if defined(_CONSOLE) - std::wcerr << errorMessage; + std::wcerr << errorMessage.toStdWString().c_str(); #else - AfxMessageBox(errorMessage, MB_OK | MB_ICONWARNING); + AfxMessageBox(errorMessage.toStdWString().c_str(), MB_OK | MB_ICONWARNING); #endif // Remember we already said we won't do that! 
eightBitWarningIssued = true; diff --git a/DeepSkyStacker/FITSUtil.h b/DeepSkyStackerKernel/FITSUtil.h similarity index 100% rename from DeepSkyStacker/FITSUtil.h rename to DeepSkyStackerKernel/FITSUtil.h diff --git a/DeepSkyStacker/File.cpp b/DeepSkyStackerKernel/File.cpp similarity index 100% rename from DeepSkyStacker/File.cpp rename to DeepSkyStackerKernel/File.cpp diff --git a/DeepSkyStacker/File.h b/DeepSkyStackerKernel/File.h similarity index 100% rename from DeepSkyStacker/File.h rename to DeepSkyStackerKernel/File.h diff --git a/DeepSkyStacker/Filters.cpp b/DeepSkyStackerKernel/Filters.cpp similarity index 100% rename from DeepSkyStacker/Filters.cpp rename to DeepSkyStackerKernel/Filters.cpp diff --git a/DeepSkyStacker/Filters.h b/DeepSkyStackerKernel/Filters.h similarity index 100% rename from DeepSkyStacker/Filters.h rename to DeepSkyStackerKernel/Filters.h diff --git a/DeepSkyStacker/FlatFrame.cpp b/DeepSkyStackerKernel/FlatFrame.cpp similarity index 96% rename from DeepSkyStacker/FlatFrame.cpp rename to DeepSkyStackerKernel/FlatFrame.cpp index e5dd8fb6..cd6e4c6b 100644 --- a/DeepSkyStacker/FlatFrame.cpp +++ b/DeepSkyStackerKernel/FlatFrame.cpp @@ -5,7 +5,7 @@ #include "MemoryBitmap.h" #include "CFABitmapInfo.h" #include "Ztrace.h" -#include "resource.h" +//#include "resource.h" using namespace DSS; @@ -31,8 +31,9 @@ bool CFlatFrame::ApplyFlat(std::shared_ptr pTarget, ProgressBase { ZFUNCTRACE_RUNTIME(); bool bResult = false; - CStringA strText; - strText.LoadString(IDS_APPLYINGFLAT); + //CStringA strText; + //strText.LoadString(IDS_APPLYINGFLAT); + const QString strText(QCoreApplication::translate("FlatFrame", "Applying Flat Frame", "IDS_APPLYINGFLAT")); // Check and remove super pixel settings CFATRANSFORMATION CFATransform = CFAT_NONE; @@ -105,7 +106,7 @@ bool CFlatFrame::ApplyFlat(std::shared_ptr pTarget, ProgressBase { ZTRACE_RUNTIME("Target.RealWidth = %d, Source.RealWidth = %d", pTarget->RealWidth(), m_pFlatFrame->RealWidth()); 
ZTRACE_RUNTIME("Target.RealHeight = %d, Source.RealHeight = %d", pTarget->RealHeight(), m_pFlatFrame->RealHeight()); - ZTRACE_RUNTIME("Did not perform %s", (LPCSTR)strText); + ZTRACE_RUNTIME("Did not perform %s", strText.toStdWString().c_str()); } } diff --git a/DeepSkyStacker/FlatFrame.h b/DeepSkyStackerKernel/FlatFrame.h similarity index 100% rename from DeepSkyStacker/FlatFrame.h rename to DeepSkyStackerKernel/FlatFrame.h diff --git a/DeepSkyStacker/FlatPart.h b/DeepSkyStackerKernel/FlatPart.h similarity index 100% rename from DeepSkyStacker/FlatPart.h rename to DeepSkyStackerKernel/FlatPart.h diff --git a/DeepSkyStacker/FrameInfo.cpp b/DeepSkyStackerKernel/FrameInfo.cpp similarity index 100% rename from DeepSkyStacker/FrameInfo.cpp rename to DeepSkyStackerKernel/FrameInfo.cpp diff --git a/DeepSkyStacker/FrameInfo.h b/DeepSkyStackerKernel/FrameInfo.h similarity index 100% rename from DeepSkyStacker/FrameInfo.h rename to DeepSkyStackerKernel/FrameInfo.h diff --git a/DeepSkyStacker/FrameInfoSupport.h b/DeepSkyStackerKernel/FrameInfoSupport.h similarity index 53% rename from DeepSkyStacker/FrameInfoSupport.h rename to DeepSkyStackerKernel/FrameInfoSupport.h index 1b1e399a..8d15b6f3 100644 --- a/DeepSkyStacker/FrameInfoSupport.h +++ b/DeepSkyStackerKernel/FrameInfoSupport.h @@ -1,101 +1,117 @@ -#pragma once -#include "commonresource.h" -inline void ExposureToString(double fExposure, CString& strText) -{ - // DELETE THIS ONE DAY - if (fExposure) - { - int lExposure; - - if (fExposure >= 1) - { - lExposure = fExposure; - std::uint32_t dwRemainingTime = lExposure; - std::uint32_t dwHour, - dwMin, - dwSec; - - dwHour = dwRemainingTime / 3600; - dwRemainingTime -= dwHour * 3600; - dwMin = dwRemainingTime / 60; - dwRemainingTime -= dwMin * 60; - dwSec = dwRemainingTime; - - if (dwHour) - strText.Format(IDS_EXPOSURETIME3, dwHour, dwMin, dwSec); - else if (dwMin) - strText.Format(IDS_EXPOSURETIME2, dwMin, dwSec); - else - strText.Format(IDS_EXPOSURETIME1, dwSec); - } - else 
- { - lExposure = 1.0 / fExposure + 0.5; - strText.Format(IDS_EXPOSUREFORMAT_INF, lExposure); - }; - } - else - strText = "-"; -} - -inline QString exposureToString(double fExposure) -{ - QString strText; - - if (fExposure) - { - qint64 exposure; - - if (fExposure >= 1) - { - exposure = fExposure; - qint64 remainingTime = exposure; - qint64 hours, mins, secs; - - hours = remainingTime / 3600; - remainingTime -= hours * 3600; - mins = remainingTime / 60; - remainingTime -= mins * 60; - secs = remainingTime; - - if (hours) - strText = QCoreApplication::translate("StackRecap", "%1 hr %2 mn %3 s", "IDS_EXPOSURETIME3") - .arg(hours) - .arg(mins) - .arg(secs); - else if (mins) - strText = QCoreApplication::translate("StackRecap", "%1 mn %2 s", "IDS_EXPOSURETIME2") - .arg(mins) - .arg(secs); - else - strText = QCoreApplication::translate("StackRecap", "%1 s", "IDS_EXPOSURETIME1") - .arg(secs); - } - else - { - exposure = 1.0 / fExposure + 0.5; - strText = QCoreApplication::translate("StackRecap", "1/%1 s", "IDS_EXPOSUREFORMAT_INF") - .arg(exposure); - }; - } - else - strText = "-"; - - return strText; -} - -inline void ISOToString(int lISOSpeed, CString& strText) -{ - if (lISOSpeed) - strText.Format(_T("%ld"), lISOSpeed); - else - strText = "-"; -} - -inline void GainToString(int lGain, CString& strText) -{ - if (lGain >= 0) - strText.Format(_T("%ld"), lGain); - else - strText = "-"; -} +#pragma once +//#include "commonresource.h" +// inline void ExposureToString(double fExposure, CString& strText) +// { +// // DELETE THIS ONE DAY +// if (fExposure) +// { +// int lExposure; +// +// if (fExposure >= 1) +// { +// lExposure = fExposure; +// std::uint32_t dwRemainingTime = lExposure; +// std::uint32_t dwHour, +// dwMin, +// dwSec; +// +// dwHour = dwRemainingTime / 3600; +// dwRemainingTime -= dwHour * 3600; +// dwMin = dwRemainingTime / 60; +// dwRemainingTime -= dwMin * 60; +// dwSec = dwRemainingTime; +// +// if (dwHour) +// strText.Format(IDS_EXPOSURETIME3, dwHour, dwMin, 
dwSec); +// else if (dwMin) +// strText.Format(IDS_EXPOSURETIME2, dwMin, dwSec); +// else +// strText.Format(IDS_EXPOSURETIME1, dwSec); +// } +// else +// { +// lExposure = 1.0 / fExposure + 0.5; +// strText.Format(IDS_EXPOSUREFORMAT_INF, lExposure); +// }; +// } +// else +// strText = "-"; +// } + +inline QString exposureToString(double fExposure) +{ + QString strText; + + if (fExposure) + { + qint64 exposure; + + if (fExposure >= 1) + { + exposure = fExposure; + qint64 remainingTime = exposure; + qint64 hours, mins, secs; + + hours = remainingTime / 3600; + remainingTime -= hours * 3600; + mins = remainingTime / 60; + remainingTime -= mins * 60; + secs = remainingTime; + + if (hours) + strText = QCoreApplication::translate("StackRecap", "%1 hr %2 mn %3 s", "IDS_EXPOSURETIME3") + .arg(hours) + .arg(mins) + .arg(secs); + else if (mins) + strText = QCoreApplication::translate("StackRecap", "%1 mn %2 s", "IDS_EXPOSURETIME2") + .arg(mins) + .arg(secs); + else + strText = QCoreApplication::translate("StackRecap", "%1 s", "IDS_EXPOSURETIME1") + .arg(secs); + } + else + { + exposure = 1.0 / fExposure + 0.5; + strText = QCoreApplication::translate("StackRecap", "1/%1 s", "IDS_EXPOSUREFORMAT_INF") + .arg(exposure); + }; + } + else + strText = "-"; + + return strText; +} + +inline void ISOToString(int lISOSpeed, CString& strText) +{ + if (lISOSpeed) + strText.Format(_T("%ld"), lISOSpeed); + else + strText = "-"; +} + +inline void GainToString(int lGain, CString& strText) +{ + if (lGain >= 0) + strText.Format(_T("%ld"), lGain); + else + strText = "-"; +} + +inline void ISOToString(int lISOSpeed, QString& strText) +{ + if (lISOSpeed) + strText = QString::number(lISOSpeed); + else + strText = "-"; +} + +inline void GainToString(int lGain, QString& strText) +{ + if (lGain >= 0) + strText = QString::number(lGain); + else + strText = "-"; +} diff --git a/DeepSkyStacker/FrameList.cpp b/DeepSkyStackerKernel/FrameList.cpp similarity index 100% rename from 
DeepSkyStacker/FrameList.cpp rename to DeepSkyStackerKernel/FrameList.cpp diff --git a/DeepSkyStacker/FrameList.h b/DeepSkyStackerKernel/FrameList.h similarity index 100% rename from DeepSkyStacker/FrameList.h rename to DeepSkyStackerKernel/FrameList.h diff --git a/DeepSkyStacker/GrayBitmap.cpp b/DeepSkyStackerKernel/GrayBitmap.cpp similarity index 100% rename from DeepSkyStacker/GrayBitmap.cpp rename to DeepSkyStackerKernel/GrayBitmap.cpp diff --git a/DeepSkyStacker/GrayBitmap.h b/DeepSkyStackerKernel/GrayBitmap.h similarity index 100% rename from DeepSkyStacker/GrayBitmap.h rename to DeepSkyStackerKernel/GrayBitmap.h diff --git a/DeepSkyStacker/GreyMultiBitmap.cpp b/DeepSkyStackerKernel/GreyMultiBitmap.cpp similarity index 100% rename from DeepSkyStacker/GreyMultiBitmap.cpp rename to DeepSkyStackerKernel/GreyMultiBitmap.cpp diff --git a/DeepSkyStacker/GreyMultiBitmap.h b/DeepSkyStackerKernel/GreyMultiBitmap.h similarity index 100% rename from DeepSkyStacker/GreyMultiBitmap.h rename to DeepSkyStackerKernel/GreyMultiBitmap.h diff --git a/DeepSkyStacker/Histogram.h b/DeepSkyStackerKernel/Histogram.h similarity index 95% rename from DeepSkyStacker/Histogram.h rename to DeepSkyStackerKernel/Histogram.h index 2a116c62..d9cd71dd 100644 --- a/DeepSkyStacker/Histogram.h +++ b/DeepSkyStackerKernel/Histogram.h @@ -1,7 +1,7 @@ #pragma once #include "ZExcBase.h" -#include "resource.h" +//#include "resource.h" /* ------------------------------------------------------------------- */ @@ -72,32 +72,33 @@ typedef enum HISTOADJUSTTYPE }HISTOADJUSTTYPE; -inline void HistoAdjustTypeText(HISTOADJUSTTYPE hat, CString & strText) +inline QString HistoAdjustTypeText(HISTOADJUSTTYPE hat) { switch (hat) { case HAT_LINEAR : - strText.LoadString(IDS_HAT_LINEAR); + return QCoreApplication::translate("Histogram", "Linear", "IDS_HAT_LINEAR"); break; case HAT_CUBEROOT : - strText.LoadString(IDS_HAT_CUBEROOT); + return QCoreApplication::translate("Histogram", "Cube Root", "IDS_HAT_CUBEROOT"); 
break; case HAT_SQUAREROOT : - strText.LoadString(IDS_HAT_SQUAREROOT); + return QCoreApplication::translate("Histogram", "Square Root", "IDS_HAT_SQUAREROOT"); break; case HAT_LOG : - strText.LoadString(IDS_HAT_LOG); + return QCoreApplication::translate("Histogram", "Logarithm", "IDS_HAT_LOG"); break; case HAT_LOGLOG : - strText.LoadString(IDS_HAT_LOGLOG); + return QCoreApplication::translate("Histogram", "Log(Log)", "IDS_HAT_LOGLOG"); break; case HAT_LOGSQUAREROOT : - strText.LoadString(IDS_HAT_LOGSQUAREROOT); + return QCoreApplication::translate("Histogram", "Log(Square Root)", "IDS_HAT_LOGSQUAREROOT"); break; case HAT_ASINH : - strText.LoadString(IDS_HAT_ASINH); + return QCoreApplication::translate("Histogram", "ASinH", "IDS_HAT_ASINH"); break; }; + return ""; }; class CHistogramAdjust diff --git a/DeepSkyStacker/ImageListModel.cpp b/DeepSkyStackerKernel/ImageListModel.cpp similarity index 100% rename from DeepSkyStacker/ImageListModel.cpp rename to DeepSkyStackerKernel/ImageListModel.cpp diff --git a/DeepSkyStacker/ImageListModel.h b/DeepSkyStackerKernel/ImageListModel.h similarity index 100% rename from DeepSkyStacker/ImageListModel.h rename to DeepSkyStackerKernel/ImageListModel.h diff --git a/DeepSkyStacker/LinearInterpolationh.h b/DeepSkyStackerKernel/LinearInterpolationh.h similarity index 100% rename from DeepSkyStacker/LinearInterpolationh.h rename to DeepSkyStackerKernel/LinearInterpolationh.h diff --git a/DeepSkyStacker/MasterFrames.cpp b/DeepSkyStackerKernel/MasterFrames.cpp similarity index 99% rename from DeepSkyStacker/MasterFrames.cpp rename to DeepSkyStackerKernel/MasterFrames.cpp index 36b8bd0d..66c3fca3 100644 --- a/DeepSkyStacker/MasterFrames.cpp +++ b/DeepSkyStackerKernel/MasterFrames.cpp @@ -1,5 +1,5 @@ #include -#include "resource.h" +//#include "resource.h" #include "MasterFrames.h" #include "DSSProgress.h" #include "DeBloom.h" diff --git a/DeepSkyStacker/MasterFrames.h b/DeepSkyStackerKernel/MasterFrames.h similarity index 100% rename from 
DeepSkyStacker/MasterFrames.h rename to DeepSkyStackerKernel/MasterFrames.h diff --git a/DeepSkyStacker/MatchingStars.cpp b/DeepSkyStackerKernel/MatchingStars.cpp similarity index 100% rename from DeepSkyStacker/MatchingStars.cpp rename to DeepSkyStackerKernel/MatchingStars.cpp diff --git a/DeepSkyStacker/MatchingStars.h b/DeepSkyStackerKernel/MatchingStars.h similarity index 100% rename from DeepSkyStacker/MatchingStars.h rename to DeepSkyStackerKernel/MatchingStars.h diff --git a/DeepSkyStacker/MedianFilterEngine.cpp b/DeepSkyStackerKernel/MedianFilterEngine.cpp similarity index 100% rename from DeepSkyStacker/MedianFilterEngine.cpp rename to DeepSkyStackerKernel/MedianFilterEngine.cpp diff --git a/DeepSkyStacker/MedianFilterEngine.h b/DeepSkyStackerKernel/MedianFilterEngine.h similarity index 100% rename from DeepSkyStacker/MedianFilterEngine.h rename to DeepSkyStackerKernel/MedianFilterEngine.h diff --git a/DeepSkyStacker/MemoryBitmap.cpp b/DeepSkyStackerKernel/MemoryBitmap.cpp similarity index 100% rename from DeepSkyStacker/MemoryBitmap.cpp rename to DeepSkyStackerKernel/MemoryBitmap.cpp diff --git a/DeepSkyStacker/MemoryBitmap.h b/DeepSkyStackerKernel/MemoryBitmap.h similarity index 100% rename from DeepSkyStacker/MemoryBitmap.h rename to DeepSkyStackerKernel/MemoryBitmap.h diff --git a/DeepSkyStacker/MultiBitmap.h b/DeepSkyStackerKernel/MultiBitmap.h similarity index 100% rename from DeepSkyStacker/MultiBitmap.h rename to DeepSkyStackerKernel/MultiBitmap.h diff --git a/DeepSkyStacker/MultiBitmapProcess.cpp b/DeepSkyStackerKernel/MultiBitmapProcess.cpp similarity index 100% rename from DeepSkyStacker/MultiBitmapProcess.cpp rename to DeepSkyStackerKernel/MultiBitmapProcess.cpp diff --git a/DeepSkyStacker/MultiBitmapProcess.h b/DeepSkyStackerKernel/MultiBitmapProcess.h similarity index 100% rename from DeepSkyStacker/MultiBitmapProcess.h rename to DeepSkyStackerKernel/MultiBitmapProcess.h diff --git a/DeepSkyStacker/Multitask.cpp 
b/DeepSkyStackerKernel/Multitask.cpp similarity index 100% rename from DeepSkyStacker/Multitask.cpp rename to DeepSkyStackerKernel/Multitask.cpp diff --git a/DeepSkyStacker/Multitask.h b/DeepSkyStackerKernel/Multitask.h similarity index 100% rename from DeepSkyStacker/Multitask.h rename to DeepSkyStackerKernel/Multitask.h diff --git a/DeepSkyStacker/PixelTransform.h b/DeepSkyStackerKernel/PixelTransform.h similarity index 100% rename from DeepSkyStacker/PixelTransform.h rename to DeepSkyStackerKernel/PixelTransform.h diff --git a/DeepSkyStacker/RAWUtils.cpp b/DeepSkyStackerKernel/RAWUtils.cpp similarity index 98% rename from DeepSkyStacker/RAWUtils.cpp rename to DeepSkyStackerKernel/RAWUtils.cpp index 47009f6f..7690417e 100644 --- a/DeepSkyStacker/RAWUtils.cpp +++ b/DeepSkyStackerKernel/RAWUtils.cpp @@ -4,7 +4,7 @@ #include "Ztrace.h" #include "Workspace.h" #include "DSSProgress.h" -#include "resource.h" +//#include "resource.h" #include "MemoryBitmap.h" #include "Multitask.h" #include "MedianFilterEngine.h" @@ -460,12 +460,14 @@ namespace { // Only use in this .cpp file // if (false == result) { - CString errorMessage; - errorMessage.Format(IDS_CAMERA_NOT_SUPPORTED, (LPCTSTR)strModel); + //CString errorMessage; + const QString errorMessage(QCoreApplication::translate("RawUtils", "Sorry, LibRaw doesn't support your %1 camera", "IDS_CAMERA_NOT_SUPPORTED").arg(camera)); + +// errorMessage.Format(IDS_CAMERA_NOT_SUPPORTED, (LPCTSTR)strModel); #if defined(_CONSOLE) - std::wcerr << errorMessage; + std::wcerr << errorMessage.toStdWString().c_str(); #else - AfxMessageBox(errorMessage, MB_OK | MB_ICONWARNING); + AfxMessageBox(errorMessage.toStdWString().c_str(), MB_OK | MB_ICONWARNING); #endif } diff --git a/DeepSkyStacker/RAWUtils.h b/DeepSkyStackerKernel/RAWUtils.h similarity index 100% rename from DeepSkyStacker/RAWUtils.h rename to DeepSkyStackerKernel/RAWUtils.h diff --git a/DeepSkyStacker/RationalInterpolation.h b/DeepSkyStackerKernel/RationalInterpolation.h 
similarity index 100% rename from DeepSkyStacker/RationalInterpolation.h rename to DeepSkyStackerKernel/RationalInterpolation.h diff --git a/DeepSkyStacker/RegisterEngine.cpp b/DeepSkyStackerKernel/RegisterEngine.cpp similarity index 100% rename from DeepSkyStacker/RegisterEngine.cpp rename to DeepSkyStackerKernel/RegisterEngine.cpp diff --git a/DeepSkyStacker/RegisterEngine.h b/DeepSkyStackerKernel/RegisterEngine.h similarity index 100% rename from DeepSkyStacker/RegisterEngine.h rename to DeepSkyStackerKernel/RegisterEngine.h diff --git a/DeepSkyStacker/RunningStackingEngine.cpp b/DeepSkyStackerKernel/RunningStackingEngine.cpp similarity index 100% rename from DeepSkyStacker/RunningStackingEngine.cpp rename to DeepSkyStackerKernel/RunningStackingEngine.cpp diff --git a/DeepSkyStacker/RunningStackingEngine.h b/DeepSkyStackerKernel/RunningStackingEngine.h similarity index 100% rename from DeepSkyStacker/RunningStackingEngine.h rename to DeepSkyStackerKernel/RunningStackingEngine.h diff --git a/DeepSkyStacker/Settings.cpp b/DeepSkyStackerKernel/Settings.cpp similarity index 99% rename from DeepSkyStacker/Settings.cpp rename to DeepSkyStackerKernel/Settings.cpp index 77458a28..8d45d582 100644 --- a/DeepSkyStacker/Settings.cpp +++ b/DeepSkyStackerKernel/Settings.cpp @@ -1,6 +1,6 @@ #include -#include "resource.h" +//#include "resource.h" #include "StackingTasks.h" #include "DSSTools.h" diff --git a/DeepSkyStacker/Settings.h b/DeepSkyStackerKernel/Settings.h similarity index 100% rename from DeepSkyStacker/Settings.h rename to DeepSkyStackerKernel/Settings.h diff --git a/DeepSkyStacker/SkyBackground.h b/DeepSkyStackerKernel/SkyBackground.h similarity index 100% rename from DeepSkyStacker/SkyBackground.h rename to DeepSkyStackerKernel/SkyBackground.h diff --git a/DeepSkyStacker/StackedBitmap.cpp b/DeepSkyStackerKernel/StackedBitmap.cpp similarity index 97% rename from DeepSkyStacker/StackedBitmap.cpp rename to DeepSkyStackerKernel/StackedBitmap.cpp index 
1d2cd2d5..5910aa20 100644 --- a/DeepSkyStacker/StackedBitmap.cpp +++ b/DeepSkyStackerKernel/StackedBitmap.cpp @@ -461,56 +461,52 @@ void CStackedBitmap::SaveDSImage(LPCTSTR szStackedFile, LPRECT pRect, ProgressBa }; /* ------------------------------------------------------------------- */ - -#if !defined(_CONSOLE) -class CPixel -{ -public : - double m_fRed, - m_fGreen, - m_fBlue; -private : - void CopyFrom(const CPixel & px) - { - m_fRed = px.m_fRed; - m_fGreen = px.m_fGreen; - m_fBlue = px.m_fBlue; - }; -public : - CPixel(double fRed = 0, double fGreen = 0, double fBlue = 0) - { - m_fRed = fRed; - m_fGreen = fGreen; - m_fBlue = fBlue; - }; - CPixel(const CPixel & px) - { - CopyFrom(px); - }; - - CPixel& operator=(CPixel const& other) = delete; - - ~CPixel() {}; - - bool operator < (const CPixel & px) const - { - if (m_fRedpx.m_fRed) - return false; - else if (m_fGreenpx.m_fGreen) - return false; - else - return m_fBlue PIXELSET; -typedef PIXELSET::iterator PIXELITERATOR; - -HBITMAP CStackedBitmap::GetBitmap(C32BitsBitmap & Bitmap, RECT * pRect) +// +// class CPixel +// { +// public : +// double m_fRed, +// m_fGreen, +// m_fBlue; +// private : +// void CopyFrom(const CPixel & px) +// { +// m_fRed = px.m_fRed; +// m_fGreen = px.m_fGreen; +// m_fBlue = px.m_fBlue; +// }; +// public : +// CPixel(double fRed = 0, double fGreen = 0, double fBlue = 0) +// { +// m_fRed = fRed; +// m_fGreen = fGreen; +// m_fBlue = fBlue; +// }; +// CPixel(const CPixel & px) +// { +// CopyFrom(px); +// }; +// +// CPixel& operator=(CPixel const& other) = delete; +// +// ~CPixel() {}; +// +// bool operator < (const CPixel & px) const +// { +// if (m_fRedpx.m_fRed) +// return false; +// else if (m_fGreenpx.m_fGreen) +// return false; +// else +// return m_fBlue CStackedBitmap::GetBitmap(ProgressBase* const pProgress) diff --git a/DeepSkyStacker/StackedBitmap.h b/DeepSkyStackerKernel/StackedBitmap.h similarity index 98% rename from DeepSkyStacker/StackedBitmap.h rename to 
DeepSkyStackerKernel/StackedBitmap.h index 6f45d341..526de833 100644 --- a/DeepSkyStacker/StackedBitmap.h +++ b/DeepSkyStackerKernel/StackedBitmap.h @@ -3,9 +3,9 @@ #include "Histogram.h" #include "ColorRef.h" -#ifndef _CONSOLE +//#ifndef _CONSOLE #include "BitmapExt.h" -#endif//_CONSOLE +//#endif//_CONSOLE namespace DSS { class ProgressBase; } @@ -306,9 +306,7 @@ public : void SaveTIFF32Bitmap(LPCTSTR szBitmapFile, LPRECT pRect = nullptr, DSS::ProgressBase* pProgress = nullptr, bool bApplySettings = true, bool bFloat = false, TIFFCOMPRESSION TiffComp = TC_NONE); void SaveFITS16Bitmap(LPCTSTR szBitmapFile, LPRECT pRect = nullptr, DSS::ProgressBase* pProgress = nullptr, bool bApplySettings = true); void SaveFITS32Bitmap(LPCTSTR szBitmapFile, LPRECT pRect = nullptr, DSS::ProgressBase* pProgress = nullptr, bool bApplySettings = true, bool bFloat = false); -#if !defined(_CONSOLE) - HBITMAP GetBitmap(C32BitsBitmap & Bitmap, RECT * pRect = nullptr); -#endif + HBITMAP GetHBitmap(C32BitsBitmap & Bitmap, RECT * pRect = nullptr); std::shared_ptr GetBitmap(DSS::ProgressBase* const pProgress = nullptr); void Clear() diff --git a/DeepSkyStacker/StackingEngine.cpp b/DeepSkyStackerKernel/StackingEngine.cpp similarity index 81% rename from DeepSkyStacker/StackingEngine.cpp rename to DeepSkyStackerKernel/StackingEngine.cpp index 2275c088..725b06cb 100644 --- a/DeepSkyStacker/StackingEngine.cpp +++ b/DeepSkyStackerKernel/StackingEngine.cpp @@ -43,7 +43,7 @@ void CLightFramesStackingInfo::SetReferenceFrame(LPCTSTR szReferenceFrame) _tsplitpath(szReferenceFrame, szDrive, szDir, szName, nullptr); m_strReferenceFrame = szReferenceFrame; - m_strStackingFileInfo.Format(_T("%s%s%s.stackinfo.txt"), szDrive, szDir, szName); + m_strStackingFileInfo = QString("%1%2%3.stackinfo.txt").arg(szDrive).arg(szDir).arg(szName); unsigned int dwAlignmentTransformation = 2; Workspace workspace; @@ -52,85 +52,94 @@ void CLightFramesStackingInfo::SetReferenceFrame(LPCTSTR szReferenceFrame) // Init from the 
file m_vLightFrameStackingInfo.clear(); - FILE* hFile = _tfopen((LPCTSTR)m_strStackingFileInfo, _T("rt")); - if (hFile) - { - bool bEnd = false; - CHAR szLine[10000]; + + QFile file(m_strStackingFileInfo); + if (!file.open(QIODevice::Text | QIODevice::ReadOnly)) + return; - if (fgets(szLine, sizeof(szLine), hFile)) - { - int lSavedAlignmentTransformation; - CString strValue; + // Process line by line. + QTextStream stream(&file); + QString currentLine; - strValue = CA2TEX(szLine); - strValue.TrimRight(_T("\n")); + bool bEnd = false; + static const int nMaxRead = 10000; - lSavedAlignmentTransformation = _ttol((LPCTSTR)strValue); + currentLine = file.readLine(nMaxRead).simplified(); + if (currentLine.isEmpty()) + { + bEnd = true; + } + else + { + bool bOK = false; + int lSavedAlignmentTransformation = currentLine.toInt(&bOK); - if (lSavedAlignmentTransformation != static_cast(dwAlignmentTransformation)) - bEnd = true; - } - else + if (lSavedAlignmentTransformation != static_cast(dwAlignmentTransformation)) bEnd = true; + } - if (!bEnd) + if (!bEnd) + { + currentLine = file.readLine(nMaxRead).simplified(); + if (currentLine.isEmpty()) { - if (fgets(szLine, sizeof(szLine), hFile)) - { - CString strInfoFileName; - CString strStoredInfoFileName; - - strStoredInfoFileName = CA2TEX(szLine); - strStoredInfoFileName.TrimRight(_T("\n")); - - GetInfoFileName((LPCTSTR)m_strReferenceFrame, strInfoFileName); - if (strInfoFileName.CompareNoCase(strStoredInfoFileName)) - bEnd = true; - } - else - bEnd = true; - }; - - while (!bEnd) + bEnd = true; + } + else { - CLightFrameStackingInfo lfsi; - bool bResult = true; + CString strInfoFileName; + CString strStoredInfoFileName(currentLine.toStdWString().c_str()); - if (fgets(szLine, sizeof(szLine), hFile)) - { - lfsi.m_strInfoFileName = szLine; - lfsi.m_strInfoFileName.TrimRight(_T("\n")); - } - else + GetInfoFileName((LPCTSTR)m_strReferenceFrame, strInfoFileName); + if (strInfoFileName.CompareNoCase(strStoredInfoFileName)) bEnd = true; 
+ } + }; - if (fgets(szLine, sizeof(szLine), hFile)) - { - lfsi.m_strFileName = szLine; - lfsi.m_strFileName.TrimRight(_T("\n")); - } - else - bEnd = true; + while (!bEnd) + { + CLightFrameStackingInfo lfsi; + bool bResult = true; - if (fgets(szLine, sizeof(szLine), hFile)) - { - CString strParameters; + currentLine = file.readLine(nMaxRead).simplified(); + if (currentLine.isEmpty()) + { + bEnd = true; + } + else + { + lfsi.m_strInfoFileName = currentLine.toStdWString().c_str(); + } - strParameters = szLine; - strParameters.TrimRight(_T("\n")); - bResult = lfsi.m_BilinearParameters.FromText((LPCTSTR)strParameters); - } - else - bEnd = true; + currentLine = file.readLine(nMaxRead).simplified(); + if (currentLine.isEmpty()) + { + bEnd = true; + } + else + { + lfsi.m_strFileName = currentLine.toStdWString().c_str(); + } - if (!bEnd && bResult) - m_vLightFrameStackingInfo.push_back(lfsi); - }; + currentLine = file.readLine(nMaxRead).simplified(); + if (currentLine.isEmpty()) + { + bEnd = true; + } + else + { + CString strParameters; - fclose(hFile); - std::sort(m_vLightFrameStackingInfo.begin(), m_vLightFrameStackingInfo.end()); + strParameters = currentLine.toStdWString().c_str(); + bResult = lfsi.m_BilinearParameters.FromText((LPCTSTR)strParameters); + } + + if (!bEnd && bResult) + m_vLightFrameStackingInfo.push_back(lfsi); }; + + file.close(); + std::sort(m_vLightFrameStackingInfo.begin(), m_vLightFrameStackingInfo.end()); }; /* ------------------------------------------------------------------- */ @@ -225,35 +234,34 @@ void CLightFramesStackingInfo::Save() { ZFUNCTRACE_RUNTIME(); - if (m_strReferenceFrame.GetLength() && m_strStackingFileInfo.GetLength()) + if (m_strReferenceFrame.GetLength() && m_strStackingFileInfo.length()) { - FILE* hFile = _tfopen((LPCTSTR)m_strStackingFileInfo, _T("wt")); - if (hFile) - { - // Save the alignment transformation used - unsigned int dwAlignmentTransformation = 2; - Workspace workspace; + QFile file(m_strStackingFileInfo); + if 
(!file.open(QIODevice::Text | QIODevice::WriteOnly | QIODevice::Truncate)) + return; + QTextStream stream(&file); - dwAlignmentTransformation = workspace.value("Stacking/AlignmentTransformation", (uint)2).toUInt(); - fprintf(hFile,"%ld\n", dwAlignmentTransformation); - - CString strInfoFileName; - GetInfoFileName((LPCTSTR)m_strReferenceFrame, strInfoFileName); - fprintf(hFile, "%s\n", (LPCSTR)CT2CA(strInfoFileName, CP_UTF8)); + // Save the alignment transformation used + unsigned int dwAlignmentTransformation = 2; + Workspace workspace; - for (const auto& stackingInfo : m_vLightFrameStackingInfo) - { - fprintf(hFile, "%s\n", (LPCSTR)CT2CA(stackingInfo.m_strInfoFileName, CP_UTF8)); - fprintf(hFile, "%s\n", (LPCSTR)CT2CA(stackingInfo.m_strFileName, CP_UTF8)); + dwAlignmentTransformation = workspace.value("Stacking/AlignmentTransformation", (uint)2).toUInt(); + stream << dwAlignmentTransformation << Qt::endl; - CString strParameters; + CString strInfoFileName; + GetInfoFileName((LPCTSTR)m_strReferenceFrame, strInfoFileName); + stream << strInfoFileName.GetString() << Qt::endl; - stackingInfo.m_BilinearParameters.ToText(strParameters); - fprintf(hFile, "%s\n", (LPCSTR)CT2CA(strParameters, CP_UTF8)); - }; + for (const auto& stackingInfo : m_vLightFrameStackingInfo) + { + stream << stackingInfo.m_strInfoFileName.GetString() << Qt::endl; + stream << stackingInfo.m_strFileName.GetString() << Qt::endl; - fclose(hFile); + CString strParameters; + stackingInfo.m_BilinearParameters.ToText(strParameters); + stream << strParameters.GetString() << Qt::endl; }; + file.close(); }; }; @@ -762,7 +770,7 @@ bool computeOffsets(CStackingEngine* const pStackingEngine, ProgressBase* const std::atomic_bool stop{ false }; std::atomic nLoopCount{ 1 }; - const QString strText(QCoreApplication::translate("StackingEngine", "Computing offsets", "IDS_COMPUTINGOFFSETS")); + const QString strText(QCoreApplication::translate("Kernel", "Computing offsets", "IDS_COMPUTINGOFFSETS")); if (pProg != 
nullptr) pProg->Progress1(strText, 0); @@ -819,7 +827,7 @@ void CStackingEngine::ComputeOffsets() else m_lNrStackable = std::min(static_cast(m_vBitmaps.size()), 1); m_lNrCometStackable = 0; - const QString strText(QCoreApplication::translate("StackingEngine", "Computing offsets", "IDS_COMPUTINGOFFSETS")); + const QString strText(QCoreApplication::translate("Kernel", "Computing offsets", "IDS_COMPUTINGOFFSETS")); const int lLast = static_cast(m_vBitmaps.size() * m_fKeptPercentage / 100.0); if (m_pProgress) @@ -1125,7 +1133,7 @@ void CStackingEngine::ComputeBitmap() QString strMethod; FormatFromMethod(strMethod, m_pLightTask->m_Method, m_pLightTask->m_fKappa, m_pLightTask->m_lNrIterations); - const QString strText(QCoreApplication::translate("StackingEngine", "Computing Final Picture (%1)", "IDS_COMPUTINGMEDIANLIGHT").arg(strMethod)); + const QString strText(QCoreApplication::translate("Kernel", "Computing Final Picture (%1)", "IDS_COMPUTINGMEDIANLIGHT").arg(strMethod)); m_pProgress->Start1(strText, 1, true); m_pProgress->Progress1(strText, 0); @@ -1226,7 +1234,7 @@ bool CStackingEngine::AdjustBayerDrizzleCoverage() std::unique_ptr pCover = std::make_unique(); pCover->Init(m_rcResult.width(), m_rcResult.height()); - strText = QCoreApplication::translate("StackingEngine", "Stacking - Adjust Bayer - Compute adjustment", "IDS_STACKING_COMPUTINGADJUSTMENT"); + strText = QCoreApplication::translate("Kernel", "Stacking - Adjust Bayer - Compute adjustment", "IDS_STACKING_COMPUTINGADJUSTMENT"); if (m_pProgress) m_pProgress->Start1(strText, static_cast(m_vPixelTransforms.size()), false); @@ -1236,7 +1244,7 @@ bool CStackingEngine::AdjustBayerDrizzleCoverage() ++lNrBitmaps; if (m_pProgress != nullptr) { - strText = QCoreApplication::translate("StackingEngine", "Compute adjustment %1 of %2", "IDS_COMPUTINGADJUSTMENT").arg(lNrBitmaps + 1).arg(m_vPixelTransforms.size()); + strText = QCoreApplication::translate("Kernel", "Compute adjustment %1 of %2", 
"IDS_COMPUTINGADJUSTMENT").arg(lNrBitmaps + 1).arg(m_vPixelTransforms.size()); m_pProgress->Progress1(strText, lNrBitmaps + 1); m_pProgress->Start2(QString(" "), m_rcResult.width() * m_rcResult.height()); } @@ -1291,9 +1299,9 @@ bool CStackingEngine::AdjustBayerDrizzleCoverage() lProgress = 0; if (m_pProgress != nullptr) { - strText = QCoreApplication::translate("StackingEngine", "Stacking - Adjust Bayer - Apply adjustment", "IDS_STACKING_APPLYINGADJUSTMENT"); + strText = QCoreApplication::translate("Kernel", "Stacking - Adjust Bayer - Apply adjustment", "IDS_STACKING_APPLYINGADJUSTMENT"); m_pProgress->Start1(strText, 2, false); - strText = QCoreApplication::translate("StackingEngine", "Compute maximum adjustment", "IDS_STACKING_COMPUTEMAXADJUSTMENT"); + strText = QCoreApplication::translate("Kernel", "Compute maximum adjustment", "IDS_STACKING_COMPUTEMAXADJUSTMENT"); m_pProgress->Start2(strText, m_rcResult.width() * m_rcResult.height()); }; @@ -1328,7 +1336,7 @@ bool CStackingEngine::AdjustBayerDrizzleCoverage() lProgress = 0; if (m_pProgress != nullptr) { - strText = QCoreApplication::translate("StackingEngine", "Applying adjustment", "IDS_STACKING_APPLYADJUSTMENT"); + strText = QCoreApplication::translate("Kernel", "Applying adjustment", "IDS_STACKING_APPLYADJUSTMENT"); m_pProgress->Start2(strText, m_rcResult.width() * m_rcResult.height()); } @@ -1399,7 +1407,7 @@ bool CStackingEngine::SaveCalibratedAndRegisteredLightFrame(CMemoryBitmap* pBitm if (m_pProgress) { - const QString strText(QCoreApplication::translate("StackingEngine", "Saving Registered and Calibrated image in %1", "IDS_SAVINGINTERMEDIATE").arg(QString::fromWCharArray(strOutputFile.GetString()))); + const QString strText(QCoreApplication::translate("Kernel", "Saving Registered and Calibrated image in %1", "IDS_SAVINGINTERMEDIATE").arg(QString::fromWCharArray(strOutputFile.GetString()))); m_pProgress->Start2(strText, 0); }; if (m_IntermediateFileFormat == IFF_TIFF) @@ -1446,7 +1454,7 @@ bool 
CStackingEngine::SaveCalibratedLightFrame(std::shared_ptr pB if (m_pProgress) { - const QString strText(QCoreApplication::translate("StackingEngine", "Saving Calibrated image in %1", "IDS_SAVINGCALIBRATED").arg(QString::fromWCharArray(strOutputFile.GetString()))); + const QString strText(QCoreApplication::translate("Kernel", "Saving Calibrated image in %1", "IDS_SAVINGCALIBRATED").arg(QString::fromWCharArray(strOutputFile.GetString()))); m_pProgress->Start2(strText, 0); }; @@ -1558,7 +1566,7 @@ bool CStackingEngine::SaveCometImage(CMemoryBitmap* pBitmap) const if (m_pProgress) { - const QString strText(QCoreApplication::translate("StackingEngine", "Saving Calibrated image in %1", "IDS_SAVINGCALIBRATED").arg(QString::fromWCharArray(strOutputFile.GetString()))); + const QString strText(QCoreApplication::translate("Kernel", "Saving Calibrated image in %1", "IDS_SAVINGCALIBRATED").arg(QString::fromWCharArray(strOutputFile.GetString()))); m_pProgress->Start2(strText, 0); }; if (m_IntermediateFileFormat == IFF_TIFF) @@ -1604,7 +1612,7 @@ bool CStackingEngine::SaveCometlessImage(CMemoryBitmap* pBitmap) const if (m_pProgress) { - const QString strText(QCoreApplication::translate("StackingEngine", "Saving Calibrated image in %1", "IDS_SAVINGCALIBRATED").arg(QString::fromWCharArray(strOutputFile.GetString()))); + const QString strText(QCoreApplication::translate("Kernel", "Saving Calibrated image in %1", "IDS_SAVINGCALIBRATED").arg(QString::fromWCharArray(strOutputFile.GetString()))); m_pProgress->Start2(strText, 0); } @@ -1833,7 +1841,7 @@ std::pair CStackingEngine::StackLightFrame(std::shared_ptrStart2(strText, 0); }; AHDDemosaicing(pGrayBitmap, pBitmap, m_pProgress); @@ -1876,7 +1884,7 @@ std::pair CStackingEngine::StackLightFrame(std::shared_ptrStart2(strText, 0); } StackTask.m_EntropyWindow.Init(pBitmap, 10, m_pProgress); @@ -1888,7 +1896,7 @@ std::pair CStackingEngine::StackLightFrame(std::shared_ptrStart2(strText, 0); } 
m_BackgroundCalibration.ComputeBackgroundCalibration(pBitmap.get(), bFirst, m_pProgress); @@ -2101,8 +2109,8 @@ bool CStackingEngine::StackAll(CAllStackingTasks& tasks, std::shared_ptr ulFreeSpace)) { - SpaceToString(ulFreeSpace, strFreeSpace); - SpaceToString(ulNeededSpace, strNeededSpace); + SpaceToQString(ulFreeSpace, strFreeSpace); + SpaceToQString(ulNeededSpace, strNeededSpace); - const QString strText(QCoreApplication::translate("StackingEngine", "The process needs temporarily %1 of free space on the %2 drive.\nOnly %3 are available on this drive.", "IDS_RECAP_WARNINGDISKSPACE").arg(QString::fromWCharArray(strNeededSpace)).arg(QString::fromWCharArray(strDrive)).arg(QString::fromWCharArray(strFreeSpace)) + - QCoreApplication::translate("StackingEngine", "\nDo you really want to continue?", "IDS_WANTTOCONTINUE")); + const QString strText(QCoreApplication::translate("Kernel", "The process needs temporarily %1 of free space on the %2 drive.\nOnly %3 are available on this drive.", "IDS_RECAP_WARNINGDISKSPACE").arg(strNeededSpace).arg(QString::fromWCharArray(strDrive)).arg(strFreeSpace) + + QCoreApplication::translate("Kernel", "\nDo you really want to continue?", "IDS_WANTTOCONTINUE")); bContinue = m_pProgress->Warning(strText); } } break; @@ -2285,9 +2293,9 @@ bool CStackingEngine::StackAll(CAllStackingTasks& tasks, std::shared_ptr(strDescription)).arg(static_cast(lightframeInfo.filePath.c_str())); + strText = QCoreApplication::translate("Kernel", "Stacking %1 bit/ch %2 light frame\n%3", "IDS_STACKRGBLIGHT").arg(lightframeInfo.m_lBitPerChannels).arg(static_cast(strDescription)).arg(static_cast(lightframeInfo.filePath.c_str())); else - strText = QCoreApplication::translate("StackingEngine", "Stacking %1 bits gray %2 light frame\n%3", "IDS_STACKGRAYLIGHT").arg(lightframeInfo.m_lBitPerChannels).arg(static_cast(strDescription)).arg(static_cast(lightframeInfo.filePath.c_str())); + strText = QCoreApplication::translate("Kernel", "Stacking %1 bits gray %2 light 
frame\n%3", "IDS_STACKGRAYLIGHT").arg(lightframeInfo.m_lBitPerChannels).arg(static_cast(strDescription)).arg(static_cast(lightframeInfo.filePath.c_str())); ZTRACE_RUNTIME(strText); // First apply transformations @@ -2435,8 +2443,8 @@ bool CStackingEngine::StackLightFrames(CAllStackingTasks& tasks, ProgressBase* c if (pProgress != nullptr && (m_lNrStackable == 1) && (tasks.GetNrLightFrames() > 1)) { - const QString strText(QCoreApplication::translate("StackingEngine", "Only one frame (out of %1) will be stacked.\n\nYou should check/change the star detection threshold to detect more stars\nand help DeepSkyStacker find a transformation between the reference frame and the others.\n", "IDS_WARNING_ONLYONEFRAME").arg(tasks.GetNrLightFrames()) + - QCoreApplication::translate("StackingEngine", "\nDo you really want to continue?", "IDS_WANTTOCONTINUE")); + const QString strText(QCoreApplication::translate("Kernel", "Only one frame (out of %1) will be stacked.\n\nYou should check/change the star detection threshold to detect more stars\nand help DeepSkyStacker find a transformation between the reference frame and the others.\n", "IDS_WARNING_ONLYONEFRAME").arg(tasks.GetNrLightFrames()) + + QCoreApplication::translate("Kernel", "\nDo you really want to continue?", "IDS_WANTTOCONTINUE")); bContinue = m_pProgress->Warning(strText); } @@ -2446,7 +2454,7 @@ bool CStackingEngine::StackLightFrames(CAllStackingTasks& tasks, ProgressBase* c if (tasks.IsCometAvailable() && tasks.GetCometStackingMode() == CSM_COMETSTAR) m_lNrCurrentStackable = m_lNrCometStackable; - const QString strText(QCoreApplication::translate("StackingEngine", "Stacking", "IDS_STACKING")); + const QString strText(QCoreApplication::translate("Kernel", "Stacking", "IDS_STACKING")); if (pProgress != nullptr) pProgress->Start1(strText, m_lNrCurrentStackable, true); @@ -2648,18 +2656,17 @@ bool CStackingEngine::GetDefaultOutputFileName(CString & strFileName, LPCTSTR sz }; /* 
------------------------------------------------------------------- */ -static void GetISOGainStrings(CTaskInfo *pTask, CString const &strISO, CString const &strGain, - CString const **ppstrISOGainText, CString *strISOGainValue) +static void GetISOGainStrings(CTaskInfo *pTask, const QString& strISO, const QString& strGain, QString& strISOGainText, QString& strISOGainValue) { if (pTask->HasISOSpeed()) { - ISOToString(pTask->m_lISOSpeed, *strISOGainValue); - *ppstrISOGainText = &strISO; + ISOToString(pTask->m_lISOSpeed, strISOGainValue); + strISOGainText = strISO; } else { - GainToString(pTask->m_lGain, *strISOGainValue); - *ppstrISOGainText = &strGain; + GainToString(pTask->m_lGain, strISOGainValue); + strISOGainText = strGain; } } @@ -2683,79 +2690,78 @@ void CStackingEngine::WriteDescription(CAllStackingTasks& tasks, LPCTSTR szOutpu strOutputFile += szDir; strOutputFile += szName; strOutputFile += _T(".html"); - FILE * hFile; - hFile = _tfopen(strOutputFile, _T("wt")); - if (hFile) + QFile file(QString::fromWCharArray(strOutputFile.GetString())); + if (!file.open(QIODevice::Text | QIODevice::WriteOnly | QIODevice::Truncate)) + return; + QTextStream stream(&file); + { - CString strText; QString strTempText; _tsplitpath(strOutputFile, nullptr, nullptr, szName, nullptr); - fprintf(hFile, "\n"); + stream << "" << Qt::endl; - fprintf(hFile, "\n"); - fprintf(hFile, ""); - fprintf(hFile, "DeepSkyStacker - %s", (LPCSTR)CT2CA(szName, CP_UTF8)); - fprintf(hFile, "\n"); + stream << "" << Qt::endl; + stream << ""; + stream << "DeepSkyStacker - " << szName << ""; + stream << "" << Qt::endl; - fprintf(hFile, "\n"); - fprintf(hFile, "-> %s

\n", (LPCSTR)CT2CA(szName, CP_UTF8)); + stream << "" << Qt::endl; + stream << "-> " << szName << "

" << Qt::endl; // Stacking Mode - strText.Format(IDS_RECAP_STACKINGMODE); - fprintf(hFile, "%s", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "Stacking mode: ", "IDS_RECAP_STACKINGMODE"); switch (tasks.getStackingMode()) { case SM_NORMAL : - strText.Format(IDS_RECAP_STACKINGMODE_NORMAL); + stream << QCoreApplication::translate("Kernel", "Standard", "IDS_RECAP_STACKINGMODE_NORMAL"); break; case SM_MOSAIC : - strText.Format(IDS_RECAP_STACKINGMODE_MOSAIC); + stream << QCoreApplication::translate("Kernel", "Mosaic", "IDS_RECAP_STACKINGMODE_MOSAIC"); break; case SM_INTERSECTION : - strText.Format(IDS_RECAP_STACKINGMODE_INTERSECTION); + stream << QCoreApplication::translate("Kernel", "Intersection", "IDS_RECAP_STACKINGMODE_INTERSECTION"); break; case SM_CUSTOM : - strText.Format(IDS_RECAP_STACKINGMODE_CUSTOM); + stream << QCoreApplication::translate("Kernel", "Custom Rectangle", "IDS_RECAP_STACKINGMODE_CUSTOM"); break; }; - fprintf(hFile, "%s
", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << "
"; // Alignment method - strText.Format(IDS_RECAP_ALIGNMENT); - fprintf(hFile, "%s", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "Alignment method: ", "IDS_RECAP_ALIGNMENT"); switch (tasks.GetAlignmentMethod()) { case 0 : case 1 : - strText.Format(IDS_ALIGN_AUTO); + stream << QCoreApplication::translate("Kernel", "Automatic", "IDS_ALIGN_AUTO"); break; case 2 : - strText.Format(IDS_ALIGN_BILINEAR); + stream << QCoreApplication::translate("Kernel", "Bilinear", "IDS_ALIGN_BILINEAR"); break; case 3 : - strText.Format(IDS_ALIGN_BISQUARED); + stream << QCoreApplication::translate("Kernel", "Bisquared", "IDS_ALIGN_BISQUARED"); break; case 4 : - strText.Format(IDS_ALIGN_BICUBIC); + stream << QCoreApplication::translate("Kernel", "Bicubic", "IDS_ALIGN_BICUBIC"); break; case 5 : - strText.Format(IDS_ALIGN_NONE); + stream << QCoreApplication::translate("Kernel", "No Alignment", "IDS_ALIGN_NONE"); break; }; - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << "
" << Qt::endl; // Drizzle ? const int dwDrizzle = tasks.GetPixelSizeMultiplier(); if (dwDrizzle > 1) { - strText.Format(IDS_RECAP_DRIZZLE, dwDrizzle); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "Drizzle x%1 enabled", "IDS_RECAP_DRIZZLE").arg(dwDrizzle); + stream << "
" << Qt::endl; }; // Comet @@ -2764,21 +2770,20 @@ void CStackingEngine::WriteDescription(CAllStackingTasks& tasks, LPCTSTR szOutpu COMETSTACKINGMODE CometStackingMode; CometStackingMode = tasks.GetCometStackingMode(); - strText.Format(IDS_RECAP_COMETSTACKING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "Comet processing: ", "IDS_RECAP_COMETSTACKING"); switch (CometStackingMode) { case CSM_STANDARD : - strText.Format(IDS_RECAP_COMETSTACKING_NONE); + stream << QCoreApplication::translate("Kernel", "Align on stars (no specific processing)", "IDS_RECAP_COMETSTACKING_NONE"); break; case CSM_COMETONLY : - strText.Format(IDS_RECAP_COMETSTACKING_COMET); + stream << QCoreApplication::translate("Kernel", "Align on comet", "IDS_RECAP_COMETSTACKING_COMET"); break; case CSM_COMETSTAR : - strText.Format(IDS_RECAP_COMETSTACKING_BOTH); + stream << QCoreApplication::translate("Kernel", "Align on stars and comet", "IDS_RECAP_COMETSTACKING_BOTH"); break; }; - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8));; + stream << "
" << Qt::endl; }; // Post calibration settings @@ -2787,52 +2792,50 @@ void CStackingEngine::WriteDescription(CAllStackingTasks& tasks, LPCTSTR szOutpu tasks.GetPostCalibrationSettings(pcs); if (pcs.m_bHot) { - strText.Format(IDS_RECAP_COSMETICHOT, pcs.m_lHotFilter, pcs.m_fHotDetection); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "Cosmetic applied to hot pixels (Filter = %ld px, Detection Threshold = %.1f%%)\n", "IDS_RECAP_COSMETICHOT").arg(pcs.m_lHotFilter).arg(pcs.m_fHotDetection); + stream << "
" << Qt::endl; }; if (pcs.m_bCold) { - strText.Format(IDS_RECAP_COSMETICCOLD, pcs.m_lColdFilter, pcs.m_fColdDetection); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "Cosmetic applied to cold pixels (Filter = %ld px, Detection Threshold = %.1f%%)\n", "IDS_RECAP_COSMETICCOLD").arg(pcs.m_lColdFilter).arg(pcs.m_fColdDetection); + stream << "
" << Qt::endl; }; if (pcs.m_bHot || pcs.m_bCold) - fprintf(hFile, "

"); + stream << "

"; // Now the list of tasks int i, j; int lTotalExposure = 0; - CString strBackgroundCalibration; - CString strPerChannelBackgroundCalibration; + QString strBackgroundCalibration; + QString strPerChannelBackgroundCalibration; CString strDarkOptimization; CString strDarkFactor; - CString strExposure; - CString strISOGainValue; - CString const *pstrISOGainText; - CString strISOText; - CString strGainText; + QString strExposure; + QString strISOGainValue; + QString strISOGainText; + QString strISOText; + QString strGainText; CString strHotPixels; - CString strYesNo; + QString strYesNo; + QString strYes(QCoreApplication::translate("Kernel", "Yes", "IDS_YES")); + QString strNo(QCoreApplication::translate("Kernel", "No", "IDS_NO")); BACKGROUNDCALIBRATIONMODE CalibrationMode; CalibrationMode = tasks.GetBackgroundCalibrationMode(); - strISOText.LoadString(IDS_ISO); - strGainText.LoadString(IDS_GAIN); - - strYesNo.LoadString((CalibrationMode == BCM_RGB) ? IDS_YES : IDS_NO); - strBackgroundCalibration.Format(IDS_RECAP_BACKGROUNDCALIBRATION, strYesNo.GetString()); - - strYesNo.LoadString((CalibrationMode == BCM_PERCHANNEL) ? IDS_YES : IDS_NO); - strPerChannelBackgroundCalibration.Format(IDS_RECAP_PERCHANNELBACKGROUNDCALIBRATION, strYesNo.GetString()); + strISOText = QCoreApplication::translate("Kernel", "ISO", "IDS_ISO"); + strGainText = QCoreApplication::translate("Kernel", "ISO", "IDS_GAIN"); + strBackgroundCalibration = QCoreApplication::translate("Kernel", "RGB Channels Background Calibration: %1\n", "IDS_RECAP_BACKGROUNDCALIBRATION").arg((CalibrationMode == BCM_RGB) ? strYes : strNo); + strPerChannelBackgroundCalibration = QCoreApplication::translate("Kernel", "Per Channel Background Calibration: %1\n", "IDS_RECAP_PERCHANNELBACKGROUNDCALIBRATION").arg((CalibrationMode == BCM_PERCHANNEL) ? strYes : strNo); for (i = 0;i"); + stream << "
"; int lTaskExposure = 0; for (j = 0;jm_vBitmaps.size();j++) @@ -2840,235 +2843,189 @@ void CStackingEngine::WriteDescription(CAllStackingTasks& tasks, LPCTSTR szOutpu lTotalExposure += lTaskExposure; - ExposureToString(lTaskExposure, strExposure); - GetISOGainStrings(si.m_pLightTask, strISOText, strGainText, &pstrISOGainText, &strISOGainValue); - - strText.Format(IDS_RECAP_STEP, i+1, si.m_pLightTask->m_vBitmaps.size(), pstrISOGainText->GetString(), strISOGainValue.GetString()); - fprintf(hFile, "%s", i, (LPCSTR)CT2CA(strText, CP_UTF8)); - fprintf(hFile, CT2CA(strExposure, CP_UTF8)); - fprintf(hFile, "
"); - fprintf(hFile, "
    "); - fprintf(hFile, CT2CA(strBackgroundCalibration, CP_UTF8)); - fprintf(hFile, "
    "); - fprintf(hFile, CT2CA(strPerChannelBackgroundCalibration, CP_UTF8)); - fprintf(hFile, "
"); + strExposure = exposureToString(lTaskExposure); + GetISOGainStrings(si.m_pLightTask, strISOText, strGainText, strISOGainText, strISOGainValue); + + QString strText(QCoreApplication::translate("Kernel", "Stacking step %1\n ->%2 frames (%3: %4) - total exposure: ", "IDS_RECAP_STEP") + .arg(i + 1) + .arg(si.m_pLightTask->m_vBitmaps.size()) + .arg(strISOGainText) + .arg(strISOGainValue)); + + stream << "" << strText << ""; + stream << strExposure << "
"; + stream << "
    " << strBackgroundCalibration << "
    " << strPerChannelBackgroundCalibration << "
"; + if (si.m_pLightTask->m_vBitmaps.size()>1) { - fprintf(hFile, "
    "); - strText.Format(IDS_RECAP_METHOD); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - FormatFromMethod(strTempText, si.m_pLightTask->m_Method, si.m_pLightTask->m_fKappa, si.m_pLightTask->m_lNrIterations); - strText = strTempText.toStdWString().c_str(); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
"); + FormatFromMethod(strTempText, si.m_pLightTask->m_Method, si.m_pLightTask->m_fKappa, si.m_pLightTask->m_lNrIterations); + stream << "
    " << QCoreApplication::translate("Kernel", "Method: ", "IDS_RECAP_METHOD") << strTempText << "
"; if ((si.m_pLightTask->m_Method != MBP_AVERAGE) && (IsRawBayer() || IsFITSRawBayer())) { - fprintf(hFile, "
"); - strText.Format(IDS_RECAP_WARNINGBAYERDRIZZLE); - fprintf(hFile, CT2CA(strText, CP_UTF8)); + stream << "
" << QCoreApplication::translate("Kernel", "Warning: the Bayer Drizzle option selected in the RAW DDP settings may lead to strange results with a method other than average.", "IDS_RECAP_WARNINGBAYERDRIZZLE"); }; }; - fprintf(hFile, "
"); + stream << "
"; if (si.m_pDarkTask || si.m_pOffsetTask || si.m_pFlatTask || si.m_pDarkFlatTask) - fprintf(hFile, "
    "); + stream << "
      "; if (si.m_pOffsetTask) { - ExposureToString(si.m_pOffsetTask->m_fExposure, strExposure); - GetISOGainStrings(si.m_pOffsetTask, strISOText, strGainText, &pstrISOGainText, &strISOGainValue); + strExposure = exposureToString(si.m_pOffsetTask->m_fExposure); + GetISOGainStrings(si.m_pOffsetTask, strISOText, strGainText, strISOGainText, strISOGainValue); - strText.Format(IDS_RECAP_OFFSET, si.m_pOffsetTask->m_vBitmaps.size(), pstrISOGainText->GetString(), strISOGainValue.GetString(), strExposure.GetString()); - fprintf(hFile, CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "\t-> Offset: %1 frames (%2: %3) exposure: %4\n", "IDS_RECAP_OFFSET") + .arg(si.m_pOffsetTask->m_vBitmaps.size()) + .arg(strISOGainText) + .arg(strISOGainValue) + .arg(strExposure); if (si.m_pOffsetTask->m_vBitmaps.size()>1) { - fprintf(hFile, "
        "); - strText.Format(IDS_RECAP_METHOD); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - FormatFromMethod(strTempText, si.m_pOffsetTask->m_Method, si.m_pOffsetTask->m_fKappa, si.m_pOffsetTask->m_lNrIterations); - strText = strTempText.toStdWString().c_str(); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
      "); + FormatFromMethod(strTempText, si.m_pOffsetTask->m_Method, si.m_pOffsetTask->m_fKappa, si.m_pOffsetTask->m_lNrIterations); + stream << "
        " << QCoreApplication::translate("Kernel", "Method: ", "IDS_RECAP_METHOD") << strTempText << "
      "; } else - fprintf(hFile, "
      "); + stream << "
      "; if (si.m_pOffsetTask->HasISOSpeed()) { if (si.m_pOffsetTask->m_lISOSpeed != si.m_pLightTask->m_lISOSpeed) - { - strText.Format(IDS_RECAP_ISOWARNING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: ISO speed not matching with light frames\n", "IDS_RECAP_ISOWARNING"); } else { if (si.m_pOffsetTask->m_lGain != si.m_pLightTask->m_lGain) - { - strText.Format(IDS_RECAP_GAINWARNING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: Gain does not match Light frame gain\n", "IDS_RECAP_GAINWARNING"); }; - fprintf(hFile, "
    "); + stream << "
"; } else { - strText.Format(IDS_RECAP_NOOFFSET); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "\t-> No Offset\n", "IDS_RECAP_NOOFFSET"); + } + if (si.m_pDarkTask) { - ExposureToString(si.m_pDarkTask->m_fExposure, strExposure); - GetISOGainStrings(si.m_pDarkTask, strISOText, strGainText, &pstrISOGainText, &strISOGainValue); + strExposure = exposureToString(si.m_pDarkTask->m_fExposure); + GetISOGainStrings(si.m_pDarkTask, strISOText, strGainText, strISOGainText, strISOGainValue); - strText.Format(IDS_RECAP_DARK, si.m_pDarkTask->m_vBitmaps.size(), pstrISOGainText->GetString(), strISOGainValue.GetString(), strExposure.GetString()); - fprintf(hFile, CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "\t-> Dark: %1 frames (%2 : %3) exposure: %4\n", "IDS_RECAP_DARK") + .arg(si.m_pDarkTask->m_vBitmaps.size()) + .arg(strISOGainText) + .arg(strISOGainValue) + .arg(strExposure); if (si.m_pDarkTask->m_vBitmaps.size()>1) { - fprintf(hFile, "
    "); - strText.Format(IDS_RECAP_METHOD); - fprintf(hFile, CT2CA(strText, CP_UTF8)); + stream << "
      " << QCoreApplication::translate("Kernel", "Method: ", "IDS_RECAP_METHOD"); FormatFromMethod(strTempText, si.m_pDarkTask->m_Method, si.m_pDarkTask->m_fKappa, si.m_pDarkTask->m_lNrIterations); - strText = strTempText.toStdWString().c_str(); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
    "); - }; + stream << strTempText << "
"; + } - fprintf(hFile, "
    "); - fprintf(hFile, CT2CA(strDarkOptimization, CP_UTF8)); - fprintf(hFile, CT2CA(strHotPixels, CP_UTF8)); + stream << "
      " << strDarkOptimization << strHotPixels; if (strDarkFactor.GetLength()) - { - fprintf(hFile, CT2CA(strDarkFactor, CP_UTF8)); - fprintf(hFile, "
      "); - }; + stream << strDarkFactor << "
      "; if (si.m_pDarkTask->HasISOSpeed()) { if (si.m_pDarkTask->m_lISOSpeed != si.m_pLightTask->m_lISOSpeed) - { - strText.Format(IDS_RECAP_ISOWARNING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
      "); - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: ISO speed not matching with light frames\n", "IDS_RECAP_ISOWARNING") << "
      "; } else { if (si.m_pDarkTask->m_lGain != si.m_pLightTask->m_lGain) - { - strText.Format(IDS_RECAP_GAINWARNING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
      "); - }; - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: Gain does not match Light frame gain\n", "IDS_RECAP_GAINWARNING") << "
      "; + } if (!AreExposureEquals(si.m_pDarkTask->m_fExposure, si.m_pLightTask->m_fExposure)) - { - strText.Format(IDS_RECAP_EXPOSUREWARNING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
      "); - }; - fprintf(hFile, "
    "); + stream << QCoreApplication::translate("Kernel", "\t\tWarning: exposure time not matching with light frames\n", "IDS_RECAP_EXPOSUREWARNING") << "
    "; + stream << "
"; } else { - strText.Format(IDS_RECAP_NODARK); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "\t-> No Dark\n", "IDS_RECAP_NODARK"); + } + if (si.m_pDarkFlatTask && si.m_pFlatTask) { - ExposureToString(si.m_pDarkFlatTask->m_fExposure, strExposure); - GetISOGainStrings(si.m_pDarkFlatTask, strISOText, strGainText, &pstrISOGainText, &strISOGainValue); + strExposure = exposureToString(si.m_pDarkFlatTask->m_fExposure); + GetISOGainStrings(si.m_pDarkFlatTask, strISOText, strGainText, strISOGainText, strISOGainValue); - strText.Format(IDS_RECAP_DARKFLAT, si.m_pDarkFlatTask->m_vBitmaps.size(), pstrISOGainText->GetString(), strISOGainValue.GetString(), strExposure.GetString()); - fprintf(hFile, CT2CA(strText, CP_UTF8)); + stream << QCoreApplication::translate("Kernel", "\t-> Dark Flat: %1 frames (%2 : %3) exposure: %4\n", "IDS_RECAP_DARKFLAT") + .arg(si.m_pDarkFlatTask->m_vBitmaps.size()) + .arg(strISOGainText) + .arg(strISOGainValue) + .arg(strExposure); if (si.m_pDarkFlatTask->m_vBitmaps.size()>1) { - fprintf(hFile, "
    "); - strText.Format(IDS_RECAP_METHOD); - fprintf(hFile, (LPCSTR)CT2CA(strText+"
    ", CP_UTF8)); + stream << "
      " << QCoreApplication::translate("Kernel", "Method: ", "IDS_RECAP_METHOD") << "
      "; FormatFromMethod(strTempText, si.m_pDarkFlatTask->m_Method, si.m_pDarkFlatTask->m_fKappa, si.m_pDarkFlatTask->m_lNrIterations); - strText = strTempText.toStdWString().c_str(); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
    "); + stream << strTempText << "
"; } else - fprintf(hFile, "
"); + stream << "
"; if (si.m_pDarkFlatTask->HasISOSpeed()) { if (si.m_pDarkFlatTask->m_lISOSpeed != si.m_pFlatTask->m_lISOSpeed) - { - strText.Format(IDS_RECAP_ISOWARNINGDARKFLAT); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: ISO speed not matching with flat frames\n", "IDS_RECAP_ISOWARNINGDARKFLAT"); } else { if (si.m_pDarkFlatTask->m_lGain != si.m_pFlatTask->m_lGain) - { - strText.Format(IDS_RECAP_GAINWARNINGDARKFLAT); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: Dark Flat frame gain does not match Flat frame gain\n", "IDS_RECAP_GAINWARNINGDARKFLAT"); + } if (!AreExposureEquals(si.m_pDarkFlatTask->m_fExposure, si.m_pFlatTask->m_fExposure)) - { - strText.Format(IDS_RECAP_EXPOSUREWARNINGDARKFLAT); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; - fprintf(hFile, ""); - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: exposure time not matching with flat frames\n", "IDS_RECAP_EXPOSUREWARNINGDARKFLAT"); + stream << ""; + } + if (si.m_pFlatTask) { - ExposureToString(si.m_pFlatTask->m_fExposure, strExposure); - GetISOGainStrings(si.m_pFlatTask, strISOText, strGainText, &pstrISOGainText, &strISOGainValue); + strExposure = exposureToString(si.m_pFlatTask->m_fExposure); + GetISOGainStrings(si.m_pFlatTask, strISOText, strGainText, strISOGainText, strISOGainValue); + + GetISOGainStrings(si.m_pDarkFlatTask, strISOText, strGainText, strISOGainText, strISOGainValue); + + stream << QCoreApplication::translate("Kernel", "\t-> Flat: %1 frames (%2: %3) exposure: %4\n", "IDS_RECAP_FLAT") + .arg(si.m_pFlatTask->m_vBitmaps.size()) + .arg(strISOGainText) + .arg(strISOGainValue) + .arg(strExposure); - strText.Format(IDS_RECAP_FLAT, si.m_pFlatTask->m_vBitmaps.size(), pstrISOGainText->GetString(), strISOGainValue.GetString(), strExposure.GetString()); - fprintf(hFile, CT2CA(strText, CP_UTF8)); if (si.m_pFlatTask->m_vBitmaps.size()>1) { - 
fprintf(hFile, "
    "); - strText.Format(IDS_RECAP_METHOD); - fprintf(hFile, CT2CA(strText, CP_UTF8)); + stream << "
      " << QCoreApplication::translate("Kernel", "Method: ", "IDS_RECAP_METHOD") << "
      "; FormatFromMethod(strTempText, si.m_pFlatTask->m_Method, si.m_pFlatTask->m_fKappa, si.m_pFlatTask->m_lNrIterations); - strText = strTempText.toStdWString().c_str(); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - fprintf(hFile, "
    "); - }; + stream << strTempText << "
"; + } if (si.m_pFlatTask->HasISOSpeed()) { if (si.m_pFlatTask->m_lISOSpeed != si.m_pLightTask->m_lISOSpeed) - { - strText.Format(IDS_RECAP_ISOWARNING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "\t\tWarning: ISO speed not matching with light frames\n", "IDS_RECAP_ISOWARNING"); } else { if (si.m_pFlatTask->m_lGain != si.m_pLightTask->m_lGain) - { - strText.Format(IDS_RECAP_GAINWARNING); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; - }; - fprintf(hFile, ""); + stream << QCoreApplication::translate("Kernel", "\t\tWarning: Gain does not match Light frame gain\n", "IDS_RECAP_GAINWARNING"); + } + stream << ""; } else { - strText.Format(IDS_RECAP_NOFLAT); - fprintf(hFile, CT2CA(strText, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "\t-> No Flat\n", "IDS_RECAP_NOFLAT"); + } if (si.m_pDarkTask || si.m_pOffsetTask || si.m_pFlatTask || si.m_pDarkFlatTask) - fprintf(hFile, ""); - fprintf(hFile, "

"); - }; - }; + stream << ""; + stream << "
"; + } + } if (m_vBitmaps.size()) { @@ -3078,72 +3035,54 @@ void CStackingEngine::WriteDescription(CAllStackingTasks& tasks, LPCTSTR szOutpu if (si.m_pLightTask) { - fprintf(hFile, "

\n"); - fprintf(hFile, "", i); - strText.LoadString(IDS_TYPE_LIGHT); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); - for (j = 0;jm_vBitmaps.size();j++) - fprintf(hFile, "%s
", reinterpret_cast(si.m_pLightTask->m_vBitmaps[j].filePath.generic_u8string().c_str())); + stream << "

" << Qt::endl; + stream << ""; + + stream << "" << QCoreApplication::translate("Kernel", "Light", "IDS_TYPE_LIGHT") << "
\n"; + for (j = 0; j < si.m_pLightTask->m_vBitmaps.size(); j++) + stream << si.m_pLightTask->m_vBitmaps[j].filePath.generic_u8string().c_str() << "
"; if (si.m_pOffsetTask && si.m_pOffsetTask->m_vBitmaps.size()) { - strText.LoadString(IDS_TYPE_OFFSET); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << "" << QCoreApplication::translate("Kernel", "Bias/Offset", "IDS_TYPE_OFFSET") << "
\n"; if (si.m_pOffsetTask->m_strOutputFile != si.m_pOffsetTask->m_vBitmaps[0].filePath.c_str()) - { - strText.LoadString(IDS_TYPE_MASTEROFFSET); - fprintf(hFile, "%s -> %s
", (LPCSTR)CT2CA(strText, CP_UTF8), (LPCSTR)CT2CA(si.m_pOffsetTask->m_strOutputFile, CP_UTF8)); - }; - for (j = 0;jm_vBitmaps.size();j++) - fprintf(hFile, "%s
", reinterpret_cast(si.m_pOffsetTask->m_vBitmaps[j].filePath.generic_u8string().c_str())); - }; + stream << QCoreApplication::translate("Kernel", "Master Offset", "IDS_TYPE_MASTEROFFSET") << " -> " << si.m_pOffsetTask->m_strOutputFile << "
"; + for (j = 0; j < si.m_pOffsetTask->m_vBitmaps.size(); j++) + stream << si.m_pOffsetTask->m_vBitmaps[j].filePath.generic_u8string().c_str() << "
"; + } if (si.m_pDarkTask && si.m_pDarkTask->m_vBitmaps.size()) { - strText.LoadString(IDS_TYPE_DARK); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << "" << QCoreApplication::translate("Kernel", "Dark", "IDS_TYPE_DARK") << "
\n"; if (si.m_pDarkTask->m_strOutputFile != si.m_pDarkTask->m_vBitmaps[0].filePath.c_str()) - { - strText.LoadString(IDS_TYPE_MASTERDARK); - fprintf(hFile, "%s -> %s
", (LPCSTR)CT2CA(strText, CP_UTF8), (LPCSTR)CT2CA(si.m_pDarkTask->m_strOutputFile, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "Master Dark", "IDS_TYPE_MASTERDARK") << " -> " << si.m_pDarkTask->m_strOutputFile << "
"; for (j = 0;jm_vBitmaps.size();j++) - fprintf(hFile, "%s
", reinterpret_cast(si.m_pDarkTask->m_vBitmaps[j].filePath.generic_u8string().c_str())); - }; + stream << si.m_pDarkTask->m_vBitmaps[j].filePath.generic_u8string().c_str() << "
"; + } if (si.m_pDarkFlatTask && si.m_pDarkFlatTask->m_vBitmaps.size()) { - strText.LoadString(IDS_TYPE_DARKFLAT); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << "" << QCoreApplication::translate("Kernel", "Dark Flat", "IDS_TYPE_DARKFLAT") << "
\n"; if (si.m_pDarkFlatTask->m_strOutputFile != si.m_pDarkFlatTask->m_vBitmaps[0].filePath.c_str()) - { - strText.LoadString(IDS_TYPE_MASTERDARKFLAT); - fprintf(hFile, "%s -> %s
", (LPCSTR)CT2CA(strText, CP_UTF8), - (LPCSTR)CT2CA(si.m_pDarkFlatTask->m_strOutputFile, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "Master Dark Flat", "IDS_TYPE_MASTERDARKFLAT") << " -> " << si.m_pDarkFlatTask->m_strOutputFile << "
"; for (j = 0;jm_vBitmaps.size();j++) - fprintf(hFile, "%s
", reinterpret_cast(si.m_pDarkFlatTask->m_vBitmaps[j].filePath.generic_u8string().c_str())); + stream << si.m_pDarkFlatTask->m_vBitmaps[j].filePath.generic_u8string().c_str() << "
"; }; if (si.m_pFlatTask && si.m_pFlatTask->m_vBitmaps.size()) { - strText.LoadString(IDS_TYPE_FLAT); - fprintf(hFile, "%s
\n", (LPCSTR)CT2CA(strText, CP_UTF8)); + stream << "" << QCoreApplication::translate("Kernel", "Flat", "IDS_TYPE_FLAT") << "
\n"; if (si.m_pFlatTask->m_strOutputFile != si.m_pFlatTask->m_vBitmaps[0].filePath.c_str()) - { - strText.LoadString(IDS_TYPE_MASTERFLAT); - fprintf(hFile, "%s -> %s
", (LPCSTR)CT2CA(strText, CP_UTF8), - (LPCSTR)CT2CA(si.m_pFlatTask->m_strOutputFile, CP_UTF8)); - }; + stream << QCoreApplication::translate("Kernel", "Master Flat", "IDS_TYPE_MASTERFLAT") << " -> " << si.m_pFlatTask->m_strOutputFile << "
"; for (j = 0;jm_vBitmaps.size();j++) - fprintf(hFile, "%s
", reinterpret_cast(si.m_pFlatTask->m_vBitmaps[j].filePath.generic_u8string().c_str())); + stream << si.m_pFlatTask->m_vBitmaps[j].filePath.generic_u8string().c_str() << "
"; }; }; }; }; - fprintf(hFile, "
DeepSkyStacker %s", VERSION_DEEPSKYSTACKER); - fprintf(hFile, "\n\n"); - fclose(hFile); + stream << "
DeepSkyStacker " << VERSION_DEEPSKYSTACKER << ""; + stream << "" << Qt::endl; + stream << "" << Qt::endl; }; }; }; diff --git a/DeepSkyStacker/StackingEngine.h b/DeepSkyStackerKernel/StackingEngine.h similarity index 99% rename from DeepSkyStacker/StackingEngine.h rename to DeepSkyStackerKernel/StackingEngine.h index 0be24b7e..0ea2b0e2 100644 --- a/DeepSkyStacker/StackingEngine.h +++ b/DeepSkyStackerKernel/StackingEngine.h @@ -108,7 +108,7 @@ class CLightFramesStackingInfo { private : CString m_strReferenceFrame; - CString m_strStackingFileInfo; + QString m_strStackingFileInfo; LIGHTFRAMESTACKINGINFOVECTOR m_vLightFrameStackingInfo; private : @@ -125,7 +125,7 @@ public : void Clear() { m_strReferenceFrame.Empty(); - m_strStackingFileInfo.Empty(); + m_strStackingFileInfo.clear(); m_vLightFrameStackingInfo.clear(); }; }; diff --git a/DeepSkyStacker/StackingTasks.cpp b/DeepSkyStackerKernel/StackingTasks.cpp similarity index 99% rename from DeepSkyStacker/StackingTasks.cpp rename to DeepSkyStackerKernel/StackingTasks.cpp index 74f1b987..6637afe7 100644 --- a/DeepSkyStacker/StackingTasks.cpp +++ b/DeepSkyStackerKernel/StackingTasks.cpp @@ -11,7 +11,6 @@ #include "Settings.h" #include "ZExcBase.h" #include "MemoryBitmap.h" -#include "resource.h" using namespace DSS; @@ -37,24 +36,6 @@ bool AreExposureEquals(double fExposure1, double fExposure2) return bResult; }; -void SpaceToString(__int64 ulSpace, CString& strSpace) -{ - double fKb, - fMb, - fGb; - - fKb = ulSpace / 1024.0; - fMb = fKb / 1024.0; - fGb = fMb / 1024.0; - - if (fKb < 900) - strSpace.Format(IDS_RECAP_KILOBYTES, fKb); - else if (fMb < 900) - strSpace.Format(IDS_RECAP_MEGABYTES, fMb); - else - strSpace.Format(IDS_RECAP_GIGABYTES, fGb); -}; - void SpaceToQString(__int64 ulSpace, QString& strSpace) { double fKb(ulSpace / 1024.0); diff --git a/DeepSkyStacker/StackingTasks.h b/DeepSkyStackerKernel/StackingTasks.h similarity index 99% rename from DeepSkyStacker/StackingTasks.h rename to 
DeepSkyStackerKernel/StackingTasks.h index 1b2acb88..b75bd75b 100644 --- a/DeepSkyStacker/StackingTasks.h +++ b/DeepSkyStackerKernel/StackingTasks.h @@ -430,7 +430,6 @@ public : /* ------------------------------------------------------------------- */ -void SpaceToString(__int64 ulSpace, CString& strSpace); void SpaceToQString(__int64 ulSpace, QString& strSpace); /* ------------------------------------------------------------------- */ diff --git a/DeepSkyStacker/StarMask.cpp b/DeepSkyStackerKernel/StarMask.cpp similarity index 100% rename from DeepSkyStacker/StarMask.cpp rename to DeepSkyStackerKernel/StarMask.cpp diff --git a/DeepSkyStacker/StarMask.h b/DeepSkyStackerKernel/StarMask.h similarity index 100% rename from DeepSkyStacker/StarMask.h rename to DeepSkyStackerKernel/StarMask.h diff --git a/DeepSkyStacker/Stars.h b/DeepSkyStackerKernel/Stars.h similarity index 100% rename from DeepSkyStacker/Stars.h rename to DeepSkyStackerKernel/Stars.h diff --git a/DeepSkyStackerKernel/StdAfx.h b/DeepSkyStackerKernel/StdAfx.h new file mode 100644 index 00000000..c1d566a1 --- /dev/null +++ b/DeepSkyStackerKernel/StdAfx.h @@ -0,0 +1,78 @@ +// stdafx.h : include file for standard system include files, +// or project specific include files that are used frequently, but +// are changed infrequently +// +#pragma once + +// Visual Leak Detector +// +#include + +// Qt Files +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// Standard Libraries +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace bip = boost::interprocess; +namespace fs = std::filesystem; + +using std::min; +using std::max; + +// Windows Files (eventually to go!) 
+#define VC_EXTRALEAN // Exclude rarely-used stuff from Windows headers +#define _WIN32_WINNT _WIN32_WINNT_WIN7 // Want to support Windows XP and up + +#include +#include // MFC core and standard components +#include // MFC extensions +#include // MFC support for Internet Explorer 4 Common Controls +#include +#include // MFC support for Windows Common Controls +#include +#include +#include +#include // for IRichEditOleCallback +#include +using namespace Gdiplus; diff --git a/DeepSkyStacker/TIFFUtil.cpp b/DeepSkyStackerKernel/TIFFUtil.cpp similarity index 99% rename from DeepSkyStacker/TIFFUtil.cpp rename to DeepSkyStackerKernel/TIFFUtil.cpp index 27753933..16fb613d 100644 --- a/DeepSkyStacker/TIFFUtil.cpp +++ b/DeepSkyStackerKernel/TIFFUtil.cpp @@ -1,6 +1,6 @@ #include "stdafx.h" #include "TIFFUtil.h" -#include "resource.h" +//#include "resource.h" #include "Ztrace.h" #include "BitmapInfo.h" #include "DSSProgress.h" diff --git a/DeepSkyStacker/TIFFUtil.h b/DeepSkyStackerKernel/TIFFUtil.h similarity index 100% rename from DeepSkyStacker/TIFFUtil.h rename to DeepSkyStackerKernel/TIFFUtil.h diff --git a/DeepSkyStacker/TaskInfo.cpp b/DeepSkyStackerKernel/TaskInfo.cpp similarity index 100% rename from DeepSkyStacker/TaskInfo.cpp rename to DeepSkyStackerKernel/TaskInfo.cpp diff --git a/DeepSkyStacker/TaskInfo.h b/DeepSkyStackerKernel/TaskInfo.h similarity index 96% rename from DeepSkyStacker/TaskInfo.h rename to DeepSkyStackerKernel/TaskInfo.h index aa7350c0..e8fcc0c3 100644 --- a/DeepSkyStacker/TaskInfo.h +++ b/DeepSkyStackerKernel/TaskInfo.h @@ -1,40 +1,40 @@ -#pragma once - -#include "DSSCommon.h" -#include "FrameInfo.h" - -class CMultiBitmap; -class CMemoryBitmap; -namespace DSS { class ProgressBase; } -using namespace DSS; -class CTaskInfo -{ -public: - std::uint32_t m_dwTaskID; - std::uint32_t m_groupID; - PICTURETYPE m_TaskType; - int m_lISOSpeed; - int m_lGain; - double m_fExposure; - double m_fAperture; - bool m_bUnmodified; - bool m_bDone; - CString 
m_strOutputFile; - FRAMEINFOVECTOR m_vBitmaps; - MULTIBITMAPPROCESSMETHOD m_Method; - double m_fKappa; - int m_lNrIterations; - std::shared_ptr m_pMaster; - -public: - CTaskInfo(); - CTaskInfo(const CTaskInfo&) = default; - CTaskInfo& operator=(const CTaskInfo&) = default; - virtual ~CTaskInfo() = default; - - void SetMethod(MULTIBITMAPPROCESSMETHOD Method, double fKappa, int lNrIterations); - void CreateEmptyMaster(const CMemoryBitmap* pBitmap); - void AddToMaster(CMemoryBitmap* pBitmap, ProgressBase* pProgress); - std::shared_ptr GetMaster(ProgressBase* const pProgress); - bool HasISOSpeed() const; -}; +#pragma once + +#include "DSSCommon.h" +#include "FrameInfo.h" + +class CMultiBitmap; +class CMemoryBitmap; +namespace DSS { class ProgressBase; } +using namespace DSS; +class CTaskInfo +{ +public: + std::uint32_t m_dwTaskID; + std::uint32_t m_groupID; + PICTURETYPE m_TaskType; + int m_lISOSpeed; + int m_lGain; + double m_fExposure; + double m_fAperture; + bool m_bUnmodified; + bool m_bDone; + CString m_strOutputFile; + FRAMEINFOVECTOR m_vBitmaps; + MULTIBITMAPPROCESSMETHOD m_Method; + double m_fKappa; + int m_lNrIterations; + std::shared_ptr m_pMaster; + +public: + CTaskInfo(); + CTaskInfo(const CTaskInfo&) = default; + CTaskInfo& operator=(const CTaskInfo&) = default; + virtual ~CTaskInfo() = default; + + void SetMethod(MULTIBITMAPPROCESSMETHOD Method, double fKappa, int lNrIterations); + void CreateEmptyMaster(const CMemoryBitmap* pBitmap); + void AddToMaster(CMemoryBitmap* pBitmap, ProgressBase* pProgress); + std::shared_ptr GetMaster(ProgressBase* const pProgress); + bool HasISOSpeed() const; +}; diff --git a/DeepSkyStacker/Workspace.cpp b/DeepSkyStackerKernel/Workspace.cpp similarity index 100% rename from DeepSkyStacker/Workspace.cpp rename to DeepSkyStackerKernel/Workspace.cpp diff --git a/DeepSkyStacker/Workspace.h b/DeepSkyStackerKernel/Workspace.h similarity index 100% rename from DeepSkyStacker/Workspace.h rename to DeepSkyStackerKernel/Workspace.h 
diff --git a/DeepSkyStacker/avx.cpp b/DeepSkyStackerKernel/avx.cpp similarity index 97% rename from DeepSkyStacker/avx.cpp rename to DeepSkyStackerKernel/avx.cpp index c7abd8f6..15f0d695 100644 --- a/DeepSkyStacker/avx.cpp +++ b/DeepSkyStackerKernel/avx.cpp @@ -1,1013 +1,1013 @@ -#include "stdafx.h" -#include "avx_support.h" -#include "dssrect.h" -#include "avx.h" -#include "PixelTransform.h" -#include "BilinearParameters.h" -#include "TaskInfo.h" -#include "BackgroundCalibration.h" -#include "avx_entropy.h" -#include "EntropyInfo.h" - -AvxStacking::AvxStacking(int lStart, int lEnd, CMemoryBitmap& inputbm, CMemoryBitmap& tempbm, const DSSRect& resultRect, AvxEntropy& entrdat) : - lineStart{ lStart }, lineEnd{ lEnd }, colEnd{ inputbm.Width() }, - width{ colEnd }, height{ lineEnd - lineStart }, - resultWidth{ resultRect.width() }, resultHeight{ resultRect.height() }, - xCoordinates(width >= 0 && height >= 0 ? AvxSupport::numberOfAvxVectors(width) * height : 0), - yCoordinates(width >= 0 && height >= 0 ? AvxSupport::numberOfAvxVectors(width) * height : 0), - redPixels(width >= 0 && height >= 0 ? 
AvxSupport::numberOfAvxVectors(width) * height : 0), - greenPixels{}, - bluePixels{}, - inputBitmap{ inputbm }, - tempBitmap{ tempbm }, - avxCfa{ lStart, lEnd, inputbm }, - entropyData{ entrdat } -{ - if (width < 0 || height < 0) - throw std::invalid_argument("End index smaller than start index for line or column of AvxStacking"); - - resizeColorVectors(AvxSupport::numberOfAvxVectors(width) * height); -} - -void AvxStacking::init(const int lStart, const int lEnd) -{ - if (!AvxSupport::checkSimdAvailability()) - return; - - lineStart = lStart; - lineEnd = lEnd; - height = lineEnd - lineStart; - const size_t nrVectors = AvxSupport::numberOfAvxVectors(width) * height; - xCoordinates.resize(nrVectors); - yCoordinates.resize(nrVectors); - redPixels.resize(nrVectors); - resizeColorVectors(nrVectors); -} - -void AvxStacking::resizeColorVectors(const size_t nrVectors) -{ - if (AvxSupport{ tempBitmap }.isColorBitmap()) - { - greenPixels.resize(nrVectors); - bluePixels.resize(nrVectors); - } - if (AvxSupport{ inputBitmap }.isMonochromeCfaBitmapOfType()) - { - avxCfa.init(lineStart, lineEnd); - } -} - -int AvxStacking::stack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& backgroundCalibrationDef, const int pixelSizeMultiplier) -{ - static_assert(sizeof(unsigned int) == sizeof(std::uint32_t)); - - if (!AvxSupport::checkSimdAvailability()) - return 1; - - int rval = 1; - if (doStack(pixelTransformDef, taskInfo, backgroundCalibrationDef, pixelSizeMultiplier) == 0 - || doStack(pixelTransformDef, taskInfo, backgroundCalibrationDef, pixelSizeMultiplier) == 0 - || doStack(pixelTransformDef, taskInfo, backgroundCalibrationDef, pixelSizeMultiplier) == 0) - { - rval = 0; - } - return AvxSupport::zeroUpper(rval); -} - -template -int AvxStacking::doStack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& backgroundCalibrationDef, const int pixelSizeMultiplier) -{ - if (pixelSizeMultiplier 
!= 1 || pixelTransformDef.m_lPixelSizeMultiplier != 1) - return 1; - - // Check input bitmap. - const AvxSupport avxInputSupport{ inputBitmap }; - if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) - return 1; - - // Check output (temp) bitmap. - const AvxSupport avxTempSupport{ tempBitmap }; - if (!avxTempSupport.isColorBitmapOfType() && !avxTempSupport.isMonochromeBitmapOfType()) - return 1; - - if (avxInputSupport.isMonochromeCfaBitmapOfType() && avxCfa.interpolate(lineStart, lineEnd, pixelSizeMultiplier) != 0) - return 1; - if (pixelTransform(pixelTransformDef) != 0) - return 1; - if (backgroundCalibration(backgroundCalibrationDef) != 0) - return 1; - - // Pixel partitioning - // Has 4 things to distinguish: Color/Monochrome, Entropy yes/no - const bool isColor = avxTempSupport.isColorBitmap(); - if (taskInfo.m_Method == MBP_ENTROPYAVERAGE) - { - if (isColor && pixelPartitioning() != 0) - return 1; - if (!isColor && pixelPartitioning() != 0) - return 1; - } - else // No entropy average - { - if (isColor && pixelPartitioning() != 0) - return 1; - if (!isColor && pixelPartitioning() != 0) - return 1; - } - - return 0; -}; - -int AvxStacking::pixelTransform(const CPixelTransform& pixelTransformDef) -{ - const CBilinearParameters& bilinearParams = pixelTransformDef.m_BilinearParameters; - - // Number of vectors with 8 pixels each to process. - const size_t nrVectors = AvxSupport::numberOfAvxVectors(width); - const float fxShift = static_cast(pixelTransformDef.m_fXShift + (pixelTransformDef.m_bUseCometShift ? pixelTransformDef.m_fXCometShift : 0.0)); - const float fyShift = static_cast(pixelTransformDef.m_fYShift + (pixelTransformDef.m_bUseCometShift ? pixelTransformDef.m_fYCometShift : 0.0)); - const __m256 fxShiftVec = _mm256_set1_ps(fxShift); - - // Superfast version if no transformation required: indices = coordinates. 
- if (bilinearParams.Type == TT_BILINEAR && ( - bilinearParams.fXWidth == 1.0f && bilinearParams.fYWidth == 1.0f && - bilinearParams.a1 == 1.0f && bilinearParams.b2 == 1.0f && - bilinearParams.a0 == 0.0f && bilinearParams.a2 == 0.0f && bilinearParams.a3 == 0.0f && - bilinearParams.b0 == 0.0f && bilinearParams.b1 == 0.0f && bilinearParams.b3 == 0.0f - )) - { - for (int row = 0; row < height; ++row) - { - const __m256 fyShiftVec = _mm256_set1_ps(static_cast(lineStart + row) + fyShift); - __m256* pXLine = &xCoordinates.at(row * nrVectors); - __m256* pYLine = &yCoordinates.at(row * nrVectors); - __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); - - for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) - { - const __m256 fxline = _mm256_cvtepi32_ps(xline); - xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); - _mm256_store_ps((float*)pXLine, _mm256_add_ps(fxline, fxShiftVec)); - _mm256_store_ps((float*)pYLine, fyShiftVec); - } - } - return 0; - } - - const float fa0 = static_cast(bilinearParams.a0); - const float fa1 = static_cast(bilinearParams.a1); - const float fa2 = static_cast(bilinearParams.a2); - const float fa3 = static_cast(bilinearParams.a3); - const float fb0 = static_cast(bilinearParams.b0); - const float fb1 = static_cast(bilinearParams.b1); - const float fb2 = static_cast(bilinearParams.b2); - const float fb3 = static_cast(bilinearParams.b3); - const __m256 xWidth = _mm256_set1_ps(static_cast(bilinearParams.fXWidth)); - const __m256 yWidth = _mm256_set1_ps(static_cast(bilinearParams.fYWidth)); - const __m256 a0 = _mm256_set1_ps(fa0); - const __m256 a1 = _mm256_set1_ps(fa1); - const __m256 a2 = _mm256_set1_ps(fa2); - const __m256 a3 = _mm256_set1_ps(fa3); - const __m256 b0 = _mm256_set1_ps(fb0); - const __m256 b1 = _mm256_set1_ps(fb1); - const __m256 b2 = _mm256_set1_ps(fb2); - const __m256 b3 = _mm256_set1_ps(fb3); - const __m256 fyShiftVec = _mm256_set1_ps(fyShift); - - const auto linearTransformX = [&a0, &a1, &a2, 
&a3](const __m256 x, const __m256 y, const __m256 xy) -> __m256 - { - return _mm256_fmadd_ps(a3, xy, _mm256_fmadd_ps(a2, y, _mm256_fmadd_ps(a1, x, a0))); // (((a0 + a1*x) + a2*y) + a3*x*y) - }; - const auto linearTransformY = [&b0, &b1, &b2, &b3](const __m256 x, const __m256 y, const __m256 xy) -> __m256 - { - return _mm256_fmadd_ps(b3, xy, _mm256_fmadd_ps(b2, y, _mm256_fmadd_ps(b1, x, b0))); // (((b0 + b1*x) + b2*y) + b3*x*y) - }; - - if (bilinearParams.Type == TT_BILINEAR) - { - for (int row = 0; row < height; ++row) - { - const float y = static_cast(lineStart + row) / static_cast(bilinearParams.fYWidth); - const __m256 vy = _mm256_set1_ps(y); - __m256* pXLine = &xCoordinates.at(row * nrVectors); - __m256* pYLine = &yCoordinates.at(row * nrVectors); - // Vector with x-indices of the current 8 pixels of the line. - __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); - - for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) - { - const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); - // Indices of the next 8 pixels. - xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); - - const __m256 xy = _mm256_mul_ps(vx, vy); - // X- and y-coordinates for the bilinear transformation of the current 8 pixels. - const __m256 xr = linearTransformX(vx, vy, xy); - const __m256 yr = linearTransformY(vx, vy, xy); - - // Save result. 
- _mm256_store_ps((float*)pXLine, _mm256_fmadd_ps(xr, xWidth, fxShiftVec)); // xr * fxWidth + fxShift - _mm256_store_ps((float*)pYLine, _mm256_fmadd_ps(yr, yWidth, fyShiftVec)); // yr * fyWidth + fyShift - } - } - return 0; - } - - const float fa4 = static_cast(bilinearParams.a4); - const float fa5 = static_cast(bilinearParams.a5); - const float fa6 = static_cast(bilinearParams.a6); - const float fa7 = static_cast(bilinearParams.a7); - const float fa8 = static_cast(bilinearParams.a8); - const float fb4 = static_cast(bilinearParams.b4); - const float fb5 = static_cast(bilinearParams.b5); - const float fb6 = static_cast(bilinearParams.b6); - const float fb7 = static_cast(bilinearParams.b7); - const float fb8 = static_cast(bilinearParams.b8); - const __m256 a4 = _mm256_set1_ps(fa4); - const __m256 a5 = _mm256_set1_ps(fa5); - const __m256 a6 = _mm256_set1_ps(fa6); - const __m256 a7 = _mm256_set1_ps(fa7); - const __m256 a8 = _mm256_set1_ps(fa8); - const __m256 b4 = _mm256_set1_ps(fb4); - const __m256 b5 = _mm256_set1_ps(fb5); - const __m256 b6 = _mm256_set1_ps(fb6); - const __m256 b7 = _mm256_set1_ps(fb7); - const __m256 b8 = _mm256_set1_ps(fb8); - - const auto squaredTransformX = [&a4, &a5, &a6, &a7, &a8](const __m256 xLinear, const __m256 x2, const __m256 y2, const __m256 x2y, const __m256 xy2, const __m256 x2y2) -> __m256 - { - return _mm256_fmadd_ps(a8, x2y2, _mm256_fmadd_ps(a7, xy2, _mm256_fmadd_ps(a6, x2y, _mm256_fmadd_ps(a5, y2, _mm256_fmadd_ps(a4, x2, xLinear))))); // (((((xl + a4*x2) + a5*y2) + a6*x2y) + a7*xy2) + a8*x2y2) - }; - const auto squaredTransformY = [&b4, &b5, &b6, &b7, &b8](const __m256 yLinear, const __m256 x2, const __m256 y2, const __m256 x2y, const __m256 xy2, const __m256 x2y2) -> __m256 - { - return _mm256_fmadd_ps(b8, x2y2, _mm256_fmadd_ps(b7, xy2, _mm256_fmadd_ps(b6, x2y, _mm256_fmadd_ps(b5, y2, _mm256_fmadd_ps(b4, x2, yLinear))))); // (((((yl + b4*x2) + b5*y2) + b6*x2y) + b7*xy2) + b8*x2y2) - }; - - if (bilinearParams.Type == TT_BISQUARED) 
- { - - for (int row = 0; row < height; ++row) - { - const float y = static_cast(lineStart + row) / static_cast(bilinearParams.fYWidth); - const __m256 vy = _mm256_set1_ps(y); - __m256* pXLine = &xCoordinates.at(row * nrVectors); - __m256* pYLine = &yCoordinates.at(row * nrVectors); - __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); - - for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) - { - const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); - xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); - - // Linear part - const __m256 xy = _mm256_mul_ps(vx, vy); - const __m256 rlx = linearTransformX(vx, vy, xy); - const __m256 rly = linearTransformY(vx, vy, xy); - - // Square parameters - const __m256 x2 = _mm256_mul_ps(vx, vx); - const __m256 y2 = _mm256_mul_ps(vy, vy); - const __m256 x2y = _mm256_mul_ps(x2, vy); - const __m256 xy2 = _mm256_mul_ps(vx, y2); - const __m256 x2y2 = _mm256_mul_ps(x2, y2); - - // The bisqared transformation. - const __m256 xr = squaredTransformX(rlx, x2, y2, x2y, xy2, x2y2); - const __m256 yr = squaredTransformY(rly, x2, y2, x2y, xy2, x2y2); - - _mm256_store_ps((float*)pXLine, _mm256_fmadd_ps(xr, xWidth, fxShiftVec)); - _mm256_store_ps((float*)pYLine, _mm256_fmadd_ps(yr, yWidth, fyShiftVec)); - } - } - return 0; - } - - const float fa9 = static_cast(bilinearParams.a9); - const float fa10 = static_cast(bilinearParams.a10); - const float fa11 = static_cast(bilinearParams.a11); - const float fa12 = static_cast(bilinearParams.a12); - const float fa13 = static_cast(bilinearParams.a13); - const float fa14 = static_cast(bilinearParams.a14); - const float fa15 = static_cast(bilinearParams.a15); - const float fb9 = static_cast(bilinearParams.b9); - const float fb10 = static_cast(bilinearParams.b10); - const float fb11 = static_cast(bilinearParams.b11); - const float fb12 = static_cast(bilinearParams.b12); - const float fb13 = static_cast(bilinearParams.b13); - const float fb14 = 
static_cast(bilinearParams.b14); - const float fb15 = static_cast(bilinearParams.b15); - const __m256 a9 = _mm256_set1_ps(fa9); - const __m256 a10 = _mm256_set1_ps(fa10); - const __m256 a11 = _mm256_set1_ps(fa11); - const __m256 a12 = _mm256_set1_ps(fa12); - const __m256 a13 = _mm256_set1_ps(fa13); - const __m256 a14 = _mm256_set1_ps(fa14); - const __m256 a15 = _mm256_set1_ps(fa15); - const __m256 b9 = _mm256_set1_ps(fb9); - const __m256 b10 = _mm256_set1_ps(fb10); - const __m256 b11 = _mm256_set1_ps(fb11); - const __m256 b12 = _mm256_set1_ps(fb12); - const __m256 b13 = _mm256_set1_ps(fb13); - const __m256 b14 = _mm256_set1_ps(fb14); - const __m256 b15 = _mm256_set1_ps(fb15); - - const auto cubicTransformX = [&a9, &a10, &a11, &a12, &a13, &a14, &a15]( - const __m256 xSquared, const __m256 x3, const __m256 y3, const __m256 x3y, const __m256 xy3, const __m256 x3y2, const __m256 x2y3, const __m256 x3y3) -> __m256 - { - // (((((squarePart + a9*x3) + a10*y3) + a11*x3y) + a12*xy3) + a13*x3y2) + a14*x2y3) + a15*x3y3) - return _mm256_fmadd_ps(a15, x3y3, _mm256_fmadd_ps(a14, x2y3, _mm256_fmadd_ps(a13, x3y2, _mm256_fmadd_ps(a12, xy3, _mm256_fmadd_ps(a11, x3y, _mm256_fmadd_ps(a10, y3, _mm256_fmadd_ps(a9, x3, xSquared))))))); - }; - const auto cubicTransformY = [&b9, &b10, &b11, &b12, &b13, &b14, &b15]( - const __m256 ySquared, const __m256 x3, const __m256 y3, const __m256 x3y, const __m256 xy3, const __m256 x3y2, const __m256 x2y3, const __m256 x3y3) -> __m256 - { - // (((((squarePart + b9*x3) + b10*y3) + b11*x3y) + b12*xy3) + b13*x3y2) + b14*x2y3) + b15*x3y3) - return _mm256_fmadd_ps(b15, x3y3, _mm256_fmadd_ps(b14, x2y3, _mm256_fmadd_ps(b13, x3y2, _mm256_fmadd_ps(b12, xy3, _mm256_fmadd_ps(b11, x3y, _mm256_fmadd_ps(b10, y3, _mm256_fmadd_ps(b9, x3, ySquared))))))); - }; - - if (bilinearParams.Type == TT_BICUBIC) - { - for (int row = 0; row < height; ++row) - { - const float y = static_cast(lineStart + row) / static_cast(bilinearParams.fYWidth); - const __m256 vy = 
_mm256_set1_ps(y); - __m256* pXLine = &xCoordinates.at(row * nrVectors); - __m256* pYLine = &yCoordinates.at(row * nrVectors); - __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); - - // Do it in 2 steps, so that the loops get smaller, and the compiler can better keep data in CPU registers. - // (1) Linear and squared part. - // (2) Cubic part. - - for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) - { - const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); - xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); - - // Linear part - const __m256 xy = _mm256_mul_ps(vx, vy); - const __m256 rlx = linearTransformX(vx, vy, xy); - const __m256 rly = linearTransformY(vx, vy, xy); - - // Square part - const __m256 x2 = _mm256_mul_ps(vx, vx); - const __m256 y2 = _mm256_mul_ps(vy, vy); - const __m256 x2y = _mm256_mul_ps(x2, vy); - const __m256 xy2 = _mm256_mul_ps(vx, y2); - const __m256 x2y2 = _mm256_mul_ps(x2, y2); - const __m256 rsx = squaredTransformX(rlx, x2, y2, x2y, xy2, x2y2); - const __m256 rsy = squaredTransformY(rly, x2, y2, x2y, xy2, x2y2); - - _mm256_store_ps((float*)pXLine, rsx); - _mm256_store_ps((float*)pYLine, rsy); - } - - pXLine = &xCoordinates.at(row * nrVectors); - pYLine = &yCoordinates.at(row * nrVectors); - xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); - - for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) - { - const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); - xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); - - const __m256 x2 = _mm256_mul_ps(vx, vx); - const __m256 y2 = _mm256_mul_ps(vy, vy); - - // Cubic parameters - const __m256 x3 = _mm256_mul_ps(x2, vx); - const __m256 y3 = _mm256_mul_ps(y2, vy); - const __m256 x3y = _mm256_mul_ps(x3, vy); - const __m256 xy3 = _mm256_mul_ps(vx, y3); - const __m256 x3y2 = _mm256_mul_ps(x3, y2); - const __m256 x2y3 = _mm256_mul_ps(x2, y3); - const __m256 x3y3 = _mm256_mul_ps(x3, y3); - - // Load the squared 
part (has been calculated in previous step). - const __m256 rsx = _mm256_load_ps((const float*)pXLine); - const __m256 rsy = _mm256_load_ps((const float*)pYLine); - - // The bicubic transformation - const __m256 xr = cubicTransformX(rsx, x3, y3, x3y, xy3, x3y2, x2y3, x3y3); - const __m256 yr = cubicTransformY(rsy, x3, y3, x3y, xy3, x3y2, x2y3, x3y3); - - _mm256_store_ps((float*)pXLine, _mm256_fmadd_ps(xr, xWidth, fxShiftVec)); - _mm256_store_ps((float*)pYLine, _mm256_fmadd_ps(yr, yWidth, fyShiftVec)); - } - } - return 0; - } - - return 1; -}; - -template -int AvxStacking::backgroundCalibLoop(const LoopFunction& loopFunc, const class AvxSupport& avxInputSupport, const InterpolParam& redParams, const InterpolParam& greenParams, const InterpolParam& blueParams) -{ - if (avxInputSupport.isColorBitmapOfType()) - { - const size_t w = static_cast(this->width); - const size_t startNdx = w * lineStart; - loopFunc(&avxInputSupport.redPixels().at(startNdx), w, redParams, redPixels); - loopFunc(&avxInputSupport.greenPixels().at(startNdx), w, greenParams, greenPixels); - loopFunc(&avxInputSupport.bluePixels().at(startNdx), w, blueParams, bluePixels); - return 0; - } - if constexpr (std::is_same::value) - { - if (avxInputSupport.isMonochromeCfaBitmapOfType()) - { - const size_t w = avxCfa.nrVectorsPerLine(); - loopFunc(avxCfa.redCfaBlock(), w, redParams, redPixels); - loopFunc(avxCfa.greenCfaBlock(), w, greenParams, greenPixels); - loopFunc(avxCfa.blueCfaBlock(), w, blueParams, bluePixels); - return 0; - } - } - if (avxInputSupport.isMonochromeBitmapOfType()) - { - const size_t w = static_cast(this->width); - const size_t startNdx = w * lineStart; - loopFunc(&avxInputSupport.grayPixels().at(startNdx), w, redParams, redPixels); - return 0; - } - return 1; -} - -inline float readColorValue(const std::uint16_t c) { return static_cast(c); } -inline float readColorValue(const std::uint32_t c) { return static_cast(c >> 16); } -inline float readColorValue(const float c) { return c; } - 
-template -int AvxStacking::backgroundCalibration(const CBackgroundCalibration& backgroundCalibrationDef) -{ - // We calculate vectors with 16 pixels each, so this is the number of vectors to process. - const int nrVectors = width / 16; - const AvxSupport avxInputSupport{ inputBitmap }; - - if (backgroundCalibrationDef.m_BackgroundCalibrationMode == BCM_NONE) - { - // Just copy color values as they are, pixel by pixel. - const auto loop = [this, nrVectors](const auto* const pPixels, const size_t nrElementsPerLine, const auto&, std::vector<__m256>& result) -> void - { - const size_t internalBufferNrVectors = AvxSupport::numberOfAvxVectors(this->width); - - for (int row = 0; row < this->height; ++row) - { - const T* pColor = reinterpret_cast(pPixels + row * nrElementsPerLine); - __m256* pResult = &result.at(row * internalBufferNrVectors); - for (int counter = 0; counter < nrVectors; ++counter, pColor += 16, pResult += 2) - { - const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); - _mm256_store_ps((float*)pResult, lo8); - _mm256_store_ps((float*)(pResult + 1), hi8); - } - // Remaining pixels of line - float* pRemaining = reinterpret_cast(pResult); - for (int n = nrVectors * 16; n < this->colEnd; ++n, ++pColor, ++pRemaining) - { - *pRemaining = readColorValue(*pColor); - } - } - }; - - return backgroundCalibLoop(loop, avxInputSupport, backgroundCalibrationDef.m_riRed, backgroundCalibrationDef.m_riGreen, backgroundCalibrationDef.m_riBlue); - } - else if (backgroundCalibrationDef.m_BackgroundInterpolation == BCI_RATIONAL) - { - const auto loop = [this, nrVectors](const auto* const pPixels, const size_t nrElementsPerLine, const auto& params, std::vector<__m256>& result) -> void - { - const __m256 a = _mm256_set1_ps(params.getParameterA()); - const __m256 b = _mm256_set1_ps(params.getParameterB()); - const __m256 c = _mm256_set1_ps(params.getParameterC()); - const __m256 fmin = _mm256_set1_ps(params.getParameterMin()); - const __m256 fmax = 
_mm256_set1_ps(params.getParameterMax()); - - const auto interpolate = [&a, &b, &c, &fmin, &fmax](const __m256 color) noexcept -> __m256 - { - const __m256 denom = _mm256_fmadd_ps(b, color, c); // b * color + c - const __m256 mask = _mm256_cmp_ps(denom, _mm256_setzero_ps(), 0); // cmp: denom==0 ? 1 : 0 - const __m256 xplusa = _mm256_add_ps(color, a); -// const __m256 division = _mm256_div_ps(xplusa, denom); - const __m256 division = _mm256_mul_ps(xplusa, _mm256_rcp_ps(denom)); // RCP is accurate enough. - // If denominator == 0 => use (x+a) else use (x+a)/denominator, then do the max and min. - return _mm256_max_ps(_mm256_min_ps(_mm256_blendv_ps(division, xplusa, mask), fmax), fmin); // blend: mask==1 ? b : a; - }; - - const size_t internalBufferNrVectors = AvxSupport::numberOfAvxVectors(this->width); - - for (int row = 0; row < this->height; ++row) - { - const T* pColor = reinterpret_cast(pPixels + row * nrElementsPerLine); - __m256* pResult = &result.at(row * internalBufferNrVectors); - for (int counter = 0; counter < nrVectors; ++counter, pColor += 16, pResult += 2) - { - const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); - _mm256_store_ps((float*)pResult, interpolate(lo8)); - _mm256_store_ps((float*)(pResult + 1), interpolate(hi8)); - } - // Remaining pixels of line - float* pRemaining = reinterpret_cast(pResult); - for (int n = nrVectors * 16; n < this->colEnd; ++n, ++pColor, ++pRemaining) - { - const float fcolor = readColorValue(*pColor); - const float denom = b.m256_f32[0] * fcolor + c.m256_f32[0]; - const float xplusa = fcolor + a.m256_f32[0]; - *pRemaining = std::max(std::min(denom == 0.0f ? 
xplusa : (xplusa / denom), fmax.m256_f32[0]), fmin.m256_f32[0]); - } - } - }; - - return backgroundCalibLoop(loop, avxInputSupport, backgroundCalibrationDef.m_riRed, backgroundCalibrationDef.m_riGreen, backgroundCalibrationDef.m_riBlue); - } - else // LINEAR - { - const auto loop = [this, nrVectors](const auto* const pPixels, const size_t nrElementsPerLine, const auto& params, std::vector<__m256>& result) -> void - { - const __m256 a0 = _mm256_set1_ps(params.getParameterA0()); - const __m256 a1 = _mm256_set1_ps(params.getParameterA1()); - const __m256 b0 = _mm256_set1_ps(params.getParameterB0()); - const __m256 b1 = _mm256_set1_ps(params.getParameterB1()); - const __m256 xm = _mm256_set1_ps(params.getParameterXm()); - - const auto interpolate = [a0, a1, b0, b1, xm](const __m256 x) noexcept -> __m256 - { - const __m256 mask = _mm256_cmp_ps(x, xm, 17); // cmp: x < xm ? 1 : 0 - // If x < xm => use a0 and b0, else use a1 and b1. - const __m256 aSelected = _mm256_blendv_ps(a1, a0, mask); // blend(arg1, arg2, mask): mask==1 ? arg2 : arg1; - const __m256 bSelected = _mm256_blendv_ps(b1, b0, mask); - return _mm256_fmadd_ps(x, aSelected, bSelected); // x * a + b - }; - - const size_t internalBufferNrVectors = AvxSupport::numberOfAvxVectors(this->width); - - for (int row = 0; row < this->height; ++row) - { - const T* pColor = reinterpret_cast(pPixels + row * nrElementsPerLine); - __m256* pResult = &result.at(row * internalBufferNrVectors); - for (int counter = 0; counter < nrVectors; ++counter, pColor += 16, pResult += 2) - { - const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); - _mm256_store_ps((float*)pResult, interpolate(lo8)); - _mm256_store_ps((float*)(pResult + 1), interpolate(hi8)); - } - // Remaining pixels of line - float* pRemaining = reinterpret_cast(pResult); - for (int n = nrVectors * 16; n < this->colEnd; ++n, ++pColor, ++pRemaining) - { - const float fcolor = readColorValue(*pColor); - *pRemaining = fcolor < xm.m256_f32[0] ? 
(fcolor * a0.m256_f32[0] + b0.m256_f32[0]) : (fcolor * a1.m256_f32[0] + b1.m256_f32[0]); - } - } - }; - - return backgroundCalibLoop(loop, avxInputSupport, backgroundCalibrationDef.m_liRed, backgroundCalibrationDef.m_liGreen, backgroundCalibrationDef.m_liBlue); - } -} - -#pragma warning( push ) -#pragma warning( disable : 4324 ) // Structure was padded -#pragma warning( disable : 4100 ) // Unreferenced variable - -template -int AvxStacking::pixelPartitioning() -{ - AvxSupport avxTempBitmap{ tempBitmap }; - // Check if we were called with the correct template argument. - if constexpr (ISRGB) { - if (!avxTempBitmap.isColorBitmapOfType()) - return 1; - } - else { - if (!avxTempBitmap.isMonochromeBitmapOfType()) - return 1; - } - - const size_t nrVectors = AvxSupport::numberOfAvxVectors(width); - const int outWidth = avxTempBitmap.width(); - if (outWidth <= 0) - return 1; - - // outWidth = width of the temp bitmap. - // resultWidth = width of the rect we want to write (in temp bitmap) - - // Non-vectorized accumulation for the case of 2 (or more) x-coordinates being identical. - // Vectorized version would be incorrect in that case. - const auto accumulateSingle = [](const __m256 newColor, const __m256i outNdx, const __m256i mask, auto* const pOutputBitmap) -> void - { - const auto conditionalAccumulate = [pOutputBitmap](const int m, const size_t ndx, const float color) -> void - { - if (m != 0) - pOutputBitmap[ndx] = AvxSupport::accumulateSingleColorValue(ndx, color, m, pOutputBitmap); - }; - - // This needs to be done pixel by pixel of the vector, because neighboring pixels have identical indices (due to prior pixel transform step). 
- __m128 color = _mm256_castps256_ps128(newColor); - conditionalAccumulate(_mm256_cvtsi256_si32(mask), _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); - conditionalAccumulate(_mm256_extract_epi32(mask, 1), _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); - conditionalAccumulate(_mm256_extract_epi32(mask, 2), _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); - conditionalAccumulate(_mm256_extract_epi32(mask, 3), _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); - color = _mm256_extractf128_ps(newColor, 1); - conditionalAccumulate(_mm256_extract_epi32(mask, 4), _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); - conditionalAccumulate(_mm256_extract_epi32(mask, 5), _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); - conditionalAccumulate(_mm256_extract_epi32(mask, 6), _mm256_extract_epi32(outNdx, 6), AvxSupport::extractPs<2>(color)); - conditionalAccumulate(_mm256_extract_epi32(mask, 7), _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); - }; - - // Vectorized or non-vectorized accumulation - const auto accumulateAVX = [&](const __m256i outNdx, const __m256i mask, const __m256 colorValue, const __m256 fraction, auto* const pOutputBitmap, const bool twoNdxEqual, const bool fastLoadAndStore) -> void - { - if (twoNdxEqual) // If so, we cannot use AVX. 
- return accumulateSingle(_mm256_mul_ps(colorValue, fraction), outNdx, mask, pOutputBitmap); - - // Read from pOutputBitmap[outNdx[0:7]], and add (colorValue*fraction)[0:7] - const __m256 limitedColor = AvxSupport::accumulateColorValues(outNdx, colorValue, fraction, mask, pOutputBitmap, fastLoadAndStore); - AvxSupport::storeColorValue(outNdx, limitedColor, mask, pOutputBitmap, fastLoadAndStore); - }; - - - const __m256i resultWidthVec = _mm256_set1_epi32(this->resultWidth); - const __m256i resultHeightVec = _mm256_set1_epi32(this->resultHeight); - - const __m256i outWidthVec = _mm256_set1_epi32(outWidth); - const auto getColorPointer = [](const std::vector<__m256>& colorPixels, const size_t offset) -> const __m256* - { - if constexpr (ISRGB) - return colorPixels.data() + offset; - else - return nullptr; - }; - const auto getColorValue = [](const __m256* const pColor) -> __m256 - { - if constexpr (ISRGB) - return _mm256_load_ps((const float*)pColor); - else - return _mm256_undefined_ps(); - }; - - // ------------------------------- - // Entropy data - - float *pRedEntropyLayer, *pGreenEntropyLayer, *pBlueEntropyLayer; - if constexpr (ENTROPY) - { - AvxSupport avxEntropySupport{ *this->entropyData.pEntropyCoverage }; - if (ISRGB && !avxEntropySupport.isColorBitmapOfType()) - return 1; - if (!ISRGB && !avxEntropySupport.isMonochromeBitmapOfType()) - return 1; - if (this->entropyData.redEntropyLayer.empty()) // Something is wrong here! - return 1; - pRedEntropyLayer = reinterpret_cast(this->entropyData.redEntropyLayer.data()); - pGreenEntropyLayer = ISRGB ? reinterpret_cast(this->entropyData.greenEntropyLayer.data()) : nullptr; - pBlueEntropyLayer = ISRGB ? 
reinterpret_cast(this->entropyData.blueEntropyLayer.data()) : nullptr; - } - - const auto accumulateEntropyRGBorMono = [&](const __m256 r, const __m256 g, const __m256 b, const __m256 fraction, const __m256i outNdx, const __m256i mask, const bool twoNdxEqual, const bool fastLoadAndStore) -> void - { - if constexpr (!ENTROPY) - return; - - if constexpr (ISRGB) - { - accumulateAVX(outNdx, mask, r, fraction, pRedEntropyLayer, twoNdxEqual, fastLoadAndStore); - accumulateAVX(outNdx, mask, g, fraction, pGreenEntropyLayer, twoNdxEqual, fastLoadAndStore); - accumulateAVX(outNdx, mask, b, fraction, pBlueEntropyLayer, twoNdxEqual, fastLoadAndStore); - } - else - { - accumulateAVX(outNdx, mask, r, fraction, pRedEntropyLayer, twoNdxEqual, fastLoadAndStore); - } - }; - // ------------------------------- - - T* const pRedOut = ISRGB ? &*avxTempBitmap.redPixels().begin() : nullptr; - T* const pGreenOut = ISRGB ? &*avxTempBitmap.greenPixels().begin() : nullptr; - T* const pBlueOut = ISRGB ? &*avxTempBitmap.bluePixels().begin() : nullptr; - T* const pGrayOut = ISRGB ? 
nullptr : &*avxTempBitmap.grayPixels().begin(); - - const auto accumulateRGBorMono = [&](const __m256 r, const __m256 g, const __m256 b, const __m256 fraction, const __m256i outNdx, const __m256i mask, const bool twoNdxEqual, const bool fastLoadAndStore) -> void - { - if constexpr (ISRGB) - { - accumulateAVX(outNdx, mask, r, fraction, pRedOut, twoNdxEqual, fastLoadAndStore); - accumulateAVX(outNdx, mask, g, fraction, pGreenOut, twoNdxEqual, fastLoadAndStore); - accumulateAVX(outNdx, mask, b, fraction, pBlueOut, twoNdxEqual, fastLoadAndStore); - } - else - { - accumulateAVX(outNdx, mask, r, fraction, pGrayOut, twoNdxEqual, fastLoadAndStore); - } - }; - const auto fastAccumulateWordRGBorMono = [&](const __m256 color, const __m256 fraction1, const __m256 fraction2, std::uint16_t* const pOutput) -> void - { - const __m256i colorVector = _mm256_loadu_si256(reinterpret_cast(pOutput)); // vmovdqu ymm, m256 -// const __m256i colorVector = _mm256_lddqu_si256(reinterpret_cast(pOutput)); // vlddqu ymm, m256 - const __m256i f1 = _mm256_zextsi128_si256(AvxSupport::cvtPsEpu16(_mm256_mul_ps(fraction1, color))); // Upper 128 bits are zeroed. - const __m256i f2 = _mm256_zextsi128_si256(AvxSupport::cvtPsEpu16(_mm256_mul_ps(fraction2, color))); - const __m256i f2ShiftedLeft = AvxSupport::shiftLeftEpi8<2>(f2); - const __m256i colorPlusFraction1 = _mm256_adds_epu16(colorVector, f1); - const __m256i colorPlusBothFractions = _mm256_adds_epu16(colorPlusFraction1, f2ShiftedLeft); - _mm256_storeu_si256(reinterpret_cast<__m256i*>(pOutput), colorPlusBothFractions); - }; - - const auto getColumnOrRowMask = [](const __m256i coord, const __m256i resultWidthOrHeight) -> __m256i - { - return _mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), coord), _mm256_cmpgt_epi32(resultWidthOrHeight, coord)); // !(0 > x) and (width > x) == (x >= 0) and (x < width). Same for y with height. 
- }; - - // Lambda for this check: DSSRect{ 0, 0, m_rcResult.width(), m_rcResult.height() }.contains(ptOut) - const auto resultRectCheck = [](const __m256i coordTrunc, const __m256i resultWidthOrHeight, const __m256 coord) -> __m256i - { - // (pt.x >= 0) && (pt.x <= width-1) is equivalent to !(0 > floor(pt.x)) && (width > ceil(pt.x)) - return _mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), coordTrunc), _mm256_cmpgt_epi32(resultWidthOrHeight, _mm256_cvttps_epi32(_mm256_ceil_ps(coord)))); - }; - - // Accumulates with fraction1 for (x, y) and fraction2 for (x+1, y) - const __m256i allOnes = _mm256_set1_epi32(-1); // All bits '1' == all int elements -1 - const auto accumulateTwoFractions = [&, allOnes](const __m256 red, const __m256 green, const __m256 blue, const __m256 fraction1, const __m256 fraction2, const __m256i outIndex, - __m256i mask1, const __m256i mask2, const bool twoNdxEqual, const bool allNdxValid1, const bool allNdxValid2) -> void - { - if constexpr (std::is_same::value) - { - if (allNdxValid1 && allNdxValid2) - { - const size_t startNdx = _mm256_cvtsi256_si32(outIndex); // outIndex[0] - if constexpr (ISRGB) - { - fastAccumulateWordRGBorMono(red, fraction1, fraction2, pRedOut + startNdx); - fastAccumulateWordRGBorMono(green, fraction1, fraction2, pGreenOut + startNdx); - fastAccumulateWordRGBorMono(blue, fraction1, fraction2, pBlueOut + startNdx); - } - else - fastAccumulateWordRGBorMono(red, fraction1, fraction2, pGrayOut + startNdx); - - return; - } - } - - accumulateRGBorMono(red, green, blue, fraction1, outIndex, mask1, twoNdxEqual, allNdxValid1); // x, y, fraction1 - accumulateRGBorMono(red, green, blue, fraction2, _mm256_sub_epi32(outIndex, allOnes), mask2, twoNdxEqual, allNdxValid2); // x+1, y, fraction2 - }; - - for (int row = 0; row < height; ++row) - { - const size_t offset = row * nrVectors; - const __m256* pXLine = &*xCoordinates.begin() + offset; - const __m256* pYLine = &*yCoordinates.begin() + offset; - const __m256* pRed = 
&*redPixels.begin() + offset; - const __m256* pGreen = getColorPointer(greenPixels, offset); - const __m256* pBlue = getColorPointer(bluePixels, offset); - __m256i vIndex; - if constexpr (ENTROPY) - vIndex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); - - for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine, ++pRed) - { - const __m256 xcoord = _mm256_load_ps((const float*)pXLine); - const __m256 ycoord = _mm256_load_ps((const float*)pYLine); - const __m256 xtruncated = _mm256_floor_ps(xcoord); // trunc(coordinate) - const __m256 ytruncated = _mm256_floor_ps(ycoord); - const __m256 xfractional = _mm256_sub_ps(xcoord, xtruncated); // fractional_part(coordinate) - const __m256 yfractional = _mm256_sub_ps(ycoord, ytruncated); - const __m256 xfrac1 = _mm256_sub_ps(_mm256_set1_ps(1.0f), xfractional); // 1 - fractional_part - const __m256 yfrac1 = _mm256_sub_ps(_mm256_set1_ps(1.0f), yfractional); - - const __m256 red = _mm256_load_ps((const float*)pRed); - const __m256 green = getColorValue(pGreen); - const __m256 blue = getColorValue(pBlue); - - // Different pixels of the vector can have different number of fractions. So we always need to consider all 4 fractions. - // Note: We have to process the 4 fractions one by one, because the same pixels can be involved. Otherwise accumulation would be wrong. 
- - // 1.Fraction at (xtruncated, ytruncated) - // 2.Fraction at (xtruncated+1, ytruncated) - __m256 fraction1 = _mm256_mul_ps(xfrac1, yfrac1); - __m256 fraction2 = _mm256_mul_ps(xfractional, yfrac1); - const __m256i xii = _mm256_cvttps_epi32(xtruncated); - const __m256i yii = _mm256_cvttps_epi32(ytruncated); - - // DSSRect{ 0, 0, m_rcResult.width(), m_rcResult.height() }.contains(ptOut); - const auto resultRectMask = _mm256_and_si256( - resultRectCheck(xii, resultWidthVec, xcoord), // x-coord check against width - resultRectCheck(yii, resultHeightVec, ycoord) // y-coord check against height - ); - - const __m256i columnMask1 = getColumnOrRowMask(xii, resultWidthVec); - const __m256i columnMask2 = getColumnOrRowMask(_mm256_sub_epi32(xii, allOnes), resultWidthVec); - __m256i rowMask = getColumnOrRowMask(yii, resultHeightVec); - __m256i outIndex = _mm256_add_epi32(_mm256_mullo_epi32(outWidthVec, yii), xii); - - // Check if two adjacent indices are equal: Subtract the x-coordinates horizontally and check if any of the results equals zero. If so -> adjacent x-coordinates are equal. 
- // (a & b) == 0 -> ZF=1, (~a & b) == 0 -> CF=1; testc: return CF; testz: return ZF; testnzc: IF (ZF == 0 && CF == 0) return 1; - const __m256i indexDiff = _mm256_sub_epi32(outIndex, _mm256_permutevar8x32_epi32(outIndex, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0))); // -1 where ndx[i+1] == 1 + ndx[i] - const bool allNdxEquidistant = (1 == _mm256_testc_si256(indexDiff, _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, 0))); // 'testc' returns 1 if all bits are '1' -> 0xffffffff == -1 -> ndx[i] - ndx[i+1] == -1 - const bool twoNdxEqual = (0 == _mm256_testz_si256(_mm256_cmpeq_epi32(_mm256_setzero_si256(), indexDiff), _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, 0))); - - rowMask = _mm256_and_si256(rowMask, resultRectMask); - __m256i mask1 = _mm256_and_si256(columnMask1, rowMask); - __m256i mask2 = _mm256_and_si256(columnMask2, rowMask); - bool allNdxValid1 = allNdxEquidistant && (1 == _mm256_testc_si256(mask1, allOnes)); - bool allNdxValid2 = allNdxEquidistant && (1 == _mm256_testc_si256(mask2, allOnes)); - - accumulateTwoFractions(red, green, blue, fraction1, fraction2, outIndex, mask1, mask2, twoNdxEqual, allNdxValid1, allNdxValid2); // (x, y), (x+1, y) - __m256 redEntropy, greenEntropy, blueEntropy; - if constexpr (ENTROPY) - { - getAvxEntropy(redEntropy, greenEntropy, blueEntropy, vIndex, row); - vIndex = _mm256_add_epi32(vIndex, _mm256_set1_epi32(8)); - accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction1, outIndex, mask1, twoNdxEqual, allNdxValid1); - accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction2, _mm256_sub_epi32(outIndex, allOnes), mask2, twoNdxEqual, allNdxValid2); - } - - // 3.Fraction at (xtruncated, ytruncated+1) - // 4.Fraction at (xtruncated+1, ytruncated+1) - fraction1 = _mm256_mul_ps(xfrac1, yfractional); - fraction2 = _mm256_mul_ps(xfractional, yfractional); - rowMask = getColumnOrRowMask(_mm256_sub_epi32(yii, allOnes), resultHeightVec); - rowMask = _mm256_and_si256(rowMask, resultRectMask); - mask1 = 
_mm256_and_si256(columnMask1, rowMask); - mask2 = _mm256_and_si256(columnMask2, rowMask); - allNdxValid1 = allNdxEquidistant && (1 == _mm256_testc_si256(mask1, allOnes)); - allNdxValid2 = allNdxEquidistant && (1 == _mm256_testc_si256(mask2, allOnes)); - outIndex = _mm256_add_epi32(outIndex, outWidthVec); - - accumulateTwoFractions(red, green, blue, fraction1, fraction2, outIndex, mask1, mask2, twoNdxEqual, allNdxValid1, allNdxValid2); // (x, y+1), (x+1, y+1) - if constexpr (ENTROPY) - { - accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction1, outIndex, mask1, twoNdxEqual, allNdxValid1); - accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction2, _mm256_sub_epi32(outIndex, allOnes), mask2, twoNdxEqual, allNdxValid2); - } - - if constexpr (ISRGB) - { - ++pGreen; - ++pBlue; - } - } - } - - return 0; -} - -#pragma warning( pop ) - -template -inline void AvxStacking::getAvxEntropy(__m256& redEntropy, __m256& greenEntropy, __m256& blueEntropy, const __m256i xIndex, const int row) -{ - const int windowSize = entropyData.entropyInfo.windowSize(); - const int squareSize = 2 * windowSize + 1; - - const __m256 vx = _mm256_cvtepi32_ps(xIndex); - const __m256 vy = _mm256_set1_ps(static_cast(lineStart + row)); - const __m256 vsquareSize = _mm256_set1_ps(static_cast(squareSize)); - const __m256i vsquareNdxY = _mm256_set1_epi32((lineStart + row) / squareSize); - const __m256 xndx = _mm256_floor_ps(_mm256_div_ps(vx, vsquareSize)); - const __m256i vsquareNdxX = _mm256_cvttps_epi32(xndx); - const __m256 vsquareCenterX = _mm256_fmadd_ps(xndx, vsquareSize, _mm256_set1_ps(static_cast(windowSize))); //_mm256_add_epi32(_mm256_mullo_epi32(vsquareNdxX, vsquareSize), _mm256_set1_epi32(windowSize)); - const __m256 vsquareCenterY = _mm256_fmadd_ps(_mm256_cvtepi32_ps(vsquareNdxY), vsquareSize, _mm256_set1_ps(static_cast(windowSize))); //_mm256_add_epi32(_mm256_mullo_epi32(vsquareNdxY, vsquareSize), _mm256_set1_epi32(windowSize)); - const __m256i 
vnrSquaresX = _mm256_set1_epi32(entropyData.entropyInfo.nrSquaresX()); - const __m256i vnrSquaresY = _mm256_set1_epi32(entropyData.entropyInfo.nrSquaresY()); - - const auto vdistanceTo = [&vx, &vy](const __m256 vcenterX, const __m256 vcenterY) -> __m256 - { - const __m256 x = _mm256_sub_ps(vx, vcenterX); - const __m256 y = _mm256_sub_ps(vy, vcenterY); - return _mm256_sqrt_ps(_mm256_fmadd_ps(y, y, _mm256_mul_ps(x, x))); -// return _mm256_hypot_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vx, vcenterX)), _mm256_cvtepi32_ps(_mm256_sub_epi32(vy, vcenterY))); - }; - - const auto vgetEntropy = [&vnrSquaresX, - pRedEntropy = entropyData.entropyInfo.redEntropyData(), - pGreenEntropy = entropyData.entropyInfo.greenEntropyData(), - pBlueEntropy = entropyData.entropyInfo.blueEntropyData()](const __m256i x, const __m256i y, const __m256 mask) - { - const __m256i index = _mm256_add_epi32(_mm256_mullo_epi32(y, vnrSquaresX), x); - if constexpr (ISRGB) - { - return std::make_tuple( - _mm256_mask_i32gather_ps(mask, pRedEntropy, index, mask, 4), // where mask==0 -> gather returns mask, i.e. it returns zero. - _mm256_mask_i32gather_ps(mask, pGreenEntropy, index, mask, 4), - _mm256_mask_i32gather_ps(mask, pBlueEntropy, index, mask, 4) - ); - } - else - { - return _mm256_mask_i32gather_ps(mask, pRedEntropy, index, mask, 4); - } - }; - - // Square 0 - const __m256 vd0 = vdistanceTo(vsquareCenterX, vsquareCenterY); - // Square 1 - const __m256i usePreviousSquare = _mm256_castps_si256(_mm256_cmp_ps(vsquareCenterX, vx, 30)); // IF x left of square center -> take previous square ELSE take next square. - const __m256i vndxX = _mm256_add_epi32(vsquareNdxX, _mm256_blendv_epi8(_mm256_set1_epi32(1), usePreviousSquare, usePreviousSquare)); // square index + or - 1 depending on above condition. - const __m256 mask1 = _mm256_castsi256_ps(_mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), vndxX), _mm256_cmpgt_epi32(vnrSquaresX, vndxX))); // square index not < 0 and < nr_squares. 
- const __m256 vd1 = _mm256_blendv_ps( // distance to new square center. Set to large value if x == old square center or new square index out of bounds. - _mm256_set1_ps(3e5f), - vdistanceTo(_mm256_blendv_ps(_mm256_add_ps(vsquareCenterX, vsquareSize), _mm256_sub_ps(vsquareCenterX, vsquareSize), _mm256_castsi256_ps(usePreviousSquare)), vsquareCenterY), - _mm256_andnot_ps(_mm256_cmp_ps(vx, vsquareCenterX, 0), mask1) - ); - // Square 2 - const __m256i useUpperSquare = _mm256_castps_si256(_mm256_cmp_ps(vsquareCenterY, vy, 30)); // IF y above square center -> take upper square ELSE take lower square. - const __m256i vndxY = _mm256_add_epi32(vsquareNdxY, _mm256_blendv_epi8(_mm256_set1_epi32(1), useUpperSquare, useUpperSquare)); - const __m256 mask2 = _mm256_castsi256_ps(_mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), vndxY), _mm256_cmpgt_epi32(vnrSquaresY, vndxY))); // Check bounds of new square index. - const __m256 vd2 = _mm256_blendv_ps( - _mm256_set1_ps(3e5f), - vdistanceTo(vsquareCenterX, _mm256_blendv_ps(_mm256_add_ps(vsquareCenterY, vsquareSize), _mm256_sub_ps(vsquareCenterY, vsquareSize), _mm256_castsi256_ps(useUpperSquare))), - _mm256_andnot_ps(_mm256_cmp_ps(vy, vsquareCenterY, 0), mask2) - ); - - const __m256 vw0 = _mm256_mul_ps(vd1, vd2); // (1/d0)/(1/d0+1/d1+1/d2) = d1d2/(d1d2+d0d2+d0d1) - const __m256 vw1 = _mm256_mul_ps(vd0, vd2); - const __m256 vw2 = _mm256_mul_ps(vd0, vd1); - const __m256 vdenom = _mm256_add_ps(_mm256_add_ps(vw0, vw1), vw2); - - if constexpr (ISRGB) - { - // Entropies of square0, square1, square2 - const auto [vr0, vg0, vb0] = vgetEntropy(vsquareNdxX, vsquareNdxY, _mm256_castsi256_ps(_mm256_set1_epi32(0xffffffff))); - const auto [vr1, vg1, vb1] = vgetEntropy(vndxX, vsquareNdxY, mask1); - const auto [vr2, vg2, vb2] = vgetEntropy(vsquareNdxX, vndxY, mask2); - redEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, vr0, _mm256_fmadd_ps(vw1, vr1, _mm256_mul_ps(vw2, vr2))), vdenom); - greenEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, 
vg0, _mm256_fmadd_ps(vw1, vg1, _mm256_mul_ps(vw2, vg2))), vdenom); - blueEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, vb0, _mm256_fmadd_ps(vw1, vb1, _mm256_mul_ps(vw2, vb2))), vdenom); - } - else - { - // Entropies of square0, square1, square2 - const __m256 vr0 = vgetEntropy(vsquareNdxX, vsquareNdxY, _mm256_castsi256_ps(_mm256_set1_epi32(0xffffffff))); - const __m256 vr1 = vgetEntropy(vndxX, vsquareNdxY, mask1); - const __m256 vr2 = vgetEntropy(vsquareNdxX, vndxY, mask2); - redEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, vr0, _mm256_fmadd_ps(vw1, vr1, _mm256_mul_ps(vw2, vr2))), vdenom); - } -/* - const auto getEntropies = [nrSquaresX = entropyData.entropyInfo.nrSquaresX(), - nrSquaresY = entropyData.entropyInfo.nrSquaresY(), - redSquareEntropies = entropyData.entropyInfo.redEntropyData(), - greenSquareEntropies = entropyData.entropyInfo.greenEntropyData(), - blueSquareEntropies = entropyData.entropyInfo.blueEntropyData()](const int x, const int y) -> std::tuple - { - return (x >= 0 && x < nrSquaresX && y >= 0 && y < nrSquaresY) - ? (constexpr (ISRGB) - ? 
std::make_tuple(redSquareEntropies[y * nrSquaresX + x], greenSquareEntropies[y * nrSquaresX + x], blueSquareEntropies[y * nrSquaresX + x]) - : std::make_tuple(redSquareEntropies[y * nrSquaresX + x], 0.0f, 0.0f)) - : std::make_tuple(-1.0f, -1.0f, -1.0f); - }; - - const int y = lineStart + row; - const int squareNdxY = y / squareSize; - for (int n = 0; n < 8; ++n) - { - const int x = counter * 8 + n; - const int squareNdxX = x / squareSize; - const int squareCenterX = squareNdxX * squareSize + windowSize; - const int squareCenterY = squareNdxY * squareSize + windowSize; - - const auto distanceTo = [x, y](const int centerX, const int centerY) -> float - { - const auto square = [](const int x) { return static_cast(x * x); }; - return sqrtf(square(x - centerX) + square(y - centerY)); - }; - - // Square 0 - const auto [re0, ge0, be0] = getEntropies(squareNdxX, squareNdxY); - const float d0 = distanceTo(squareCenterX, squareCenterY); - // Square 1 - int ndxX = x >= squareCenterX ? (squareNdxX + 1) : (squareNdxX - 1); - int ndxY = squareNdxY; - const auto [re1, ge1, be1] = getEntropies(ndxX, ndxY); - const float d1 = (re1 < 0.0f || x == squareCenterX) ? 1e5f : distanceTo(squareCenterX + (x >= squareCenterX ? squareSize : -squareSize), squareCenterY); - // Square 2 - ndxX = squareNdxX; - ndxY = y >= squareCenterY ? (squareNdxY + 1) : (squareNdxY - 1); - const auto [re2, ge2, be2] = getEntropies(ndxX, ndxY); - const float d2 = (re2 < 0.0f || y == squareCenterY) ? 1e5f : distanceTo(squareCenterX, squareCenterY + (y >= squareCenterY ? 
squareSize : -squareSize)); - - const float denom = d1 * d2 + d0 * (d1 + d2); - const float w0 = d1 * d2; - const float w1 = d0 * d2; - const float w2 = d0 * d1; - - redEntropy.m256_f32[n] = (w0 * re0 + w1 * re1 + w2 * re2) / denom; - if constexpr (ISRGB) - { - greenEntropy.m256_f32[n] = (w0 * ge0 + w1 * ge1 + w2 * ge2) / denom; - blueEntropy.m256_f32[n] = (w0 * be0 + w1 * be1 + w2 * be2) / denom; - } - } -*/ -/* - double dr, dg, db; - COLORREF16 crcol; - for (int n = 0; n < 8; ++n) - { - const_cast(entropyData.entropyInfo).GetPixel(xIndex.m256i_i32[n], lineStart + row, dr, dg, db, crcol); - if (fabsf(redEntropy.m256_f32[n] - static_cast(dr)) > 0.01f) - { - wchar_t s[256]; - swprintf_s(s, L"x/y=%d/%d, soll=%f, ist=%f", xIndex.m256i_i32[n], lineStart+row, static_cast(dr), redEntropy.m256_f32[n]); - MessageBox(0, s, L"", 0); - } - } -*/ -} +#include "stdafx.h" +#include "avx_support.h" +#include "dssrect.h" +#include "avx.h" +#include "PixelTransform.h" +#include "BilinearParameters.h" +#include "TaskInfo.h" +#include "BackgroundCalibration.h" +#include "avx_entropy.h" +#include "EntropyInfo.h" + +AvxStacking::AvxStacking(int lStart, int lEnd, CMemoryBitmap& inputbm, CMemoryBitmap& tempbm, const DSSRect& resultRect, AvxEntropy& entrdat) : + lineStart{ lStart }, lineEnd{ lEnd }, colEnd{ inputbm.Width() }, + width{ colEnd }, height{ lineEnd - lineStart }, + resultWidth{ resultRect.width() }, resultHeight{ resultRect.height() }, + xCoordinates(width >= 0 && height >= 0 ? AvxSupport::numberOfAvxVectors(width) * height : 0), + yCoordinates(width >= 0 && height >= 0 ? AvxSupport::numberOfAvxVectors(width) * height : 0), + redPixels(width >= 0 && height >= 0 ? 
AvxSupport::numberOfAvxVectors(width) * height : 0), + greenPixels{}, + bluePixels{}, + inputBitmap{ inputbm }, + tempBitmap{ tempbm }, + avxCfa{ lStart, lEnd, inputbm }, + entropyData{ entrdat } +{ + if (width < 0 || height < 0) + throw std::invalid_argument("End index smaller than start index for line or column of AvxStacking"); + + resizeColorVectors(AvxSupport::numberOfAvxVectors(width) * height); +} + +void AvxStacking::init(const int lStart, const int lEnd) +{ + if (!AvxSupport::checkSimdAvailability()) + return; + + lineStart = lStart; + lineEnd = lEnd; + height = lineEnd - lineStart; + const size_t nrVectors = AvxSupport::numberOfAvxVectors(width) * height; + xCoordinates.resize(nrVectors); + yCoordinates.resize(nrVectors); + redPixels.resize(nrVectors); + resizeColorVectors(nrVectors); +} + +void AvxStacking::resizeColorVectors(const size_t nrVectors) +{ + if (AvxSupport{ tempBitmap }.isColorBitmap()) + { + greenPixels.resize(nrVectors); + bluePixels.resize(nrVectors); + } + if (AvxSupport{ inputBitmap }.isMonochromeCfaBitmapOfType()) + { + avxCfa.init(lineStart, lineEnd); + } +} + +int AvxStacking::stack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& backgroundCalibrationDef, const int pixelSizeMultiplier) +{ + static_assert(sizeof(unsigned int) == sizeof(std::uint32_t)); + + if (!AvxSupport::checkSimdAvailability()) + return 1; + + int rval = 1; + if (doStack(pixelTransformDef, taskInfo, backgroundCalibrationDef, pixelSizeMultiplier) == 0 + || doStack(pixelTransformDef, taskInfo, backgroundCalibrationDef, pixelSizeMultiplier) == 0 + || doStack(pixelTransformDef, taskInfo, backgroundCalibrationDef, pixelSizeMultiplier) == 0) + { + rval = 0; + } + return AvxSupport::zeroUpper(rval); +} + +template +int AvxStacking::doStack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& backgroundCalibrationDef, const int pixelSizeMultiplier) +{ + if (pixelSizeMultiplier 
!= 1 || pixelTransformDef.m_lPixelSizeMultiplier != 1) + return 1; + + // Check input bitmap. + const AvxSupport avxInputSupport{ inputBitmap }; + if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) + return 1; + + // Check output (temp) bitmap. + const AvxSupport avxTempSupport{ tempBitmap }; + if (!avxTempSupport.isColorBitmapOfType() && !avxTempSupport.isMonochromeBitmapOfType()) + return 1; + + if (avxInputSupport.isMonochromeCfaBitmapOfType() && avxCfa.interpolate(lineStart, lineEnd, pixelSizeMultiplier) != 0) + return 1; + if (pixelTransform(pixelTransformDef) != 0) + return 1; + if (backgroundCalibration(backgroundCalibrationDef) != 0) + return 1; + + // Pixel partitioning + // Has 4 things to distinguish: Color/Monochrome, Entropy yes/no + const bool isColor = avxTempSupport.isColorBitmap(); + if (taskInfo.m_Method == MBP_ENTROPYAVERAGE) + { + if (isColor && pixelPartitioning() != 0) + return 1; + if (!isColor && pixelPartitioning() != 0) + return 1; + } + else // No entropy average + { + if (isColor && pixelPartitioning() != 0) + return 1; + if (!isColor && pixelPartitioning() != 0) + return 1; + } + + return 0; +}; + +int AvxStacking::pixelTransform(const CPixelTransform& pixelTransformDef) +{ + const CBilinearParameters& bilinearParams = pixelTransformDef.m_BilinearParameters; + + // Number of vectors with 8 pixels each to process. + const size_t nrVectors = AvxSupport::numberOfAvxVectors(width); + const float fxShift = static_cast(pixelTransformDef.m_fXShift + (pixelTransformDef.m_bUseCometShift ? pixelTransformDef.m_fXCometShift : 0.0)); + const float fyShift = static_cast(pixelTransformDef.m_fYShift + (pixelTransformDef.m_bUseCometShift ? pixelTransformDef.m_fYCometShift : 0.0)); + const __m256 fxShiftVec = _mm256_set1_ps(fxShift); + + // Superfast version if no transformation required: indices = coordinates. 
+ if (bilinearParams.Type == TT_BILINEAR && ( + bilinearParams.fXWidth == 1.0f && bilinearParams.fYWidth == 1.0f && + bilinearParams.a1 == 1.0f && bilinearParams.b2 == 1.0f && + bilinearParams.a0 == 0.0f && bilinearParams.a2 == 0.0f && bilinearParams.a3 == 0.0f && + bilinearParams.b0 == 0.0f && bilinearParams.b1 == 0.0f && bilinearParams.b3 == 0.0f + )) + { + for (int row = 0; row < height; ++row) + { + const __m256 fyShiftVec = _mm256_set1_ps(static_cast(lineStart + row) + fyShift); + __m256* pXLine = &xCoordinates.at(row * nrVectors); + __m256* pYLine = &yCoordinates.at(row * nrVectors); + __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) + { + const __m256 fxline = _mm256_cvtepi32_ps(xline); + xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); + _mm256_store_ps((float*)pXLine, _mm256_add_ps(fxline, fxShiftVec)); + _mm256_store_ps((float*)pYLine, fyShiftVec); + } + } + return 0; + } + + const float fa0 = static_cast(bilinearParams.a0); + const float fa1 = static_cast(bilinearParams.a1); + const float fa2 = static_cast(bilinearParams.a2); + const float fa3 = static_cast(bilinearParams.a3); + const float fb0 = static_cast(bilinearParams.b0); + const float fb1 = static_cast(bilinearParams.b1); + const float fb2 = static_cast(bilinearParams.b2); + const float fb3 = static_cast(bilinearParams.b3); + const __m256 xWidth = _mm256_set1_ps(static_cast(bilinearParams.fXWidth)); + const __m256 yWidth = _mm256_set1_ps(static_cast(bilinearParams.fYWidth)); + const __m256 a0 = _mm256_set1_ps(fa0); + const __m256 a1 = _mm256_set1_ps(fa1); + const __m256 a2 = _mm256_set1_ps(fa2); + const __m256 a3 = _mm256_set1_ps(fa3); + const __m256 b0 = _mm256_set1_ps(fb0); + const __m256 b1 = _mm256_set1_ps(fb1); + const __m256 b2 = _mm256_set1_ps(fb2); + const __m256 b3 = _mm256_set1_ps(fb3); + const __m256 fyShiftVec = _mm256_set1_ps(fyShift); + + const auto linearTransformX = [&a0, &a1, &a2, 
&a3](const __m256 x, const __m256 y, const __m256 xy) -> __m256 + { + return _mm256_fmadd_ps(a3, xy, _mm256_fmadd_ps(a2, y, _mm256_fmadd_ps(a1, x, a0))); // (((a0 + a1*x) + a2*y) + a3*x*y) + }; + const auto linearTransformY = [&b0, &b1, &b2, &b3](const __m256 x, const __m256 y, const __m256 xy) -> __m256 + { + return _mm256_fmadd_ps(b3, xy, _mm256_fmadd_ps(b2, y, _mm256_fmadd_ps(b1, x, b0))); // (((b0 + b1*x) + b2*y) + b3*x*y) + }; + + if (bilinearParams.Type == TT_BILINEAR) + { + for (int row = 0; row < height; ++row) + { + const float y = static_cast(lineStart + row) / static_cast(bilinearParams.fYWidth); + const __m256 vy = _mm256_set1_ps(y); + __m256* pXLine = &xCoordinates.at(row * nrVectors); + __m256* pYLine = &yCoordinates.at(row * nrVectors); + // Vector with x-indices of the current 8 pixels of the line. + __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) + { + const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); + // Indices of the next 8 pixels. + xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); + + const __m256 xy = _mm256_mul_ps(vx, vy); + // X- and y-coordinates for the bilinear transformation of the current 8 pixels. + const __m256 xr = linearTransformX(vx, vy, xy); + const __m256 yr = linearTransformY(vx, vy, xy); + + // Save result. 
+ _mm256_store_ps((float*)pXLine, _mm256_fmadd_ps(xr, xWidth, fxShiftVec)); // xr * fxWidth + fxShift + _mm256_store_ps((float*)pYLine, _mm256_fmadd_ps(yr, yWidth, fyShiftVec)); // yr * fyWidth + fyShift + } + } + return 0; + } + + const float fa4 = static_cast(bilinearParams.a4); + const float fa5 = static_cast(bilinearParams.a5); + const float fa6 = static_cast(bilinearParams.a6); + const float fa7 = static_cast(bilinearParams.a7); + const float fa8 = static_cast(bilinearParams.a8); + const float fb4 = static_cast(bilinearParams.b4); + const float fb5 = static_cast(bilinearParams.b5); + const float fb6 = static_cast(bilinearParams.b6); + const float fb7 = static_cast(bilinearParams.b7); + const float fb8 = static_cast(bilinearParams.b8); + const __m256 a4 = _mm256_set1_ps(fa4); + const __m256 a5 = _mm256_set1_ps(fa5); + const __m256 a6 = _mm256_set1_ps(fa6); + const __m256 a7 = _mm256_set1_ps(fa7); + const __m256 a8 = _mm256_set1_ps(fa8); + const __m256 b4 = _mm256_set1_ps(fb4); + const __m256 b5 = _mm256_set1_ps(fb5); + const __m256 b6 = _mm256_set1_ps(fb6); + const __m256 b7 = _mm256_set1_ps(fb7); + const __m256 b8 = _mm256_set1_ps(fb8); + + const auto squaredTransformX = [&a4, &a5, &a6, &a7, &a8](const __m256 xLinear, const __m256 x2, const __m256 y2, const __m256 x2y, const __m256 xy2, const __m256 x2y2) -> __m256 + { + return _mm256_fmadd_ps(a8, x2y2, _mm256_fmadd_ps(a7, xy2, _mm256_fmadd_ps(a6, x2y, _mm256_fmadd_ps(a5, y2, _mm256_fmadd_ps(a4, x2, xLinear))))); // (((((xl + a4*x2) + a5*y2) + a6*x2y) + a7*xy2) + a8*x2y2) + }; + const auto squaredTransformY = [&b4, &b5, &b6, &b7, &b8](const __m256 yLinear, const __m256 x2, const __m256 y2, const __m256 x2y, const __m256 xy2, const __m256 x2y2) -> __m256 + { + return _mm256_fmadd_ps(b8, x2y2, _mm256_fmadd_ps(b7, xy2, _mm256_fmadd_ps(b6, x2y, _mm256_fmadd_ps(b5, y2, _mm256_fmadd_ps(b4, x2, yLinear))))); // (((((yl + b4*x2) + b5*y2) + b6*x2y) + b7*xy2) + b8*x2y2) + }; + + if (bilinearParams.Type == TT_BISQUARED) 
+ { + + for (int row = 0; row < height; ++row) + { + const float y = static_cast(lineStart + row) / static_cast(bilinearParams.fYWidth); + const __m256 vy = _mm256_set1_ps(y); + __m256* pXLine = &xCoordinates.at(row * nrVectors); + __m256* pYLine = &yCoordinates.at(row * nrVectors); + __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) + { + const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); + xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); + + // Linear part + const __m256 xy = _mm256_mul_ps(vx, vy); + const __m256 rlx = linearTransformX(vx, vy, xy); + const __m256 rly = linearTransformY(vx, vy, xy); + + // Square parameters + const __m256 x2 = _mm256_mul_ps(vx, vx); + const __m256 y2 = _mm256_mul_ps(vy, vy); + const __m256 x2y = _mm256_mul_ps(x2, vy); + const __m256 xy2 = _mm256_mul_ps(vx, y2); + const __m256 x2y2 = _mm256_mul_ps(x2, y2); + + // The bisquared transformation. + const __m256 xr = squaredTransformX(rlx, x2, y2, x2y, xy2, x2y2); + const __m256 yr = squaredTransformY(rly, x2, y2, x2y, xy2, x2y2); + + _mm256_store_ps((float*)pXLine, _mm256_fmadd_ps(xr, xWidth, fxShiftVec)); + _mm256_store_ps((float*)pYLine, _mm256_fmadd_ps(yr, yWidth, fyShiftVec)); + } + } + return 0; + } + + const float fa9 = static_cast(bilinearParams.a9); + const float fa10 = static_cast(bilinearParams.a10); + const float fa11 = static_cast(bilinearParams.a11); + const float fa12 = static_cast(bilinearParams.a12); + const float fa13 = static_cast(bilinearParams.a13); + const float fa14 = static_cast(bilinearParams.a14); + const float fa15 = static_cast(bilinearParams.a15); + const float fb9 = static_cast(bilinearParams.b9); + const float fb10 = static_cast(bilinearParams.b10); + const float fb11 = static_cast(bilinearParams.b11); + const float fb12 = static_cast(bilinearParams.b12); + const float fb13 = static_cast(bilinearParams.b13); + const float fb14 = 
static_cast(bilinearParams.b14); + const float fb15 = static_cast(bilinearParams.b15); + const __m256 a9 = _mm256_set1_ps(fa9); + const __m256 a10 = _mm256_set1_ps(fa10); + const __m256 a11 = _mm256_set1_ps(fa11); + const __m256 a12 = _mm256_set1_ps(fa12); + const __m256 a13 = _mm256_set1_ps(fa13); + const __m256 a14 = _mm256_set1_ps(fa14); + const __m256 a15 = _mm256_set1_ps(fa15); + const __m256 b9 = _mm256_set1_ps(fb9); + const __m256 b10 = _mm256_set1_ps(fb10); + const __m256 b11 = _mm256_set1_ps(fb11); + const __m256 b12 = _mm256_set1_ps(fb12); + const __m256 b13 = _mm256_set1_ps(fb13); + const __m256 b14 = _mm256_set1_ps(fb14); + const __m256 b15 = _mm256_set1_ps(fb15); + + const auto cubicTransformX = [&a9, &a10, &a11, &a12, &a13, &a14, &a15]( + const __m256 xSquared, const __m256 x3, const __m256 y3, const __m256 x3y, const __m256 xy3, const __m256 x3y2, const __m256 x2y3, const __m256 x3y3) -> __m256 + { + // (((((((squarePart + a9*x3) + a10*y3) + a11*x3y) + a12*xy3) + a13*x3y2) + a14*x2y3) + a15*x3y3) + return _mm256_fmadd_ps(a15, x3y3, _mm256_fmadd_ps(a14, x2y3, _mm256_fmadd_ps(a13, x3y2, _mm256_fmadd_ps(a12, xy3, _mm256_fmadd_ps(a11, x3y, _mm256_fmadd_ps(a10, y3, _mm256_fmadd_ps(a9, x3, xSquared))))))); + }; + const auto cubicTransformY = [&b9, &b10, &b11, &b12, &b13, &b14, &b15]( + const __m256 ySquared, const __m256 x3, const __m256 y3, const __m256 x3y, const __m256 xy3, const __m256 x3y2, const __m256 x2y3, const __m256 x3y3) -> __m256 + { + // (((((((squarePart + b9*x3) + b10*y3) + b11*x3y) + b12*xy3) + b13*x3y2) + b14*x2y3) + b15*x3y3) + return _mm256_fmadd_ps(b15, x3y3, _mm256_fmadd_ps(b14, x2y3, _mm256_fmadd_ps(b13, x3y2, _mm256_fmadd_ps(b12, xy3, _mm256_fmadd_ps(b11, x3y, _mm256_fmadd_ps(b10, y3, _mm256_fmadd_ps(b9, x3, ySquared))))))); + }; + + if (bilinearParams.Type == TT_BICUBIC) + { + for (int row = 0; row < height; ++row) + { + const float y = static_cast(lineStart + row) / static_cast(bilinearParams.fYWidth); + const __m256 vy = 
_mm256_set1_ps(y); + __m256* pXLine = &xCoordinates.at(row * nrVectors); + __m256* pYLine = &yCoordinates.at(row * nrVectors); + __m256i xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + // Do it in 2 steps, so that the loops get smaller, and the compiler can better keep data in CPU registers. + // (1) Linear and squared part. + // (2) Cubic part. + + for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) + { + const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); + xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); + + // Linear part + const __m256 xy = _mm256_mul_ps(vx, vy); + const __m256 rlx = linearTransformX(vx, vy, xy); + const __m256 rly = linearTransformY(vx, vy, xy); + + // Square part + const __m256 x2 = _mm256_mul_ps(vx, vx); + const __m256 y2 = _mm256_mul_ps(vy, vy); + const __m256 x2y = _mm256_mul_ps(x2, vy); + const __m256 xy2 = _mm256_mul_ps(vx, y2); + const __m256 x2y2 = _mm256_mul_ps(x2, y2); + const __m256 rsx = squaredTransformX(rlx, x2, y2, x2y, xy2, x2y2); + const __m256 rsy = squaredTransformY(rly, x2, y2, x2y, xy2, x2y2); + + _mm256_store_ps((float*)pXLine, rsx); + _mm256_store_ps((float*)pYLine, rsy); + } + + pXLine = &xCoordinates.at(row * nrVectors); + pYLine = &yCoordinates.at(row * nrVectors); + xline = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine) + { + const __m256 vx = _mm256_div_ps(_mm256_cvtepi32_ps(xline), xWidth); + xline = _mm256_add_epi32(xline, _mm256_set1_epi32(8)); + + const __m256 x2 = _mm256_mul_ps(vx, vx); + const __m256 y2 = _mm256_mul_ps(vy, vy); + + // Cubic parameters + const __m256 x3 = _mm256_mul_ps(x2, vx); + const __m256 y3 = _mm256_mul_ps(y2, vy); + const __m256 x3y = _mm256_mul_ps(x3, vy); + const __m256 xy3 = _mm256_mul_ps(vx, y3); + const __m256 x3y2 = _mm256_mul_ps(x3, y2); + const __m256 x2y3 = _mm256_mul_ps(x2, y3); + const __m256 x3y3 = _mm256_mul_ps(x3, y3); + + // Load the squared 
part (has been calculated in previous step). + const __m256 rsx = _mm256_load_ps((const float*)pXLine); + const __m256 rsy = _mm256_load_ps((const float*)pYLine); + + // The bicubic transformation + const __m256 xr = cubicTransformX(rsx, x3, y3, x3y, xy3, x3y2, x2y3, x3y3); + const __m256 yr = cubicTransformY(rsy, x3, y3, x3y, xy3, x3y2, x2y3, x3y3); + + _mm256_store_ps((float*)pXLine, _mm256_fmadd_ps(xr, xWidth, fxShiftVec)); + _mm256_store_ps((float*)pYLine, _mm256_fmadd_ps(yr, yWidth, fyShiftVec)); + } + } + return 0; + } + + return 1; +}; + +template +int AvxStacking::backgroundCalibLoop(const LoopFunction& loopFunc, const class AvxSupport& avxInputSupport, const InterpolParam& redParams, const InterpolParam& greenParams, const InterpolParam& blueParams) +{ + if (avxInputSupport.isColorBitmapOfType()) + { + const size_t w = static_cast(this->width); + const size_t startNdx = w * lineStart; + loopFunc(&avxInputSupport.redPixels().at(startNdx), w, redParams, redPixels); + loopFunc(&avxInputSupport.greenPixels().at(startNdx), w, greenParams, greenPixels); + loopFunc(&avxInputSupport.bluePixels().at(startNdx), w, blueParams, bluePixels); + return 0; + } + if constexpr (std::is_same::value) + { + if (avxInputSupport.isMonochromeCfaBitmapOfType()) + { + const size_t w = avxCfa.nrVectorsPerLine(); + loopFunc(avxCfa.redCfaBlock(), w, redParams, redPixels); + loopFunc(avxCfa.greenCfaBlock(), w, greenParams, greenPixels); + loopFunc(avxCfa.blueCfaBlock(), w, blueParams, bluePixels); + return 0; + } + } + if (avxInputSupport.isMonochromeBitmapOfType()) + { + const size_t w = static_cast(this->width); + const size_t startNdx = w * lineStart; + loopFunc(&avxInputSupport.grayPixels().at(startNdx), w, redParams, redPixels); + return 0; + } + return 1; +} + +inline float readColorValue(const std::uint16_t c) { return static_cast(c); } +inline float readColorValue(const std::uint32_t c) { return static_cast(c >> 16); } +inline float readColorValue(const float c) { return c; } + 
+template +int AvxStacking::backgroundCalibration(const CBackgroundCalibration& backgroundCalibrationDef) +{ + // We calculate vectors with 16 pixels each, so this is the number of vectors to process. + const int nrVectors = width / 16; + const AvxSupport avxInputSupport{ inputBitmap }; + + if (backgroundCalibrationDef.m_BackgroundCalibrationMode == BCM_NONE) + { + // Just copy color values as they are, pixel by pixel. + const auto loop = [this, nrVectors](const auto* const pPixels, const size_t nrElementsPerLine, const auto&, std::vector<__m256>& result) -> void + { + const size_t internalBufferNrVectors = AvxSupport::numberOfAvxVectors(this->width); + + for (int row = 0; row < this->height; ++row) + { + const T* pColor = reinterpret_cast(pPixels + row * nrElementsPerLine); + __m256* pResult = &result.at(row * internalBufferNrVectors); + for (int counter = 0; counter < nrVectors; ++counter, pColor += 16, pResult += 2) + { + const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); + _mm256_store_ps((float*)pResult, lo8); + _mm256_store_ps((float*)(pResult + 1), hi8); + } + // Remaining pixels of line + float* pRemaining = reinterpret_cast(pResult); + for (int n = nrVectors * 16; n < this->colEnd; ++n, ++pColor, ++pRemaining) + { + *pRemaining = readColorValue(*pColor); + } + } + }; + + return backgroundCalibLoop(loop, avxInputSupport, backgroundCalibrationDef.m_riRed, backgroundCalibrationDef.m_riGreen, backgroundCalibrationDef.m_riBlue); + } + else if (backgroundCalibrationDef.m_BackgroundInterpolation == BCI_RATIONAL) + { + const auto loop = [this, nrVectors](const auto* const pPixels, const size_t nrElementsPerLine, const auto& params, std::vector<__m256>& result) -> void + { + const __m256 a = _mm256_set1_ps(params.getParameterA()); + const __m256 b = _mm256_set1_ps(params.getParameterB()); + const __m256 c = _mm256_set1_ps(params.getParameterC()); + const __m256 fmin = _mm256_set1_ps(params.getParameterMin()); + const __m256 fmax = 
_mm256_set1_ps(params.getParameterMax()); + + const auto interpolate = [&a, &b, &c, &fmin, &fmax](const __m256 color) noexcept -> __m256 + { + const __m256 denom = _mm256_fmadd_ps(b, color, c); // b * color + c + const __m256 mask = _mm256_cmp_ps(denom, _mm256_setzero_ps(), 0); // cmp: denom==0 ? 1 : 0 + const __m256 xplusa = _mm256_add_ps(color, a); +// const __m256 division = _mm256_div_ps(xplusa, denom); + const __m256 division = _mm256_mul_ps(xplusa, _mm256_rcp_ps(denom)); // RCP is accurate enough. + // If denominator == 0 => use (x+a) else use (x+a)/denominator, then do the max and min. + return _mm256_max_ps(_mm256_min_ps(_mm256_blendv_ps(division, xplusa, mask), fmax), fmin); // blend: mask==1 ? b : a; + }; + + const size_t internalBufferNrVectors = AvxSupport::numberOfAvxVectors(this->width); + + for (int row = 0; row < this->height; ++row) + { + const T* pColor = reinterpret_cast(pPixels + row * nrElementsPerLine); + __m256* pResult = &result.at(row * internalBufferNrVectors); + for (int counter = 0; counter < nrVectors; ++counter, pColor += 16, pResult += 2) + { + const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); + _mm256_store_ps((float*)pResult, interpolate(lo8)); + _mm256_store_ps((float*)(pResult + 1), interpolate(hi8)); + } + // Remaining pixels of line + float* pRemaining = reinterpret_cast(pResult); + for (int n = nrVectors * 16; n < this->colEnd; ++n, ++pColor, ++pRemaining) + { + const float fcolor = readColorValue(*pColor); + const float denom = b.m256_f32[0] * fcolor + c.m256_f32[0]; + const float xplusa = fcolor + a.m256_f32[0]; + *pRemaining = std::max(std::min(denom == 0.0f ? 
xplusa : (xplusa / denom), fmax.m256_f32[0]), fmin.m256_f32[0]); + } + } + }; + + return backgroundCalibLoop(loop, avxInputSupport, backgroundCalibrationDef.m_riRed, backgroundCalibrationDef.m_riGreen, backgroundCalibrationDef.m_riBlue); + } + else // LINEAR + { + const auto loop = [this, nrVectors](const auto* const pPixels, const size_t nrElementsPerLine, const auto& params, std::vector<__m256>& result) -> void + { + const __m256 a0 = _mm256_set1_ps(params.getParameterA0()); + const __m256 a1 = _mm256_set1_ps(params.getParameterA1()); + const __m256 b0 = _mm256_set1_ps(params.getParameterB0()); + const __m256 b1 = _mm256_set1_ps(params.getParameterB1()); + const __m256 xm = _mm256_set1_ps(params.getParameterXm()); + + const auto interpolate = [a0, a1, b0, b1, xm](const __m256 x) noexcept -> __m256 + { + const __m256 mask = _mm256_cmp_ps(x, xm, 17); // cmp: x < xm ? 1 : 0 + // If x < xm => use a0 and b0, else use a1 and b1. + const __m256 aSelected = _mm256_blendv_ps(a1, a0, mask); // blend(arg1, arg2, mask): mask==1 ? arg2 : arg1; + const __m256 bSelected = _mm256_blendv_ps(b1, b0, mask); + return _mm256_fmadd_ps(x, aSelected, bSelected); // x * a + b + }; + + const size_t internalBufferNrVectors = AvxSupport::numberOfAvxVectors(this->width); + + for (int row = 0; row < this->height; ++row) + { + const T* pColor = reinterpret_cast(pPixels + row * nrElementsPerLine); + __m256* pResult = &result.at(row * internalBufferNrVectors); + for (int counter = 0; counter < nrVectors; ++counter, pColor += 16, pResult += 2) + { + const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); + _mm256_store_ps((float*)pResult, interpolate(lo8)); + _mm256_store_ps((float*)(pResult + 1), interpolate(hi8)); + } + // Remaining pixels of line + float* pRemaining = reinterpret_cast(pResult); + for (int n = nrVectors * 16; n < this->colEnd; ++n, ++pColor, ++pRemaining) + { + const float fcolor = readColorValue(*pColor); + *pRemaining = fcolor < xm.m256_f32[0] ? 
(fcolor * a0.m256_f32[0] + b0.m256_f32[0]) : (fcolor * a1.m256_f32[0] + b1.m256_f32[0]); + } + } + }; + + return backgroundCalibLoop(loop, avxInputSupport, backgroundCalibrationDef.m_liRed, backgroundCalibrationDef.m_liGreen, backgroundCalibrationDef.m_liBlue); + } +} + +#pragma warning( push ) +#pragma warning( disable : 4324 ) // Structure was padded +#pragma warning( disable : 4100 ) // Unreferenced variable + +template +int AvxStacking::pixelPartitioning() +{ + AvxSupport avxTempBitmap{ tempBitmap }; + // Check if we were called with the correct template argument. + if constexpr (ISRGB) { + if (!avxTempBitmap.isColorBitmapOfType()) + return 1; + } + else { + if (!avxTempBitmap.isMonochromeBitmapOfType()) + return 1; + } + + const size_t nrVectors = AvxSupport::numberOfAvxVectors(width); + const int outWidth = avxTempBitmap.width(); + if (outWidth <= 0) + return 1; + + // outWidth = width of the temp bitmap. + // resultWidth = width of the rect we want to write (in temp bitmap) + + // Non-vectorized accumulation for the case of 2 (or more) x-coordinates being identical. + // Vectorized version would be incorrect in that case. + const auto accumulateSingle = [](const __m256 newColor, const __m256i outNdx, const __m256i mask, auto* const pOutputBitmap) -> void + { + const auto conditionalAccumulate = [pOutputBitmap](const int m, const size_t ndx, const float color) -> void + { + if (m != 0) + pOutputBitmap[ndx] = AvxSupport::accumulateSingleColorValue(ndx, color, m, pOutputBitmap); + }; + + // This needs to be done pixel by pixel of the vector, because neighboring pixels have identical indices (due to prior pixel transform step). 
+ __m128 color = _mm256_castps256_ps128(newColor); + conditionalAccumulate(_mm256_cvtsi256_si32(mask), _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); + conditionalAccumulate(_mm256_extract_epi32(mask, 1), _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); + conditionalAccumulate(_mm256_extract_epi32(mask, 2), _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); + conditionalAccumulate(_mm256_extract_epi32(mask, 3), _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); + color = _mm256_extractf128_ps(newColor, 1); + conditionalAccumulate(_mm256_extract_epi32(mask, 4), _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); + conditionalAccumulate(_mm256_extract_epi32(mask, 5), _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); + conditionalAccumulate(_mm256_extract_epi32(mask, 6), _mm256_extract_epi32(outNdx, 6), AvxSupport::extractPs<2>(color)); + conditionalAccumulate(_mm256_extract_epi32(mask, 7), _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); + }; + + // Vectorized or non-vectorized accumulation + const auto accumulateAVX = [&](const __m256i outNdx, const __m256i mask, const __m256 colorValue, const __m256 fraction, auto* const pOutputBitmap, const bool twoNdxEqual, const bool fastLoadAndStore) -> void + { + if (twoNdxEqual) // If so, we cannot use AVX. 
+ return accumulateSingle(_mm256_mul_ps(colorValue, fraction), outNdx, mask, pOutputBitmap); + + // Read from pOutputBitmap[outNdx[0:7]], and add (colorValue*fraction)[0:7] + const __m256 limitedColor = AvxSupport::accumulateColorValues(outNdx, colorValue, fraction, mask, pOutputBitmap, fastLoadAndStore); + AvxSupport::storeColorValue(outNdx, limitedColor, mask, pOutputBitmap, fastLoadAndStore); + }; + + + const __m256i resultWidthVec = _mm256_set1_epi32(this->resultWidth); + const __m256i resultHeightVec = _mm256_set1_epi32(this->resultHeight); + + const __m256i outWidthVec = _mm256_set1_epi32(outWidth); + const auto getColorPointer = [](const std::vector<__m256>& colorPixels, const size_t offset) -> const __m256* + { + if constexpr (ISRGB) + return colorPixels.data() + offset; + else + return nullptr; + }; + const auto getColorValue = [](const __m256* const pColor) -> __m256 + { + if constexpr (ISRGB) + return _mm256_load_ps((const float*)pColor); + else + return _mm256_undefined_ps(); + }; + + // ------------------------------- + // Entropy data + + float *pRedEntropyLayer, *pGreenEntropyLayer, *pBlueEntropyLayer; + if constexpr (ENTROPY) + { + AvxSupport avxEntropySupport{ *this->entropyData.pEntropyCoverage }; + if (ISRGB && !avxEntropySupport.isColorBitmapOfType()) + return 1; + if (!ISRGB && !avxEntropySupport.isMonochromeBitmapOfType()) + return 1; + if (this->entropyData.redEntropyLayer.empty()) // Something is wrong here! + return 1; + pRedEntropyLayer = reinterpret_cast(this->entropyData.redEntropyLayer.data()); + pGreenEntropyLayer = ISRGB ? reinterpret_cast(this->entropyData.greenEntropyLayer.data()) : nullptr; + pBlueEntropyLayer = ISRGB ? 
reinterpret_cast(this->entropyData.blueEntropyLayer.data()) : nullptr; + } + + const auto accumulateEntropyRGBorMono = [&](const __m256 r, const __m256 g, const __m256 b, const __m256 fraction, const __m256i outNdx, const __m256i mask, const bool twoNdxEqual, const bool fastLoadAndStore) -> void + { + if constexpr (!ENTROPY) + return; + + if constexpr (ISRGB) + { + accumulateAVX(outNdx, mask, r, fraction, pRedEntropyLayer, twoNdxEqual, fastLoadAndStore); + accumulateAVX(outNdx, mask, g, fraction, pGreenEntropyLayer, twoNdxEqual, fastLoadAndStore); + accumulateAVX(outNdx, mask, b, fraction, pBlueEntropyLayer, twoNdxEqual, fastLoadAndStore); + } + else + { + accumulateAVX(outNdx, mask, r, fraction, pRedEntropyLayer, twoNdxEqual, fastLoadAndStore); + } + }; + // ------------------------------- + + T* const pRedOut = ISRGB ? &*avxTempBitmap.redPixels().begin() : nullptr; + T* const pGreenOut = ISRGB ? &*avxTempBitmap.greenPixels().begin() : nullptr; + T* const pBlueOut = ISRGB ? &*avxTempBitmap.bluePixels().begin() : nullptr; + T* const pGrayOut = ISRGB ? 
nullptr : &*avxTempBitmap.grayPixels().begin(); + + const auto accumulateRGBorMono = [&](const __m256 r, const __m256 g, const __m256 b, const __m256 fraction, const __m256i outNdx, const __m256i mask, const bool twoNdxEqual, const bool fastLoadAndStore) -> void + { + if constexpr (ISRGB) + { + accumulateAVX(outNdx, mask, r, fraction, pRedOut, twoNdxEqual, fastLoadAndStore); + accumulateAVX(outNdx, mask, g, fraction, pGreenOut, twoNdxEqual, fastLoadAndStore); + accumulateAVX(outNdx, mask, b, fraction, pBlueOut, twoNdxEqual, fastLoadAndStore); + } + else + { + accumulateAVX(outNdx, mask, r, fraction, pGrayOut, twoNdxEqual, fastLoadAndStore); + } + }; + const auto fastAccumulateWordRGBorMono = [&](const __m256 color, const __m256 fraction1, const __m256 fraction2, std::uint16_t* const pOutput) -> void + { + const __m256i colorVector = _mm256_loadu_si256(reinterpret_cast(pOutput)); // vmovdqu ymm, m256 +// const __m256i colorVector = _mm256_lddqu_si256(reinterpret_cast(pOutput)); // vlddqu ymm, m256 + const __m256i f1 = _mm256_zextsi128_si256(AvxSupport::cvtPsEpu16(_mm256_mul_ps(fraction1, color))); // Upper 128 bits are zeroed. + const __m256i f2 = _mm256_zextsi128_si256(AvxSupport::cvtPsEpu16(_mm256_mul_ps(fraction2, color))); + const __m256i f2ShiftedLeft = AvxSupport::shiftLeftEpi8<2>(f2); + const __m256i colorPlusFraction1 = _mm256_adds_epu16(colorVector, f1); + const __m256i colorPlusBothFractions = _mm256_adds_epu16(colorPlusFraction1, f2ShiftedLeft); + _mm256_storeu_si256(reinterpret_cast<__m256i*>(pOutput), colorPlusBothFractions); + }; + + const auto getColumnOrRowMask = [](const __m256i coord, const __m256i resultWidthOrHeight) -> __m256i + { + return _mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), coord), _mm256_cmpgt_epi32(resultWidthOrHeight, coord)); // !(0 > x) and (width > x) == (x >= 0) and (x < width). Same for y with height. 
+ }; + + // Lambda for this check: DSSRect{ 0, 0, m_rcResult.width(), m_rcResult.height() }.contains(ptOut) + const auto resultRectCheck = [](const __m256i coordTrunc, const __m256i resultWidthOrHeight, const __m256 coord) -> __m256i + { + // (pt.x >= 0) && (pt.x <= width-1) is equivalent to !(0 > floor(pt.x)) && (width > ceil(pt.x)) + return _mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), coordTrunc), _mm256_cmpgt_epi32(resultWidthOrHeight, _mm256_cvttps_epi32(_mm256_ceil_ps(coord)))); + }; + + // Accumulates with fraction1 for (x, y) and fraction2 for (x+1, y) + const __m256i allOnes = _mm256_set1_epi32(-1); // All bits '1' == all int elements -1 + const auto accumulateTwoFractions = [&, allOnes](const __m256 red, const __m256 green, const __m256 blue, const __m256 fraction1, const __m256 fraction2, const __m256i outIndex, + __m256i mask1, const __m256i mask2, const bool twoNdxEqual, const bool allNdxValid1, const bool allNdxValid2) -> void + { + if constexpr (std::is_same::value) + { + if (allNdxValid1 && allNdxValid2) + { + const size_t startNdx = _mm256_cvtsi256_si32(outIndex); // outIndex[0] + if constexpr (ISRGB) + { + fastAccumulateWordRGBorMono(red, fraction1, fraction2, pRedOut + startNdx); + fastAccumulateWordRGBorMono(green, fraction1, fraction2, pGreenOut + startNdx); + fastAccumulateWordRGBorMono(blue, fraction1, fraction2, pBlueOut + startNdx); + } + else + fastAccumulateWordRGBorMono(red, fraction1, fraction2, pGrayOut + startNdx); + + return; + } + } + + accumulateRGBorMono(red, green, blue, fraction1, outIndex, mask1, twoNdxEqual, allNdxValid1); // x, y, fraction1 + accumulateRGBorMono(red, green, blue, fraction2, _mm256_sub_epi32(outIndex, allOnes), mask2, twoNdxEqual, allNdxValid2); // x+1, y, fraction2 + }; + + for (int row = 0; row < height; ++row) + { + const size_t offset = row * nrVectors; + const __m256* pXLine = &*xCoordinates.begin() + offset; + const __m256* pYLine = &*yCoordinates.begin() + offset; + const __m256* pRed = 
&*redPixels.begin() + offset; + const __m256* pGreen = getColorPointer(greenPixels, offset); + const __m256* pBlue = getColorPointer(bluePixels, offset); + __m256i vIndex; + if constexpr (ENTROPY) + vIndex = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7); + + for (size_t counter = 0; counter < nrVectors; ++counter, ++pXLine, ++pYLine, ++pRed) + { + const __m256 xcoord = _mm256_load_ps((const float*)pXLine); + const __m256 ycoord = _mm256_load_ps((const float*)pYLine); + const __m256 xtruncated = _mm256_floor_ps(xcoord); // trunc(coordinate) + const __m256 ytruncated = _mm256_floor_ps(ycoord); + const __m256 xfractional = _mm256_sub_ps(xcoord, xtruncated); // fractional_part(coordinate) + const __m256 yfractional = _mm256_sub_ps(ycoord, ytruncated); + const __m256 xfrac1 = _mm256_sub_ps(_mm256_set1_ps(1.0f), xfractional); // 1 - fractional_part + const __m256 yfrac1 = _mm256_sub_ps(_mm256_set1_ps(1.0f), yfractional); + + const __m256 red = _mm256_load_ps((const float*)pRed); + const __m256 green = getColorValue(pGreen); + const __m256 blue = getColorValue(pBlue); + + // Different pixels of the vector can have different number of fractions. So we always need to consider all 4 fractions. + // Note: We have to process the 4 fractions one by one, because the same pixels can be involved. Otherwise accumulation would be wrong. 
+ + // 1.Fraction at (xtruncated, ytruncated) + // 2.Fraction at (xtruncated+1, ytruncated) + __m256 fraction1 = _mm256_mul_ps(xfrac1, yfrac1); + __m256 fraction2 = _mm256_mul_ps(xfractional, yfrac1); + const __m256i xii = _mm256_cvttps_epi32(xtruncated); + const __m256i yii = _mm256_cvttps_epi32(ytruncated); + + // DSSRect{ 0, 0, m_rcResult.width(), m_rcResult.height() }.contains(ptOut); + const auto resultRectMask = _mm256_and_si256( + resultRectCheck(xii, resultWidthVec, xcoord), // x-coord check against width + resultRectCheck(yii, resultHeightVec, ycoord) // y-coord check against height + ); + + const __m256i columnMask1 = getColumnOrRowMask(xii, resultWidthVec); + const __m256i columnMask2 = getColumnOrRowMask(_mm256_sub_epi32(xii, allOnes), resultWidthVec); + __m256i rowMask = getColumnOrRowMask(yii, resultHeightVec); + __m256i outIndex = _mm256_add_epi32(_mm256_mullo_epi32(outWidthVec, yii), xii); + + // Check if two adjacent indices are equal: Subtract the x-coordinates horizontally and check if any of the results equals zero. If so -> adjacent x-coordinates are equal. 
+ // (a & b) == 0 -> ZF=1, (~a & b) == 0 -> CF=1; testc: return CF; testz: return ZF; testnzc: IF (ZF == 0 && CF == 0) return 1; + const __m256i indexDiff = _mm256_sub_epi32(outIndex, _mm256_permutevar8x32_epi32(outIndex, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 0))); // -1 where ndx[i+1] == 1 + ndx[i] + const bool allNdxEquidistant = (1 == _mm256_testc_si256(indexDiff, _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, 0))); // 'testc' returns 1 if all bits are '1' -> 0xffffffff == -1 -> ndx[i] - ndx[i+1] == -1 + const bool twoNdxEqual = (0 == _mm256_testz_si256(_mm256_cmpeq_epi32(_mm256_setzero_si256(), indexDiff), _mm256_setr_epi32(-1, -1, -1, -1, -1, -1, -1, 0))); + + rowMask = _mm256_and_si256(rowMask, resultRectMask); + __m256i mask1 = _mm256_and_si256(columnMask1, rowMask); + __m256i mask2 = _mm256_and_si256(columnMask2, rowMask); + bool allNdxValid1 = allNdxEquidistant && (1 == _mm256_testc_si256(mask1, allOnes)); + bool allNdxValid2 = allNdxEquidistant && (1 == _mm256_testc_si256(mask2, allOnes)); + + accumulateTwoFractions(red, green, blue, fraction1, fraction2, outIndex, mask1, mask2, twoNdxEqual, allNdxValid1, allNdxValid2); // (x, y), (x+1, y) + __m256 redEntropy, greenEntropy, blueEntropy; + if constexpr (ENTROPY) + { + getAvxEntropy(redEntropy, greenEntropy, blueEntropy, vIndex, row); + vIndex = _mm256_add_epi32(vIndex, _mm256_set1_epi32(8)); + accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction1, outIndex, mask1, twoNdxEqual, allNdxValid1); + accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction2, _mm256_sub_epi32(outIndex, allOnes), mask2, twoNdxEqual, allNdxValid2); + } + + // 3.Fraction at (xtruncated, ytruncated+1) + // 4.Fraction at (xtruncated+1, ytruncated+1) + fraction1 = _mm256_mul_ps(xfrac1, yfractional); + fraction2 = _mm256_mul_ps(xfractional, yfractional); + rowMask = getColumnOrRowMask(_mm256_sub_epi32(yii, allOnes), resultHeightVec); + rowMask = _mm256_and_si256(rowMask, resultRectMask); + mask1 = 
_mm256_and_si256(columnMask1, rowMask); + mask2 = _mm256_and_si256(columnMask2, rowMask); + allNdxValid1 = allNdxEquidistant && (1 == _mm256_testc_si256(mask1, allOnes)); + allNdxValid2 = allNdxEquidistant && (1 == _mm256_testc_si256(mask2, allOnes)); + outIndex = _mm256_add_epi32(outIndex, outWidthVec); + + accumulateTwoFractions(red, green, blue, fraction1, fraction2, outIndex, mask1, mask2, twoNdxEqual, allNdxValid1, allNdxValid2); // (x, y+1), (x+1, y+1) + if constexpr (ENTROPY) + { + accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction1, outIndex, mask1, twoNdxEqual, allNdxValid1); + accumulateEntropyRGBorMono(redEntropy, greenEntropy, blueEntropy, fraction2, _mm256_sub_epi32(outIndex, allOnes), mask2, twoNdxEqual, allNdxValid2); + } + + if constexpr (ISRGB) + { + ++pGreen; + ++pBlue; + } + } + } + + return 0; +} + +#pragma warning( pop ) + +template +inline void AvxStacking::getAvxEntropy(__m256& redEntropy, __m256& greenEntropy, __m256& blueEntropy, const __m256i xIndex, const int row) +{ + const int windowSize = entropyData.entropyInfo.windowSize(); + const int squareSize = 2 * windowSize + 1; + + const __m256 vx = _mm256_cvtepi32_ps(xIndex); + const __m256 vy = _mm256_set1_ps(static_cast(lineStart + row)); + const __m256 vsquareSize = _mm256_set1_ps(static_cast(squareSize)); + const __m256i vsquareNdxY = _mm256_set1_epi32((lineStart + row) / squareSize); + const __m256 xndx = _mm256_floor_ps(_mm256_div_ps(vx, vsquareSize)); + const __m256i vsquareNdxX = _mm256_cvttps_epi32(xndx); + const __m256 vsquareCenterX = _mm256_fmadd_ps(xndx, vsquareSize, _mm256_set1_ps(static_cast(windowSize))); //_mm256_add_epi32(_mm256_mullo_epi32(vsquareNdxX, vsquareSize), _mm256_set1_epi32(windowSize)); + const __m256 vsquareCenterY = _mm256_fmadd_ps(_mm256_cvtepi32_ps(vsquareNdxY), vsquareSize, _mm256_set1_ps(static_cast(windowSize))); //_mm256_add_epi32(_mm256_mullo_epi32(vsquareNdxY, vsquareSize), _mm256_set1_epi32(windowSize)); + const __m256i 
vnrSquaresX = _mm256_set1_epi32(entropyData.entropyInfo.nrSquaresX()); + const __m256i vnrSquaresY = _mm256_set1_epi32(entropyData.entropyInfo.nrSquaresY()); + + const auto vdistanceTo = [&vx, &vy](const __m256 vcenterX, const __m256 vcenterY) -> __m256 + { + const __m256 x = _mm256_sub_ps(vx, vcenterX); + const __m256 y = _mm256_sub_ps(vy, vcenterY); + return _mm256_sqrt_ps(_mm256_fmadd_ps(y, y, _mm256_mul_ps(x, x))); +// return _mm256_hypot_ps(_mm256_cvtepi32_ps(_mm256_sub_epi32(vx, vcenterX)), _mm256_cvtepi32_ps(_mm256_sub_epi32(vy, vcenterY))); + }; + + const auto vgetEntropy = [&vnrSquaresX, + pRedEntropy = entropyData.entropyInfo.redEntropyData(), + pGreenEntropy = entropyData.entropyInfo.greenEntropyData(), + pBlueEntropy = entropyData.entropyInfo.blueEntropyData()](const __m256i x, const __m256i y, const __m256 mask) + { + const __m256i index = _mm256_add_epi32(_mm256_mullo_epi32(y, vnrSquaresX), x); + if constexpr (ISRGB) + { + return std::make_tuple( + _mm256_mask_i32gather_ps(mask, pRedEntropy, index, mask, 4), // where mask==0 -> gather returns mask, i.e. it returns zero. + _mm256_mask_i32gather_ps(mask, pGreenEntropy, index, mask, 4), + _mm256_mask_i32gather_ps(mask, pBlueEntropy, index, mask, 4) + ); + } + else + { + return _mm256_mask_i32gather_ps(mask, pRedEntropy, index, mask, 4); + } + }; + + // Square 0 + const __m256 vd0 = vdistanceTo(vsquareCenterX, vsquareCenterY); + // Square 1 + const __m256i usePreviousSquare = _mm256_castps_si256(_mm256_cmp_ps(vsquareCenterX, vx, 30)); // IF x left of square center -> take previous square ELSE take next square. + const __m256i vndxX = _mm256_add_epi32(vsquareNdxX, _mm256_blendv_epi8(_mm256_set1_epi32(1), usePreviousSquare, usePreviousSquare)); // square index + or - 1 depending on above condition. + const __m256 mask1 = _mm256_castsi256_ps(_mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), vndxX), _mm256_cmpgt_epi32(vnrSquaresX, vndxX))); // square index not < 0 and < nr_squares. 
+ const __m256 vd1 = _mm256_blendv_ps( // distance to new square center. Set to large value if x == old square center or new square index out of bounds. + _mm256_set1_ps(3e5f), + vdistanceTo(_mm256_blendv_ps(_mm256_add_ps(vsquareCenterX, vsquareSize), _mm256_sub_ps(vsquareCenterX, vsquareSize), _mm256_castsi256_ps(usePreviousSquare)), vsquareCenterY), + _mm256_andnot_ps(_mm256_cmp_ps(vx, vsquareCenterX, 0), mask1) + ); + // Square 2 + const __m256i useUpperSquare = _mm256_castps_si256(_mm256_cmp_ps(vsquareCenterY, vy, 30)); // IF y above square center -> take upper square ELSE take lower square. + const __m256i vndxY = _mm256_add_epi32(vsquareNdxY, _mm256_blendv_epi8(_mm256_set1_epi32(1), useUpperSquare, useUpperSquare)); + const __m256 mask2 = _mm256_castsi256_ps(_mm256_andnot_si256(_mm256_cmpgt_epi32(_mm256_setzero_si256(), vndxY), _mm256_cmpgt_epi32(vnrSquaresY, vndxY))); // Check bounds of new square index. + const __m256 vd2 = _mm256_blendv_ps( + _mm256_set1_ps(3e5f), + vdistanceTo(vsquareCenterX, _mm256_blendv_ps(_mm256_add_ps(vsquareCenterY, vsquareSize), _mm256_sub_ps(vsquareCenterY, vsquareSize), _mm256_castsi256_ps(useUpperSquare))), + _mm256_andnot_ps(_mm256_cmp_ps(vy, vsquareCenterY, 0), mask2) + ); + + const __m256 vw0 = _mm256_mul_ps(vd1, vd2); // (1/d0)/(1/d0+1/d1+1/d2) = d1d2/(d1d2+d0d2+d0d1) + const __m256 vw1 = _mm256_mul_ps(vd0, vd2); + const __m256 vw2 = _mm256_mul_ps(vd0, vd1); + const __m256 vdenom = _mm256_add_ps(_mm256_add_ps(vw0, vw1), vw2); + + if constexpr (ISRGB) + { + // Entropies of square0, square1, square2 + const auto [vr0, vg0, vb0] = vgetEntropy(vsquareNdxX, vsquareNdxY, _mm256_castsi256_ps(_mm256_set1_epi32(0xffffffff))); + const auto [vr1, vg1, vb1] = vgetEntropy(vndxX, vsquareNdxY, mask1); + const auto [vr2, vg2, vb2] = vgetEntropy(vsquareNdxX, vndxY, mask2); + redEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, vr0, _mm256_fmadd_ps(vw1, vr1, _mm256_mul_ps(vw2, vr2))), vdenom); + greenEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, 
vg0, _mm256_fmadd_ps(vw1, vg1, _mm256_mul_ps(vw2, vg2))), vdenom); + blueEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, vb0, _mm256_fmadd_ps(vw1, vb1, _mm256_mul_ps(vw2, vb2))), vdenom); + } + else + { + // Entropies of square0, square1, square2 + const __m256 vr0 = vgetEntropy(vsquareNdxX, vsquareNdxY, _mm256_castsi256_ps(_mm256_set1_epi32(0xffffffff))); + const __m256 vr1 = vgetEntropy(vndxX, vsquareNdxY, mask1); + const __m256 vr2 = vgetEntropy(vsquareNdxX, vndxY, mask2); + redEntropy = _mm256_div_ps(_mm256_fmadd_ps(vw0, vr0, _mm256_fmadd_ps(vw1, vr1, _mm256_mul_ps(vw2, vr2))), vdenom); + } +/* + const auto getEntropies = [nrSquaresX = entropyData.entropyInfo.nrSquaresX(), + nrSquaresY = entropyData.entropyInfo.nrSquaresY(), + redSquareEntropies = entropyData.entropyInfo.redEntropyData(), + greenSquareEntropies = entropyData.entropyInfo.greenEntropyData(), + blueSquareEntropies = entropyData.entropyInfo.blueEntropyData()](const int x, const int y) -> std::tuple + { + return (x >= 0 && x < nrSquaresX && y >= 0 && y < nrSquaresY) + ? (constexpr (ISRGB) + ? 
std::make_tuple(redSquareEntropies[y * nrSquaresX + x], greenSquareEntropies[y * nrSquaresX + x], blueSquareEntropies[y * nrSquaresX + x]) + : std::make_tuple(redSquareEntropies[y * nrSquaresX + x], 0.0f, 0.0f)) + : std::make_tuple(-1.0f, -1.0f, -1.0f); + }; + + const int y = lineStart + row; + const int squareNdxY = y / squareSize; + for (int n = 0; n < 8; ++n) + { + const int x = counter * 8 + n; + const int squareNdxX = x / squareSize; + const int squareCenterX = squareNdxX * squareSize + windowSize; + const int squareCenterY = squareNdxY * squareSize + windowSize; + + const auto distanceTo = [x, y](const int centerX, const int centerY) -> float + { + const auto square = [](const int x) { return static_cast(x * x); }; + return sqrtf(square(x - centerX) + square(y - centerY)); + }; + + // Square 0 + const auto [re0, ge0, be0] = getEntropies(squareNdxX, squareNdxY); + const float d0 = distanceTo(squareCenterX, squareCenterY); + // Square 1 + int ndxX = x >= squareCenterX ? (squareNdxX + 1) : (squareNdxX - 1); + int ndxY = squareNdxY; + const auto [re1, ge1, be1] = getEntropies(ndxX, ndxY); + const float d1 = (re1 < 0.0f || x == squareCenterX) ? 1e5f : distanceTo(squareCenterX + (x >= squareCenterX ? squareSize : -squareSize), squareCenterY); + // Square 2 + ndxX = squareNdxX; + ndxY = y >= squareCenterY ? (squareNdxY + 1) : (squareNdxY - 1); + const auto [re2, ge2, be2] = getEntropies(ndxX, ndxY); + const float d2 = (re2 < 0.0f || y == squareCenterY) ? 1e5f : distanceTo(squareCenterX, squareCenterY + (y >= squareCenterY ? 
squareSize : -squareSize)); + + const float denom = d1 * d2 + d0 * (d1 + d2); + const float w0 = d1 * d2; + const float w1 = d0 * d2; + const float w2 = d0 * d1; + + redEntropy.m256_f32[n] = (w0 * re0 + w1 * re1 + w2 * re2) / denom; + if constexpr (ISRGB) + { + greenEntropy.m256_f32[n] = (w0 * ge0 + w1 * ge1 + w2 * ge2) / denom; + blueEntropy.m256_f32[n] = (w0 * be0 + w1 * be1 + w2 * be2) / denom; + } + } +*/ +/* + double dr, dg, db; + COLORREF16 crcol; + for (int n = 0; n < 8; ++n) + { + const_cast(entropyData.entropyInfo).GetPixel(xIndex.m256i_i32[n], lineStart + row, dr, dg, db, crcol); + if (fabsf(redEntropy.m256_f32[n] - static_cast(dr)) > 0.01f) + { + wchar_t s[256]; + swprintf_s(s, L"x/y=%d/%d, soll=%f, ist=%f", xIndex.m256i_i32[n], lineStart+row, static_cast(dr), redEntropy.m256_f32[n]); + MessageBox(0, s, L"", 0); + } + } +*/ +} diff --git a/DeepSkyStacker/avx.h b/DeepSkyStackerKernel/avx.h similarity index 97% rename from DeepSkyStacker/avx.h rename to DeepSkyStackerKernel/avx.h index cb58867a..e856fbe1 100644 --- a/DeepSkyStacker/avx.h +++ b/DeepSkyStackerKernel/avx.h @@ -1,52 +1,52 @@ -#pragma once -#include "avx_cfa.h" - -class AvxEntropy; -class CPixelTransform; -class CTaskInfo; -class CBackgroundCalibration; -class AvxStacking -{ -private: - int lineStart, lineEnd, colEnd; - int width, height; - int resultWidth, resultHeight; - std::vector<__m256> xCoordinates; - std::vector<__m256> yCoordinates; - std::vector<__m256> redPixels; - std::vector<__m256> greenPixels; - std::vector<__m256> bluePixels; - CMemoryBitmap& inputBitmap; - CMemoryBitmap& tempBitmap; - AvxCfaProcessing avxCfa; - AvxEntropy& entropyData; -public: - AvxStacking() = delete; - AvxStacking(int lStart, int lEnd, CMemoryBitmap& inputbm, CMemoryBitmap& tempbm, const DSSRect& resultRect, AvxEntropy& entrdat); - AvxStacking(const AvxStacking&) = default; - AvxStacking(AvxStacking&&) = delete; - AvxStacking& operator=(const AvxStacking&) = delete; - - void init(const int lStart, const int 
lEnd); - - int stack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& backgroundCalibrationDef, const int pixelSizeMultiplier); -private: - void resizeColorVectors(const size_t nrVectors); - - template - int doStack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& backgroundCalibrationDef, const int pixelSizeMultiplier); - - int pixelTransform(const CPixelTransform& pixelTransformDef); - - template - int backgroundCalibLoop(const LoopFunction& loopFunc, const class AvxSupport& avxSupport, const InterpolParam& redParams, const InterpolParam& greenParams, const InterpolParam& blueParams); - - template - int backgroundCalibration(const CBackgroundCalibration& backgroundCalibrationDef); - - template - int pixelPartitioning(); - - template - void getAvxEntropy(__m256& redEntropy, __m256& greenEntropy, __m256& blueEntropy, const __m256i xIndex, const int row); -}; +#pragma once +#include "avx_cfa.h" + +class AvxEntropy; +class CPixelTransform; +class CTaskInfo; +class CBackgroundCalibration; +class AvxStacking +{ +private: + int lineStart, lineEnd, colEnd; + int width, height; + int resultWidth, resultHeight; + std::vector<__m256> xCoordinates; + std::vector<__m256> yCoordinates; + std::vector<__m256> redPixels; + std::vector<__m256> greenPixels; + std::vector<__m256> bluePixels; + CMemoryBitmap& inputBitmap; + CMemoryBitmap& tempBitmap; + AvxCfaProcessing avxCfa; + AvxEntropy& entropyData; +public: + AvxStacking() = delete; + AvxStacking(int lStart, int lEnd, CMemoryBitmap& inputbm, CMemoryBitmap& tempbm, const DSSRect& resultRect, AvxEntropy& entrdat); + AvxStacking(const AvxStacking&) = default; + AvxStacking(AvxStacking&&) = delete; + AvxStacking& operator=(const AvxStacking&) = delete; + + void init(const int lStart, const int lEnd); + + int stack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& 
backgroundCalibrationDef, const int pixelSizeMultiplier); +private: + void resizeColorVectors(const size_t nrVectors); + + template + int doStack(const CPixelTransform& pixelTransformDef, const CTaskInfo& taskInfo, const CBackgroundCalibration& backgroundCalibrationDef, const int pixelSizeMultiplier); + + int pixelTransform(const CPixelTransform& pixelTransformDef); + + template + int backgroundCalibLoop(const LoopFunction& loopFunc, const class AvxSupport& avxSupport, const InterpolParam& redParams, const InterpolParam& greenParams, const InterpolParam& blueParams); + + template + int backgroundCalibration(const CBackgroundCalibration& backgroundCalibrationDef); + + template + int pixelPartitioning(); + + template + void getAvxEntropy(__m256& redEntropy, __m256& greenEntropy, __m256& blueEntropy, const __m256i xIndex, const int row); +}; diff --git a/DeepSkyStacker/avx_avg.cpp b/DeepSkyStackerKernel/avx_avg.cpp similarity index 97% rename from DeepSkyStacker/avx_avg.cpp rename to DeepSkyStackerKernel/avx_avg.cpp index add616f7..1de3c3f1 100644 --- a/DeepSkyStacker/avx_avg.cpp +++ b/DeepSkyStackerKernel/avx_avg.cpp @@ -1,296 +1,296 @@ -#include "stdafx.h" -#include "avx_avg.h" -#include "dssrect.h" -#include "avx_support.h" -#include "TaskInfo.h" -#include "Ztrace.h" - -AvxAccumulation::AvxAccumulation(const DSSRect& resultRect, const CTaskInfo& tInfo, CMemoryBitmap& tempbm, CMemoryBitmap& outbm, AvxEntropy& entroinfo) noexcept : - resultWidth{ resultRect.width() }, resultHeight{ resultRect.height() }, - tempBitmap{ tempbm }, - outputBitmap{ outbm }, - taskInfo{ tInfo }, - avxEntropy{ entroinfo } -{} - -// ********************************************************************************************* -// Sept. 2020: Only works for output bitmaps of type float (which is currently always the case). -// There is a static type check below. 
-// ********************************************************************************************* - -int AvxAccumulation::accumulate(const int nrStackedBitmaps) -{ - if (!AvxSupport::checkSimdAvailability()) - return 1; - - int rval = 1; - if (doAccumulate(nrStackedBitmaps) == 0 - || doAccumulate(nrStackedBitmaps) == 0 - || doAccumulate(nrStackedBitmaps) == 0) - { - rval = 0; - } - return AvxSupport::zeroUpper(rval); -} - -template -int AvxAccumulation::doAccumulate(const int nrStackedBitmaps) -{ - // Output bitmap is always float - if constexpr (!std::is_same::value) - return 1; - - const AvxSupport avxTempBitmap{ tempBitmap }; - - if (!avxTempBitmap.bitmapHasCorrectType()) - return 1; - if (!AvxSupport{outputBitmap}.bitmapHasCorrectType()) - return 1; - - ZFUNCTRACE_RUNTIME(); - - constexpr size_t vectorLen = 16; - const int nrVectors = resultWidth / vectorLen; - - if (taskInfo.m_Method == MBP_FASTAVERAGE) - { - const __m256 nrStacked = _mm256_set1_ps(static_cast(nrStackedBitmaps)); - const __m256 nrStacked1 = _mm256_set1_ps(static_cast(nrStackedBitmaps + 1)); - - const auto accumulate = [&nrStacked, &nrStacked1](const T_IN* pIn, T_OUT* pOut) -> void - { - const auto [newColorLo8, newColorHi8] = AvxSupport::read16PackedSingle(pIn); - const auto [oldColorLo8, oldColorHi8] = AvxSupport::read16PackedSingle(pOut); - // If T_OUT != float, then we need to change these lines (below too). 
- _mm256_storeu_ps(pOut, _mm256_div_ps(_mm256_fmadd_ps(oldColorLo8, nrStacked, newColorLo8), nrStacked1)); // (oldColor * nrStacked + newColor) / (nrStacked + 1) - _mm256_storeu_ps(pOut + 8, _mm256_div_ps(_mm256_fmadd_ps(oldColorHi8, nrStacked, newColorHi8), nrStacked1)); - }; - - if (avxTempBitmap.isColorBitmap()) - { - const T_IN *pRed{ &*avxTempBitmap.redPixels().cbegin() }, *pGreen{ &*avxTempBitmap.greenPixels().cbegin() }, *pBlue{ &*avxTempBitmap.bluePixels().cbegin() }; - auto *const pOutput = dynamic_cast*>(&outputBitmap); - if (pOutput == nullptr) - return 1; - T_OUT *pOutRed{ &*pOutput->m_Red.m_vPixels.begin() }, *pOutGreen{ &*pOutput->m_Green.m_vPixels.begin() }, *pOutBlue{ &*pOutput->m_Blue.m_vPixels.begin() }; - - for (int row = 0; row < resultHeight; ++row) - { - for (int counter = 0; counter < nrVectors; ++counter, pRed += vectorLen, pGreen += vectorLen, pBlue += vectorLen, pOutRed += vectorLen, pOutGreen += vectorLen, pOutBlue += vectorLen) - { - accumulate(pRed, pOutRed); - accumulate(pGreen, pOutGreen); - accumulate(pBlue, pOutBlue); - } - // Rest of line - for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pRed, ++pGreen, ++pBlue, ++pOutRed, ++pOutGreen, ++pOutBlue) - { - if constexpr (std::is_same_v) - { - *pOutRed = (*pOutRed * static_cast(nrStackedBitmaps) + static_cast(*pRed >> 16)) / static_cast(nrStackedBitmaps + 1); - *pOutGreen = (*pOutGreen * static_cast(nrStackedBitmaps) + static_cast(*pGreen >> 16)) / static_cast(nrStackedBitmaps + 1); - *pOutBlue = (*pOutBlue * static_cast(nrStackedBitmaps) + static_cast(*pBlue >> 16)) / static_cast(nrStackedBitmaps + 1); - } - else - { - *pOutRed = (*pOutRed * static_cast(nrStackedBitmaps) + static_cast(*pRed)) / static_cast(nrStackedBitmaps + 1); - *pOutGreen = (*pOutGreen * static_cast(nrStackedBitmaps) + static_cast(*pGreen)) / static_cast(nrStackedBitmaps + 1); - *pOutBlue = (*pOutBlue * static_cast(nrStackedBitmaps) + static_cast(*pBlue)) / static_cast(nrStackedBitmaps + 1); - } - } - } 
- return 0; - } - if (avxTempBitmap.isMonochromeBitmap()) - { - const T_IN* pGray{ &*avxTempBitmap.grayPixels().cbegin() }; - auto *const pOutput = dynamic_cast*>(&outputBitmap); - if (pOutput == nullptr) - return 1; - T_OUT* pOut{ &*pOutput->m_vPixels.begin() }; - - for (int row = 0; row < resultHeight; ++row) - { - for (int counter = 0; counter < nrVectors; ++counter, pGray += vectorLen, pOut += vectorLen) - accumulate(pGray, pOut); - // Rest of line - for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pGray, ++pOut) - if constexpr (std::is_same_v) - *pOut = (*pOut * static_cast(nrStackedBitmaps) + static_cast(*pGray >> 16)) / static_cast(nrStackedBitmaps + 1); - else - *pOut = (*pOut * static_cast(nrStackedBitmaps) + static_cast(*pGray)) / static_cast(nrStackedBitmaps + 1); - } - return 0; - } - return 1; - } - else if (taskInfo.m_Method == MBP_MAXIMUM) - { - const auto maximum = [](const T_IN* pIn, T_OUT* pOut) -> void - { - const auto [newColorLo8, newColorHi8] = AvxSupport::read16PackedSingle(pIn); - const auto [oldColorLo8, oldColorHi8] = AvxSupport::read16PackedSingle(pOut); - _mm256_storeu_ps(pOut, _mm256_max_ps(oldColorLo8, newColorLo8)); - _mm256_storeu_ps(pOut + 8, _mm256_max_ps(oldColorHi8, newColorHi8)); - }; - - if (avxTempBitmap.isColorBitmap()) - { - const T_IN *pRed{ &*avxTempBitmap.redPixels().cbegin() }, *pGreen{ &*avxTempBitmap.greenPixels().cbegin() }, *pBlue{ &*avxTempBitmap.bluePixels().cbegin() }; - auto* const pOutput = dynamic_cast*>(&outputBitmap); - if (pOutput == nullptr) - return 1; - T_OUT *pOutRed{ &*pOutput->m_Red.m_vPixels.begin() }, *pOutGreen{ &*pOutput->m_Green.m_vPixels.begin() }, *pOutBlue{ &*pOutput->m_Blue.m_vPixels.begin() }; - - for (int row = 0; row < resultHeight; ++row) - { - for (int counter = 0; counter < nrVectors; ++counter, pRed += vectorLen, pGreen += vectorLen, pBlue += vectorLen, pOutRed += vectorLen, pOutGreen += vectorLen, pOutBlue += vectorLen) - { - maximum(pRed, pOutRed); - maximum(pGreen, 
pOutGreen); - maximum(pBlue, pOutBlue); - } - // Rest of line - for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pRed, ++pGreen, ++pBlue, ++pOutRed, ++pOutGreen, ++pOutBlue) - { - if constexpr (std::is_same_v) - { - *pOutRed = std::max(*pOutRed, static_cast(*pRed >> 16)); - *pOutGreen = std::max(*pOutGreen, static_cast(*pGreen >> 16)); - *pOutBlue = std::max(*pOutBlue, static_cast(*pBlue >> 16)); - } - else - { - *pOutRed = std::max(*pOutRed, static_cast(*pRed)); - *pOutGreen = std::max(*pOutGreen, static_cast(*pGreen)); - *pOutBlue = std::max(*pOutBlue, static_cast(*pBlue)); - } - } - } - return 0; - } - if (avxTempBitmap.isMonochromeBitmap()) - { - const T_IN* pGray{ &*avxTempBitmap.grayPixels().cbegin() }; - auto *const pOutput = dynamic_cast*>(&outputBitmap); - if (pOutput == nullptr) - return 1; - T_OUT* pOut{ &*pOutput->m_vPixels.begin() }; - - for (int row = 0; row < resultHeight; ++row) - { - for (int counter = 0; counter < nrVectors; ++counter, pGray += vectorLen, pOut += vectorLen) - maximum(pGray, pOut); - // Rest of line - for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pGray, ++pOut) - if constexpr (std::is_same_v) - *pOut = std::max(*pOut, static_cast(*pGray >> 16)); - else - *pOut = std::max(*pOut, static_cast(*pGray)); - } - return 0; - } - return 1; - } - else if (taskInfo.m_Method == MBP_ENTROPYAVERAGE) - { - if (avxEntropy.pEntropyCoverage == nullptr) - return 1; - AvxSupport avxEntropyCoverageBitmap{ *avxEntropy.pEntropyCoverage }; - if (!avxEntropyCoverageBitmap.bitmapHasCorrectType()) - return 1; - - const auto average = [](const T_IN* pIn, T_OUT* pOut, const float* pEntropyLayer, float* pEntropyCoverage) -> void - { - const auto [newColorLo8, newColorHi8] = AvxSupport::read16PackedSingle(pIn); - const auto [oldColorLo8, oldColorHi8] = AvxSupport::read16PackedSingle(pOut); - const auto [newEntropyLo8, newEntropyHi8] = AvxSupport::read16PackedSingle(pEntropyLayer); - const auto [oldEntropyLo8, oldEntropyHi8] = 
AvxSupport::read16PackedSingle(pEntropyCoverage); - - _mm256_storeu_ps(pEntropyCoverage, _mm256_add_ps(oldEntropyLo8, newEntropyLo8)); // EntropyCoverage += Entropy - _mm256_storeu_ps(pEntropyCoverage + 8, _mm256_add_ps(oldEntropyHi8, newEntropyHi8)); - _mm256_storeu_ps(pOut, _mm256_fmadd_ps(newColorLo8, newEntropyLo8, oldColorLo8)); // OutputBitmap += Color * Entropy - _mm256_storeu_ps(pOut + 8, _mm256_fmadd_ps(newColorHi8, newEntropyHi8, oldColorHi8)); - }; - - if (avxTempBitmap.isColorBitmap()) - { - const T_IN *pRed{ &*avxTempBitmap.redPixels().cbegin() }, *pGreen{ &*avxTempBitmap.greenPixels().cbegin() }, *pBlue{ &*avxTempBitmap.bluePixels().cbegin() }; - auto* const pOutput = dynamic_cast*>(&outputBitmap); - if (pOutput == nullptr) - return 1; - T_OUT *pOutRed{ &*pOutput->m_Red.m_vPixels.begin() }, *pOutGreen{ &*pOutput->m_Green.m_vPixels.begin() }, *pOutBlue{ &*pOutput->m_Blue.m_vPixels.begin() }; - // Entropy - const float* pEntropyRed = reinterpret_cast(avxEntropy.redEntropyLayer.data()); - const float* pEntropyGreen = reinterpret_cast(avxEntropy.greenEntropyLayer.data()); - const float* pEntropyBlue = reinterpret_cast(avxEntropy.blueEntropyLayer.data()); - float *pEntropyCovRed{ avxEntropyCoverageBitmap.redPixels().data() }, *pEntropyCovGreen{ avxEntropyCoverageBitmap.greenPixels().data() }, *pEntropyCovBlue{ &*avxEntropyCoverageBitmap.bluePixels().data() }; - - for (int row = 0; row < resultHeight; ++row) - { - for (int counter = 0; counter < nrVectors; ++counter, - pRed += vectorLen, pGreen += vectorLen, pBlue += vectorLen, - pOutRed += vectorLen, pOutGreen += vectorLen, pOutBlue += vectorLen, - pEntropyRed += vectorLen, pEntropyGreen += vectorLen, pEntropyBlue += vectorLen, - pEntropyCovRed += vectorLen, pEntropyCovGreen += vectorLen, pEntropyCovBlue += vectorLen) - { - average(pRed, pOutRed, pEntropyRed, pEntropyCovRed); - average(pGreen, pOutGreen, pEntropyGreen, pEntropyCovGreen); - average(pBlue, pOutBlue, pEntropyBlue, pEntropyCovBlue); - } - // 
Rest of line - for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pRed, ++pGreen, ++pBlue, ++pOutRed, ++pOutGreen, ++pOutBlue, - ++pEntropyRed, ++pEntropyGreen, ++pEntropyBlue, ++pEntropyCovRed, ++pEntropyCovGreen, ++pEntropyCovBlue) - { - *pEntropyCovRed += *pEntropyRed; // EntropyCoverage += Entropy - *pEntropyCovGreen += *pEntropyGreen; - *pEntropyCovBlue += *pEntropyBlue; - if constexpr (std::is_same_v) - { - *pOutRed += static_cast(*pRed >> 16) * *pEntropyRed; // OutputBitmap += Color * Entropy - *pOutGreen += static_cast(*pGreen >> 16) * *pEntropyGreen; - *pOutBlue += static_cast(*pBlue >> 16) * *pEntropyBlue; - } - else - { - *pOutRed += static_cast(*pRed) * *pEntropyRed; // OutputBitmap += Color * Entropy - *pOutGreen += static_cast(*pGreen) * *pEntropyGreen; - *pOutBlue += static_cast(*pBlue) * *pEntropyBlue; - } - } - } - return 0; - } - if (avxTempBitmap.isMonochromeBitmap()) - { - const T_IN* pGray{ &*avxTempBitmap.grayPixels().cbegin() }; - auto* const pOutput = dynamic_cast*>(&outputBitmap); - if (pOutput == nullptr) - return 1; - T_OUT* pOut{ &*pOutput->m_vPixels.begin() }; - // Entropy - const float* pEntropy = reinterpret_cast(avxEntropy.redEntropyLayer.data()); - float* pEntropyCov{ avxEntropyCoverageBitmap.grayPixels().data() }; - - for (int row = 0; row < resultHeight; ++row) - { - for (int counter = 0; counter < nrVectors; ++counter, pGray += vectorLen, pOut += vectorLen, pEntropy += vectorLen, pEntropyCov += vectorLen) - average(pGray, pOut, pEntropy, pEntropyCov); - // Rest of line - for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pGray, ++pOut, ++pEntropy, ++pEntropyCov) - { - *pEntropyCov += *pEntropy; - if constexpr (std::is_same_v) - *pOut += static_cast(*pGray >> 16) * *pEntropy; - else - *pOut += static_cast(*pGray) * *pEntropy; - } - } - return 0; - } - - return 1; - } - - return 1; -} +#include "stdafx.h" +#include "avx_avg.h" +#include "dssrect.h" +#include "avx_support.h" +#include "TaskInfo.h" +#include 
"Ztrace.h" + +AvxAccumulation::AvxAccumulation(const DSSRect& resultRect, const CTaskInfo& tInfo, CMemoryBitmap& tempbm, CMemoryBitmap& outbm, AvxEntropy& entroinfo) noexcept : + resultWidth{ resultRect.width() }, resultHeight{ resultRect.height() }, + tempBitmap{ tempbm }, + outputBitmap{ outbm }, + taskInfo{ tInfo }, + avxEntropy{ entroinfo } +{} + +// ********************************************************************************************* +// Sept. 2020: Only works for output bitmaps of type float (which is currently always the case). +// There is a static type check below. +// ********************************************************************************************* + +int AvxAccumulation::accumulate(const int nrStackedBitmaps) +{ + if (!AvxSupport::checkSimdAvailability()) + return 1; + + int rval = 1; + if (doAccumulate(nrStackedBitmaps) == 0 + || doAccumulate(nrStackedBitmaps) == 0 + || doAccumulate(nrStackedBitmaps) == 0) + { + rval = 0; + } + return AvxSupport::zeroUpper(rval); +} + +template +int AvxAccumulation::doAccumulate(const int nrStackedBitmaps) +{ + // Output bitmap is always float + if constexpr (!std::is_same::value) + return 1; + + const AvxSupport avxTempBitmap{ tempBitmap }; + + if (!avxTempBitmap.bitmapHasCorrectType()) + return 1; + if (!AvxSupport{outputBitmap}.bitmapHasCorrectType()) + return 1; + + ZFUNCTRACE_RUNTIME(); + + constexpr size_t vectorLen = 16; + const int nrVectors = resultWidth / vectorLen; + + if (taskInfo.m_Method == MBP_FASTAVERAGE) + { + const __m256 nrStacked = _mm256_set1_ps(static_cast(nrStackedBitmaps)); + const __m256 nrStacked1 = _mm256_set1_ps(static_cast(nrStackedBitmaps + 1)); + + const auto accumulate = [&nrStacked, &nrStacked1](const T_IN* pIn, T_OUT* pOut) -> void + { + const auto [newColorLo8, newColorHi8] = AvxSupport::read16PackedSingle(pIn); + const auto [oldColorLo8, oldColorHi8] = AvxSupport::read16PackedSingle(pOut); + // If T_OUT != float, then we need to change these lines (below too). 
+ _mm256_storeu_ps(pOut, _mm256_div_ps(_mm256_fmadd_ps(oldColorLo8, nrStacked, newColorLo8), nrStacked1)); // (oldColor * nrStacked + newColor) / (nrStacked + 1) + _mm256_storeu_ps(pOut + 8, _mm256_div_ps(_mm256_fmadd_ps(oldColorHi8, nrStacked, newColorHi8), nrStacked1)); + }; + + if (avxTempBitmap.isColorBitmap()) + { + const T_IN *pRed{ &*avxTempBitmap.redPixels().cbegin() }, *pGreen{ &*avxTempBitmap.greenPixels().cbegin() }, *pBlue{ &*avxTempBitmap.bluePixels().cbegin() }; + auto *const pOutput = dynamic_cast*>(&outputBitmap); + if (pOutput == nullptr) + return 1; + T_OUT *pOutRed{ &*pOutput->m_Red.m_vPixels.begin() }, *pOutGreen{ &*pOutput->m_Green.m_vPixels.begin() }, *pOutBlue{ &*pOutput->m_Blue.m_vPixels.begin() }; + + for (int row = 0; row < resultHeight; ++row) + { + for (int counter = 0; counter < nrVectors; ++counter, pRed += vectorLen, pGreen += vectorLen, pBlue += vectorLen, pOutRed += vectorLen, pOutGreen += vectorLen, pOutBlue += vectorLen) + { + accumulate(pRed, pOutRed); + accumulate(pGreen, pOutGreen); + accumulate(pBlue, pOutBlue); + } + // Rest of line + for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pRed, ++pGreen, ++pBlue, ++pOutRed, ++pOutGreen, ++pOutBlue) + { + if constexpr (std::is_same_v) + { + *pOutRed = (*pOutRed * static_cast(nrStackedBitmaps) + static_cast(*pRed >> 16)) / static_cast(nrStackedBitmaps + 1); + *pOutGreen = (*pOutGreen * static_cast(nrStackedBitmaps) + static_cast(*pGreen >> 16)) / static_cast(nrStackedBitmaps + 1); + *pOutBlue = (*pOutBlue * static_cast(nrStackedBitmaps) + static_cast(*pBlue >> 16)) / static_cast(nrStackedBitmaps + 1); + } + else + { + *pOutRed = (*pOutRed * static_cast(nrStackedBitmaps) + static_cast(*pRed)) / static_cast(nrStackedBitmaps + 1); + *pOutGreen = (*pOutGreen * static_cast(nrStackedBitmaps) + static_cast(*pGreen)) / static_cast(nrStackedBitmaps + 1); + *pOutBlue = (*pOutBlue * static_cast(nrStackedBitmaps) + static_cast(*pBlue)) / static_cast(nrStackedBitmaps + 1); + } + } + } 
+ return 0; + } + if (avxTempBitmap.isMonochromeBitmap()) + { + const T_IN* pGray{ &*avxTempBitmap.grayPixels().cbegin() }; + auto *const pOutput = dynamic_cast*>(&outputBitmap); + if (pOutput == nullptr) + return 1; + T_OUT* pOut{ &*pOutput->m_vPixels.begin() }; + + for (int row = 0; row < resultHeight; ++row) + { + for (int counter = 0; counter < nrVectors; ++counter, pGray += vectorLen, pOut += vectorLen) + accumulate(pGray, pOut); + // Rest of line + for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pGray, ++pOut) + if constexpr (std::is_same_v) + *pOut = (*pOut * static_cast(nrStackedBitmaps) + static_cast(*pGray >> 16)) / static_cast(nrStackedBitmaps + 1); + else + *pOut = (*pOut * static_cast(nrStackedBitmaps) + static_cast(*pGray)) / static_cast(nrStackedBitmaps + 1); + } + return 0; + } + return 1; + } + else if (taskInfo.m_Method == MBP_MAXIMUM) + { + const auto maximum = [](const T_IN* pIn, T_OUT* pOut) -> void + { + const auto [newColorLo8, newColorHi8] = AvxSupport::read16PackedSingle(pIn); + const auto [oldColorLo8, oldColorHi8] = AvxSupport::read16PackedSingle(pOut); + _mm256_storeu_ps(pOut, _mm256_max_ps(oldColorLo8, newColorLo8)); + _mm256_storeu_ps(pOut + 8, _mm256_max_ps(oldColorHi8, newColorHi8)); + }; + + if (avxTempBitmap.isColorBitmap()) + { + const T_IN *pRed{ &*avxTempBitmap.redPixels().cbegin() }, *pGreen{ &*avxTempBitmap.greenPixels().cbegin() }, *pBlue{ &*avxTempBitmap.bluePixels().cbegin() }; + auto* const pOutput = dynamic_cast*>(&outputBitmap); + if (pOutput == nullptr) + return 1; + T_OUT *pOutRed{ &*pOutput->m_Red.m_vPixels.begin() }, *pOutGreen{ &*pOutput->m_Green.m_vPixels.begin() }, *pOutBlue{ &*pOutput->m_Blue.m_vPixels.begin() }; + + for (int row = 0; row < resultHeight; ++row) + { + for (int counter = 0; counter < nrVectors; ++counter, pRed += vectorLen, pGreen += vectorLen, pBlue += vectorLen, pOutRed += vectorLen, pOutGreen += vectorLen, pOutBlue += vectorLen) + { + maximum(pRed, pOutRed); + maximum(pGreen, 
pOutGreen); + maximum(pBlue, pOutBlue); + } + // Rest of line + for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pRed, ++pGreen, ++pBlue, ++pOutRed, ++pOutGreen, ++pOutBlue) + { + if constexpr (std::is_same_v) + { + *pOutRed = std::max(*pOutRed, static_cast(*pRed >> 16)); + *pOutGreen = std::max(*pOutGreen, static_cast(*pGreen >> 16)); + *pOutBlue = std::max(*pOutBlue, static_cast(*pBlue >> 16)); + } + else + { + *pOutRed = std::max(*pOutRed, static_cast(*pRed)); + *pOutGreen = std::max(*pOutGreen, static_cast(*pGreen)); + *pOutBlue = std::max(*pOutBlue, static_cast(*pBlue)); + } + } + } + return 0; + } + if (avxTempBitmap.isMonochromeBitmap()) + { + const T_IN* pGray{ &*avxTempBitmap.grayPixels().cbegin() }; + auto *const pOutput = dynamic_cast*>(&outputBitmap); + if (pOutput == nullptr) + return 1; + T_OUT* pOut{ &*pOutput->m_vPixels.begin() }; + + for (int row = 0; row < resultHeight; ++row) + { + for (int counter = 0; counter < nrVectors; ++counter, pGray += vectorLen, pOut += vectorLen) + maximum(pGray, pOut); + // Rest of line + for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pGray, ++pOut) + if constexpr (std::is_same_v) + *pOut = std::max(*pOut, static_cast(*pGray >> 16)); + else + *pOut = std::max(*pOut, static_cast(*pGray)); + } + return 0; + } + return 1; + } + else if (taskInfo.m_Method == MBP_ENTROPYAVERAGE) + { + if (avxEntropy.pEntropyCoverage == nullptr) + return 1; + AvxSupport avxEntropyCoverageBitmap{ *avxEntropy.pEntropyCoverage }; + if (!avxEntropyCoverageBitmap.bitmapHasCorrectType()) + return 1; + + const auto average = [](const T_IN* pIn, T_OUT* pOut, const float* pEntropyLayer, float* pEntropyCoverage) -> void + { + const auto [newColorLo8, newColorHi8] = AvxSupport::read16PackedSingle(pIn); + const auto [oldColorLo8, oldColorHi8] = AvxSupport::read16PackedSingle(pOut); + const auto [newEntropyLo8, newEntropyHi8] = AvxSupport::read16PackedSingle(pEntropyLayer); + const auto [oldEntropyLo8, oldEntropyHi8] = 
AvxSupport::read16PackedSingle(pEntropyCoverage); + + _mm256_storeu_ps(pEntropyCoverage, _mm256_add_ps(oldEntropyLo8, newEntropyLo8)); // EntropyCoverage += Entropy + _mm256_storeu_ps(pEntropyCoverage + 8, _mm256_add_ps(oldEntropyHi8, newEntropyHi8)); + _mm256_storeu_ps(pOut, _mm256_fmadd_ps(newColorLo8, newEntropyLo8, oldColorLo8)); // OutputBitmap += Color * Entropy + _mm256_storeu_ps(pOut + 8, _mm256_fmadd_ps(newColorHi8, newEntropyHi8, oldColorHi8)); + }; + + if (avxTempBitmap.isColorBitmap()) + { + const T_IN *pRed{ &*avxTempBitmap.redPixels().cbegin() }, *pGreen{ &*avxTempBitmap.greenPixels().cbegin() }, *pBlue{ &*avxTempBitmap.bluePixels().cbegin() }; + auto* const pOutput = dynamic_cast*>(&outputBitmap); + if (pOutput == nullptr) + return 1; + T_OUT *pOutRed{ &*pOutput->m_Red.m_vPixels.begin() }, *pOutGreen{ &*pOutput->m_Green.m_vPixels.begin() }, *pOutBlue{ &*pOutput->m_Blue.m_vPixels.begin() }; + // Entropy + const float* pEntropyRed = reinterpret_cast(avxEntropy.redEntropyLayer.data()); + const float* pEntropyGreen = reinterpret_cast(avxEntropy.greenEntropyLayer.data()); + const float* pEntropyBlue = reinterpret_cast(avxEntropy.blueEntropyLayer.data()); + float *pEntropyCovRed{ avxEntropyCoverageBitmap.redPixels().data() }, *pEntropyCovGreen{ avxEntropyCoverageBitmap.greenPixels().data() }, *pEntropyCovBlue{ &*avxEntropyCoverageBitmap.bluePixels().data() }; + + for (int row = 0; row < resultHeight; ++row) + { + for (int counter = 0; counter < nrVectors; ++counter, + pRed += vectorLen, pGreen += vectorLen, pBlue += vectorLen, + pOutRed += vectorLen, pOutGreen += vectorLen, pOutBlue += vectorLen, + pEntropyRed += vectorLen, pEntropyGreen += vectorLen, pEntropyBlue += vectorLen, + pEntropyCovRed += vectorLen, pEntropyCovGreen += vectorLen, pEntropyCovBlue += vectorLen) + { + average(pRed, pOutRed, pEntropyRed, pEntropyCovRed); + average(pGreen, pOutGreen, pEntropyGreen, pEntropyCovGreen); + average(pBlue, pOutBlue, pEntropyBlue, pEntropyCovBlue); + } + // 
Rest of line + for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pRed, ++pGreen, ++pBlue, ++pOutRed, ++pOutGreen, ++pOutBlue, + ++pEntropyRed, ++pEntropyGreen, ++pEntropyBlue, ++pEntropyCovRed, ++pEntropyCovGreen, ++pEntropyCovBlue) + { + *pEntropyCovRed += *pEntropyRed; // EntropyCoverage += Entropy + *pEntropyCovGreen += *pEntropyGreen; + *pEntropyCovBlue += *pEntropyBlue; + if constexpr (std::is_same_v) + { + *pOutRed += static_cast(*pRed >> 16) * *pEntropyRed; // OutputBitmap += Color * Entropy + *pOutGreen += static_cast(*pGreen >> 16) * *pEntropyGreen; + *pOutBlue += static_cast(*pBlue >> 16) * *pEntropyBlue; + } + else + { + *pOutRed += static_cast(*pRed) * *pEntropyRed; // OutputBitmap += Color * Entropy + *pOutGreen += static_cast(*pGreen) * *pEntropyGreen; + *pOutBlue += static_cast(*pBlue) * *pEntropyBlue; + } + } + } + return 0; + } + if (avxTempBitmap.isMonochromeBitmap()) + { + const T_IN* pGray{ &*avxTempBitmap.grayPixels().cbegin() }; + auto* const pOutput = dynamic_cast*>(&outputBitmap); + if (pOutput == nullptr) + return 1; + T_OUT* pOut{ &*pOutput->m_vPixels.begin() }; + // Entropy + const float* pEntropy = reinterpret_cast(avxEntropy.redEntropyLayer.data()); + float* pEntropyCov{ avxEntropyCoverageBitmap.grayPixels().data() }; + + for (int row = 0; row < resultHeight; ++row) + { + for (int counter = 0; counter < nrVectors; ++counter, pGray += vectorLen, pOut += vectorLen, pEntropy += vectorLen, pEntropyCov += vectorLen) + average(pGray, pOut, pEntropy, pEntropyCov); + // Rest of line + for (int n = nrVectors * vectorLen; n < resultWidth; ++n, ++pGray, ++pOut, ++pEntropy, ++pEntropyCov) + { + *pEntropyCov += *pEntropy; + if constexpr (std::is_same_v) + *pOut += static_cast(*pGray >> 16) * *pEntropy; + else + *pOut += static_cast(*pGray) * *pEntropy; + } + } + return 0; + } + + return 1; + } + + return 1; +} diff --git a/DeepSkyStacker/avx_avg.h b/DeepSkyStackerKernel/avx_avg.h similarity index 96% rename from DeepSkyStacker/avx_avg.h 
rename to DeepSkyStackerKernel/avx_avg.h index 365bfe26..988b788e 100644 --- a/DeepSkyStacker/avx_avg.h +++ b/DeepSkyStackerKernel/avx_avg.h @@ -1,29 +1,29 @@ -#pragma once - -#include "BitmapBase.h" -#include "avx_entropy.h" - -class CTaskInfo; -class DSSRect; -class CMemoryBitmap; -class AvxEntropy; - -class AvxAccumulation -{ - int resultWidth, resultHeight; - CMemoryBitmap& tempBitmap; - CMemoryBitmap& outputBitmap; - const CTaskInfo& taskInfo; - AvxEntropy& avxEntropy; -public: - AvxAccumulation() = delete; - AvxAccumulation(const DSSRect& resultRect, const CTaskInfo& tInfo, CMemoryBitmap& tempbm, CMemoryBitmap& outbm, AvxEntropy& entroinfo) noexcept; - AvxAccumulation(const AvxAccumulation&) = delete; - AvxAccumulation(AvxAccumulation&&) = delete; - AvxAccumulation& operator=(const AvxAccumulation&) = delete; - - int accumulate(const int nrStackedBitmaps); -private: - template - int doAccumulate(const int nrStackedBitmaps); -}; +#pragma once + +#include "BitmapBase.h" +#include "avx_entropy.h" + +class CTaskInfo; +class DSSRect; +class CMemoryBitmap; +class AvxEntropy; + +class AvxAccumulation +{ + int resultWidth, resultHeight; + CMemoryBitmap& tempBitmap; + CMemoryBitmap& outputBitmap; + const CTaskInfo& taskInfo; + AvxEntropy& avxEntropy; +public: + AvxAccumulation() = delete; + AvxAccumulation(const DSSRect& resultRect, const CTaskInfo& tInfo, CMemoryBitmap& tempbm, CMemoryBitmap& outbm, AvxEntropy& entroinfo) noexcept; + AvxAccumulation(const AvxAccumulation&) = delete; + AvxAccumulation(AvxAccumulation&&) = delete; + AvxAccumulation& operator=(const AvxAccumulation&) = delete; + + int accumulate(const int nrStackedBitmaps); +private: + template + int doAccumulate(const int nrStackedBitmaps); +}; diff --git a/DeepSkyStacker/avx_bitmap_filler.cpp b/DeepSkyStackerKernel/avx_bitmap_filler.cpp similarity index 98% rename from DeepSkyStacker/avx_bitmap_filler.cpp rename to DeepSkyStackerKernel/avx_bitmap_filler.cpp index 4266f5a7..df492575 100644 --- 
a/DeepSkyStacker/avx_bitmap_filler.cpp +++ b/DeepSkyStackerKernel/avx_bitmap_filler.cpp @@ -1,211 +1,211 @@ -#include "stdafx.h" - -#include "avx_bitmap_filler.h" -#include "avx_support.h" -#include "ZExcept.h" - - -// --------------------------------- -// AVX Bitmap Filler -// --------------------------------- - -AvxBitmapFiller::AvxBitmapFiller(CMemoryBitmap* pB, ProgressBase* pP, const double redWb, const double greenWb, const double blueWb) : - BitmapFillerBase{ pB, pP, redWb, greenWb, blueWb }, - sourceBuffer{} -{} - -bool AvxBitmapFiller::isThreadSafe() const { return true; } - -std::unique_ptr AvxBitmapFiller::clone() -{ - return std::make_unique(*this); -} - -size_t AvxBitmapFiller::Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) -{ - ZASSERTSTATE(0 != this->width); - ZASSERTSTATE(0 != this->height); - ZASSERTSTATE(0 != this->bytesPerChannel); - ZASSERTSTATE((nrPixels % static_cast(this->width)) == 0); - - if (this->isGray) - { - ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel); - constexpr size_t vectorLen = 16; - redBuffer.resize(nrPixels); - float* pBuf = redBuffer.data(); - - const size_t nrVectors = nrPixels / vectorLen; - if (this->bytesPerChannel == 1) - { - const std::uint8_t* pData = static_cast(source); - for (size_t n = 0; n < nrVectors; ++n, pData += 16, pBuf += 16) - { - const __m128i epu8 = _mm_loadu_si128((const __m128i*)pData); // Load 16 pixels (each 8 bits) - const __m256i epu16 = _mm256_slli_epi16(_mm256_cvtepu8_epi16(epu8), 8); - const __m256 lo8 = AvxSupport::wordToPackedFloat(_mm256_castsi256_si128(epu16)); - const __m256 hi8 = AvxSupport::wordToPackedFloat(_mm256_extracti128_si256(epu16, 1)); - _mm256_storeu_ps(pBuf, lo8); - _mm256_storeu_ps(pBuf + 8, hi8); - } - for (size_t i = nrVectors * vectorLen; i < nrPixels; ++i, ++pData, ++pBuf) // Remaining pixels of line - *pBuf = static_cast(static_cast(*pData) << 8); - } - else - { - const __m256i* const pData = static_cast(source); - 
for (size_t n = 0; n < nrVectors; ++n) - { - const __m256i epu16_be = _mm256_loadu_si256(pData + n); // Load 16 pixels (each 16 bits big endian) - const __m256i epu16_le = _mm256_shuffle_epi8(epu16_be, - _mm256_set_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001, 0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001)); // big endian -> little endian - const __m256 lo8 = AvxSupport::wordToPackedFloat(_mm256_castsi256_si128(epu16_le)); - const __m256 hi8 = AvxSupport::wordToPackedFloat(_mm256_extracti128_si256(epu16_le, 1)); - _mm256_storeu_ps(pBuf + n*16, lo8); - _mm256_storeu_ps(pBuf + n*16 + 8, hi8); - } - const std::uint16_t* const p16 = static_cast(source); - for (size_t i = nrVectors * vectorLen; i < nrPixels; ++i) // Remaining pixels of line - pBuf[i] = static_cast(_byteswap_ushort(p16[i])); // Load an convert to little endian - } - - if (this->isRgbBayerPattern()) - { - const size_t y = 2 * (rowIndex % 2); // 0, 2, 0, 2, ... - const float adjustFactors[2] = { this->cfaFactors[y], this->cfaFactors[y + 1] }; // {0, 1} or {2, 3}, depending on the line number. 
- const __m256 adjustFactorsVec = _mm256_setr_ps(cfaFactors[y], cfaFactors[y + 1], cfaFactors[y], cfaFactors[y + 1], cfaFactors[y], cfaFactors[y + 1], cfaFactors[y], cfaFactors[y + 1]); - pBuf = redBuffer.data(); - for (size_t i = 0; i < nrPixels / 8; ++i, pBuf += 8) - { - const __m256 value = _mm256_loadu_ps(pBuf); - const __m256 adjusted = _mm256_mul_ps(value, adjustFactorsVec); - const __m256 limited = _mm256_min_ps(adjusted, _mm256_set1_ps(static_cast(std::numeric_limits::max() - 1))); - _mm256_storeu_ps(pBuf, limited); - } - for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pBuf) // Remaining pixels of line - *pBuf = adjustColor(*pBuf, adjustFactors[i % 2]); - } - - auto* pGray16Bitmap = dynamic_cast(pBitmap); - ZASSERTSTATE(pGray16Bitmap != nullptr); - pBuf = redBuffer.data(); - std::uint16_t* pOut = pGray16Bitmap->m_vPixels.data() + rowIndex * nrPixels; - for (size_t i = 0; i < nrPixels / 8; ++i, pBuf += 8, pOut += 8) - _mm_storeu_si128((__m128i*)pOut, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pBuf))); - for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pBuf, ++pOut) // Remaining pixels of line - *pOut = static_cast(*pBuf); - } - else - { - ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel * 3); - redBuffer.resize(nrPixels); - greenBuffer.resize(nrPixels); - blueBuffer.resize(nrPixels); - float* pRed = redBuffer.data(); - float* pGreen = greenBuffer.data(); - float* pBlue = blueBuffer.data(); - - if (this->bytesPerChannel == 1) - { - sourceBuffer.resize(nrPixels * 3 + 32); // To avoid read access violations. 
- memcpy(sourceBuffer.data(), source, nrPixels * 3); - const std::uint8_t* pData = static_cast(sourceBuffer.data()); - const __m256i indices = _mm256_set_epi32(0xffff09ff, 0xffff06ff, 0xffff03ff, 0xffff00ff, 0xffff09ff, 0xffff06ff, 0xffff03ff, 0xffff00ff); - // 8 RGB pixels at once (each 8 bits) - for (size_t n = 0; n < nrPixels / 8; ++n, pData += 24, pRed += 8, pGreen += 8, pBlue += 8) - { - const __m256i data = _mm256_loadu_si256((const __m256i*)pData); // Load 8 RGB pixels of 8 bits per channel (24 bytes), 8 bytes are ignored. - const __m256i rgb = _mm256_permute2x128_si256(data, AvxSupport::shiftLeftEpi32<1>(data), 0x30); // 4 pixels (rgb) in lo lane, 4 pixels in hi lane (4 bytes gap ignored). - // We make 3 steps: for red, green, and blue. - // In each step, we directly convert the 4 color bytes to 4 ints in each 128 bit lane, then to 4 floats. - // First step: Red channel @ byte positions 9, 6, 3, 0 in each lane. - const __m256i red32 = _mm256_shuffle_epi8(rgb, indices); // 4 red values per lane as int32 (original value * 256) - const __m256i green32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 1), indices); // 8 x green - const __m256i blue32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 2), indices); // 8 x blue - // Save the color values: - _mm256_storeu_ps(pRed, _mm256_cvtepi32_ps(red32)); - _mm256_storeu_ps(pGreen, _mm256_cvtepi32_ps(green32)); - _mm256_storeu_ps(pBlue, _mm256_cvtepi32_ps(blue32)); - } - for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pRed, ++pGreen, ++pBlue, pData += 3) - { - *pRed = static_cast(static_cast(pData[0]) << 8); - *pGreen = static_cast(static_cast(pData[1]) << 8); - *pBlue = static_cast(static_cast(pData[2]) << 8); - } - } - else - { - sourceBuffer.resize(nrPixels * 6 + 32); // To avoid read access violations. 
- memcpy(sourceBuffer.data(), source, nrPixels * 6); - const std::uint16_t* pData = reinterpret_cast(sourceBuffer.data()); - const __m256i indices = _mm256_set_epi32(0xffffffff, 0xffffffff, 0xffff0607, 0xffff0001, 0xffffffff, 0xffffffff, 0xffff0607, 0xffff0001); - // 4 RGB pixels at once (each 16 bits) - for (size_t n = 0; n < nrPixels / 4; ++n, pData += 12, pRed += 4, pGreen += 4, pBlue += 4) - { - const __m256i data = _mm256_loadu_si256((const __m256i*)pData); // Load 4 RGB pixels of 16 bits per channel (24 bytes), 8 bytes are ignored. - const __m256i rgb = _mm256_permute2x128_si256(data, AvxSupport::shiftLeftEpi32<1>(data), 0x30); // 2 pixels (rgb) in lo lane, 2 pixels in hi lane (4 bytes gap ignored). - // 3 steps (R, G, B): Convert the 2 color WORDs to 2 ints in each 128 bit lane, be->le, then convert to float. - const __m256i red32 = _mm256_shuffle_epi8(rgb, indices); // 2 red values per lane as int32 (original value converted be -> le) - const __m256i green32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 2), indices); // 4 x green - const __m256i blue32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 4), indices); // 4 x blue - // Pack the 2 pixels in the hi lane and the 2 pixels in the lo lane, then convert to float, and store. 
- _mm_storeu_ps(pRed, _mm_cvtepi32_ps(_mm256_castsi256_si128(_mm256_permute4x64_epi64(red32, 0x08)))); - _mm_storeu_ps(pGreen, _mm_cvtepi32_ps(_mm256_castsi256_si128(_mm256_permute4x64_epi64(green32, 0x08)))); - _mm_storeu_ps(pBlue, _mm_cvtepi32_ps(_mm256_castsi256_si128(_mm256_permute4x64_epi64(blue32, 0x08)))); - } - for (size_t i = (nrPixels / 4) * 4; i < nrPixels; ++i, ++pRed, ++pGreen, ++pBlue, pData += 3) - { - *pRed = static_cast(_byteswap_ushort(pData[0])); - *pGreen = static_cast(_byteswap_ushort(pData[1])); - *pBlue = static_cast(_byteswap_ushort(pData[2])); - } - } - - pRed = redBuffer.data(); - pGreen = greenBuffer.data(); - pBlue = blueBuffer.data(); - const __m256 MAXIMUM = _mm256_set1_ps(static_cast(std::numeric_limits::max() - 1)); - for (size_t n = 0; n < nrPixels / 8; ++n, pRed += 8, pGreen += 8, pBlue += 8) - { - const __m256 r = _mm256_mul_ps(_mm256_loadu_ps(pRed), _mm256_set1_ps(redScale)); - const __m256 g = _mm256_mul_ps(_mm256_loadu_ps(pGreen), _mm256_set1_ps(greenScale)); - const __m256 b = _mm256_mul_ps(_mm256_loadu_ps(pBlue), _mm256_set1_ps(blueScale)); - _mm256_storeu_ps(pRed, _mm256_min_ps(r, MAXIMUM)); - _mm256_storeu_ps(pGreen, _mm256_min_ps(g, MAXIMUM)); - _mm256_storeu_ps(pBlue, _mm256_min_ps(b, MAXIMUM)); - } - for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pRed, ++pGreen, ++pBlue) - { - *pRed = adjustColor(*pRed, redScale); - *pGreen = adjustColor(*pGreen, greenScale); - *pBlue = adjustColor(*pBlue, blueScale); - } - - auto* pColor16Bitmap = dynamic_cast(pBitmap); - ZASSERTSTATE(pColor16Bitmap != nullptr); - std::uint16_t* pOutRed = pColor16Bitmap->m_Red.m_vPixels.data() + rowIndex * nrPixels; - std::uint16_t* pOutGreen = pColor16Bitmap->m_Green.m_vPixels.data() + rowIndex * nrPixels; - std::uint16_t* pOutBlue = pColor16Bitmap->m_Blue.m_vPixels.data() + rowIndex * nrPixels; - pRed = redBuffer.data(); - pGreen = greenBuffer.data(); - pBlue = blueBuffer.data(); - for (size_t n = 0; n < nrPixels / 8; ++n, pOutRed += 8, 
pOutGreen += 8, pOutBlue += 8, pRed += 8, pGreen += 8, pBlue += 8) - { - _mm_storeu_si128((__m128i*)pOutRed, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pRed))); - _mm_storeu_si128((__m128i*)pOutGreen, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pGreen))); - _mm_storeu_si128((__m128i*)pOutBlue, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pBlue))); - } - for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pOutRed, ++pOutGreen, ++pOutBlue, ++pRed, ++pGreen, ++pBlue) - { - *pOutRed = static_cast(*pRed); - *pOutGreen = static_cast(*pGreen); - *pOutBlue = static_cast(*pBlue); - } - } - - //if (((rowIndex + 1) % 32) == 0 && this->pProgress != nullptr) - // this->pProgress->Progress2(static_cast(rowIndex + 1)); - - return AvxSupport::zeroUpper(nrPixels); -} +#include "stdafx.h" + +#include "avx_bitmap_filler.h" +#include "avx_support.h" +#include "ZExcept.h" + + +// --------------------------------- +// AVX Bitmap Filler +// --------------------------------- + +AvxBitmapFiller::AvxBitmapFiller(CMemoryBitmap* pB, ProgressBase* pP, const double redWb, const double greenWb, const double blueWb) : + BitmapFillerBase{ pB, pP, redWb, greenWb, blueWb }, + sourceBuffer{} +{} + +bool AvxBitmapFiller::isThreadSafe() const { return true; } + +std::unique_ptr AvxBitmapFiller::clone() +{ + return std::make_unique(*this); +} + +size_t AvxBitmapFiller::Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) +{ + ZASSERTSTATE(0 != this->width); + ZASSERTSTATE(0 != this->height); + ZASSERTSTATE(0 != this->bytesPerChannel); + ZASSERTSTATE((nrPixels % static_cast(this->width)) == 0); + + if (this->isGray) + { + ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel); + constexpr size_t vectorLen = 16; + redBuffer.resize(nrPixels); + float* pBuf = redBuffer.data(); + + const size_t nrVectors = nrPixels / vectorLen; + if (this->bytesPerChannel == 1) + { + const std::uint8_t* pData = static_cast(source); + for (size_t n = 0; n < nrVectors; 
++n, pData += 16, pBuf += 16) + { + const __m128i epu8 = _mm_loadu_si128((const __m128i*)pData); // Load 16 pixels (each 8 bits) + const __m256i epu16 = _mm256_slli_epi16(_mm256_cvtepu8_epi16(epu8), 8); + const __m256 lo8 = AvxSupport::wordToPackedFloat(_mm256_castsi256_si128(epu16)); + const __m256 hi8 = AvxSupport::wordToPackedFloat(_mm256_extracti128_si256(epu16, 1)); + _mm256_storeu_ps(pBuf, lo8); + _mm256_storeu_ps(pBuf + 8, hi8); + } + for (size_t i = nrVectors * vectorLen; i < nrPixels; ++i, ++pData, ++pBuf) // Remaining pixels of line + *pBuf = static_cast(static_cast(*pData) << 8); + } + else + { + const __m256i* const pData = static_cast(source); + for (size_t n = 0; n < nrVectors; ++n) + { + const __m256i epu16_be = _mm256_loadu_si256(pData + n); // Load 16 pixels (each 16 bits big endian) + const __m256i epu16_le = _mm256_shuffle_epi8(epu16_be, + _mm256_set_epi32(0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001, 0x0e0f0c0d, 0x0a0b0809, 0x06070405, 0x02030001)); // big endian -> little endian + const __m256 lo8 = AvxSupport::wordToPackedFloat(_mm256_castsi256_si128(epu16_le)); + const __m256 hi8 = AvxSupport::wordToPackedFloat(_mm256_extracti128_si256(epu16_le, 1)); + _mm256_storeu_ps(pBuf + n*16, lo8); + _mm256_storeu_ps(pBuf + n*16 + 8, hi8); + } + const std::uint16_t* const p16 = static_cast(source); + for (size_t i = nrVectors * vectorLen; i < nrPixels; ++i) // Remaining pixels of line + pBuf[i] = static_cast(_byteswap_ushort(p16[i])); // Load an convert to little endian + } + + if (this->isRgbBayerPattern()) + { + const size_t y = 2 * (rowIndex % 2); // 0, 2, 0, 2, ... + const float adjustFactors[2] = { this->cfaFactors[y], this->cfaFactors[y + 1] }; // {0, 1} or {2, 3}, depending on the line number. 
+ const __m256 adjustFactorsVec = _mm256_setr_ps(cfaFactors[y], cfaFactors[y + 1], cfaFactors[y], cfaFactors[y + 1], cfaFactors[y], cfaFactors[y + 1], cfaFactors[y], cfaFactors[y + 1]); + pBuf = redBuffer.data(); + for (size_t i = 0; i < nrPixels / 8; ++i, pBuf += 8) + { + const __m256 value = _mm256_loadu_ps(pBuf); + const __m256 adjusted = _mm256_mul_ps(value, adjustFactorsVec); + const __m256 limited = _mm256_min_ps(adjusted, _mm256_set1_ps(static_cast(std::numeric_limits::max() - 1))); + _mm256_storeu_ps(pBuf, limited); + } + for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pBuf) // Remaining pixels of line + *pBuf = adjustColor(*pBuf, adjustFactors[i % 2]); + } + + auto* pGray16Bitmap = dynamic_cast(pBitmap); + ZASSERTSTATE(pGray16Bitmap != nullptr); + pBuf = redBuffer.data(); + std::uint16_t* pOut = pGray16Bitmap->m_vPixels.data() + rowIndex * nrPixels; + for (size_t i = 0; i < nrPixels / 8; ++i, pBuf += 8, pOut += 8) + _mm_storeu_si128((__m128i*)pOut, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pBuf))); + for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pBuf, ++pOut) // Remaining pixels of line + *pOut = static_cast(*pBuf); + } + else + { + ZASSERTSTATE(bytesPerPixel == this->bytesPerChannel * 3); + redBuffer.resize(nrPixels); + greenBuffer.resize(nrPixels); + blueBuffer.resize(nrPixels); + float* pRed = redBuffer.data(); + float* pGreen = greenBuffer.data(); + float* pBlue = blueBuffer.data(); + + if (this->bytesPerChannel == 1) + { + sourceBuffer.resize(nrPixels * 3 + 32); // To avoid read access violations. 
+ memcpy(sourceBuffer.data(), source, nrPixels * 3); + const std::uint8_t* pData = static_cast(sourceBuffer.data()); + const __m256i indices = _mm256_set_epi32(0xffff09ff, 0xffff06ff, 0xffff03ff, 0xffff00ff, 0xffff09ff, 0xffff06ff, 0xffff03ff, 0xffff00ff); + // 8 RGB pixels at once (each 8 bits) + for (size_t n = 0; n < nrPixels / 8; ++n, pData += 24, pRed += 8, pGreen += 8, pBlue += 8) + { + const __m256i data = _mm256_loadu_si256((const __m256i*)pData); // Load 8 RGB pixels of 8 bits per channel (24 bytes), 8 bytes are ignored. + const __m256i rgb = _mm256_permute2x128_si256(data, AvxSupport::shiftLeftEpi32<1>(data), 0x30); // 4 pixels (rgb) in lo lane, 4 pixels in hi lane (4 bytes gap ignored). + // We make 3 steps: for red, green, and blue. + // In each step, we directly convert the 4 color bytes to 4 ints in each 128 bit lane, then to 4 floats. + // First step: Red channel @ byte positions 9, 6, 3, 0 in each lane. + const __m256i red32 = _mm256_shuffle_epi8(rgb, indices); // 4 red values per lane as int32 (original value * 256) + const __m256i green32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 1), indices); // 8 x green + const __m256i blue32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 2), indices); // 8 x blue + // Save the color values: + _mm256_storeu_ps(pRed, _mm256_cvtepi32_ps(red32)); + _mm256_storeu_ps(pGreen, _mm256_cvtepi32_ps(green32)); + _mm256_storeu_ps(pBlue, _mm256_cvtepi32_ps(blue32)); + } + for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pRed, ++pGreen, ++pBlue, pData += 3) + { + *pRed = static_cast(static_cast(pData[0]) << 8); + *pGreen = static_cast(static_cast(pData[1]) << 8); + *pBlue = static_cast(static_cast(pData[2]) << 8); + } + } + else + { + sourceBuffer.resize(nrPixels * 6 + 32); // To avoid read access violations. 
+ memcpy(sourceBuffer.data(), source, nrPixels * 6); + const std::uint16_t* pData = reinterpret_cast(sourceBuffer.data()); + const __m256i indices = _mm256_set_epi32(0xffffffff, 0xffffffff, 0xffff0607, 0xffff0001, 0xffffffff, 0xffffffff, 0xffff0607, 0xffff0001); + // 4 RGB pixels at once (each 16 bits) + for (size_t n = 0; n < nrPixels / 4; ++n, pData += 12, pRed += 4, pGreen += 4, pBlue += 4) + { + const __m256i data = _mm256_loadu_si256((const __m256i*)pData); // Load 4 RGB pixels of 16 bits per channel (24 bytes), 8 bytes are ignored. + const __m256i rgb = _mm256_permute2x128_si256(data, AvxSupport::shiftLeftEpi32<1>(data), 0x30); // 2 pixels (rgb) in lo lane, 2 pixels in hi lane (4 bytes gap ignored). + // 3 steps (R, G, B): Convert the 2 color WORDs to 2 ints in each 128 bit lane, be->le, then convert to float. + const __m256i red32 = _mm256_shuffle_epi8(rgb, indices); // 2 red values per lane as int32 (original value converted be -> le) + const __m256i green32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 2), indices); // 4 x green + const __m256i blue32 = _mm256_shuffle_epi8(_mm256_srli_si256(rgb, 4), indices); // 4 x blue + // Pack the 2 pixels in the hi lane and the 2 pixels in the lo lane, then convert to float, and store. 
+ _mm_storeu_ps(pRed, _mm_cvtepi32_ps(_mm256_castsi256_si128(_mm256_permute4x64_epi64(red32, 0x08)))); + _mm_storeu_ps(pGreen, _mm_cvtepi32_ps(_mm256_castsi256_si128(_mm256_permute4x64_epi64(green32, 0x08)))); + _mm_storeu_ps(pBlue, _mm_cvtepi32_ps(_mm256_castsi256_si128(_mm256_permute4x64_epi64(blue32, 0x08)))); + } + for (size_t i = (nrPixels / 4) * 4; i < nrPixels; ++i, ++pRed, ++pGreen, ++pBlue, pData += 3) + { + *pRed = static_cast(_byteswap_ushort(pData[0])); + *pGreen = static_cast(_byteswap_ushort(pData[1])); + *pBlue = static_cast(_byteswap_ushort(pData[2])); + } + } + + pRed = redBuffer.data(); + pGreen = greenBuffer.data(); + pBlue = blueBuffer.data(); + const __m256 MAXIMUM = _mm256_set1_ps(static_cast(std::numeric_limits::max() - 1)); + for (size_t n = 0; n < nrPixels / 8; ++n, pRed += 8, pGreen += 8, pBlue += 8) + { + const __m256 r = _mm256_mul_ps(_mm256_loadu_ps(pRed), _mm256_set1_ps(redScale)); + const __m256 g = _mm256_mul_ps(_mm256_loadu_ps(pGreen), _mm256_set1_ps(greenScale)); + const __m256 b = _mm256_mul_ps(_mm256_loadu_ps(pBlue), _mm256_set1_ps(blueScale)); + _mm256_storeu_ps(pRed, _mm256_min_ps(r, MAXIMUM)); + _mm256_storeu_ps(pGreen, _mm256_min_ps(g, MAXIMUM)); + _mm256_storeu_ps(pBlue, _mm256_min_ps(b, MAXIMUM)); + } + for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pRed, ++pGreen, ++pBlue) + { + *pRed = adjustColor(*pRed, redScale); + *pGreen = adjustColor(*pGreen, greenScale); + *pBlue = adjustColor(*pBlue, blueScale); + } + + auto* pColor16Bitmap = dynamic_cast(pBitmap); + ZASSERTSTATE(pColor16Bitmap != nullptr); + std::uint16_t* pOutRed = pColor16Bitmap->m_Red.m_vPixels.data() + rowIndex * nrPixels; + std::uint16_t* pOutGreen = pColor16Bitmap->m_Green.m_vPixels.data() + rowIndex * nrPixels; + std::uint16_t* pOutBlue = pColor16Bitmap->m_Blue.m_vPixels.data() + rowIndex * nrPixels; + pRed = redBuffer.data(); + pGreen = greenBuffer.data(); + pBlue = blueBuffer.data(); + for (size_t n = 0; n < nrPixels / 8; ++n, pOutRed += 8, 
pOutGreen += 8, pOutBlue += 8, pRed += 8, pGreen += 8, pBlue += 8) + { + _mm_storeu_si128((__m128i*)pOutRed, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pRed))); + _mm_storeu_si128((__m128i*)pOutGreen, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pGreen))); + _mm_storeu_si128((__m128i*)pOutBlue, AvxSupport::cvtTruncatePsEpu16(_mm256_loadu_ps(pBlue))); + } + for (size_t i = (nrPixels / 8) * 8; i < nrPixels; ++i, ++pOutRed, ++pOutGreen, ++pOutBlue, ++pRed, ++pGreen, ++pBlue) + { + *pOutRed = static_cast(*pRed); + *pOutGreen = static_cast(*pGreen); + *pOutBlue = static_cast(*pBlue); + } + } + + //if (((rowIndex + 1) % 32) == 0 && this->pProgress != nullptr) + // this->pProgress->Progress2(static_cast(rowIndex + 1)); + + return AvxSupport::zeroUpper(nrPixels); +} diff --git a/DeepSkyStacker/avx_bitmap_filler.h b/DeepSkyStackerKernel/avx_bitmap_filler.h similarity index 97% rename from DeepSkyStacker/avx_bitmap_filler.h rename to DeepSkyStackerKernel/avx_bitmap_filler.h index ba59216f..9c70ee36 100644 --- a/DeepSkyStacker/avx_bitmap_filler.h +++ b/DeepSkyStackerKernel/avx_bitmap_filler.h @@ -1,32 +1,32 @@ -#pragma once -#include "BitMapFiller.h" - -namespace DSS { class ProgressBase; } - -class AvxBitmapFiller : public BitmapFillerBase -{ -private: - std::vector sourceBuffer; -public: - AvxBitmapFiller(CMemoryBitmap* pB, DSS::ProgressBase* pP, const double redWb, const double greenWb, const double blueWb); - AvxBitmapFiller(const AvxBitmapFiller&) = default; // For cloning. 
- virtual ~AvxBitmapFiller() {} - - virtual bool isThreadSafe() const override; - virtual std::unique_ptr clone() override; - - virtual size_t Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) override; -}; - -class NonAvxBitmapFiller : public BitmapFillerBase -{ -public: - NonAvxBitmapFiller(CMemoryBitmap* pB, DSS::ProgressBase* pP, const double redWb, const double greenWb, const double blueWb); - NonAvxBitmapFiller(const NonAvxBitmapFiller&) = default; // For cloning. - virtual ~NonAvxBitmapFiller() {} - - virtual bool isThreadSafe() const override; - virtual std::unique_ptr clone() override; - - virtual size_t Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) override; -}; +#pragma once +#include "BitMapFiller.h" + +namespace DSS { class ProgressBase; } + +class AvxBitmapFiller : public BitmapFillerBase +{ +private: + std::vector sourceBuffer; +public: + AvxBitmapFiller(CMemoryBitmap* pB, DSS::ProgressBase* pP, const double redWb, const double greenWb, const double blueWb); + AvxBitmapFiller(const AvxBitmapFiller&) = default; // For cloning. + virtual ~AvxBitmapFiller() {} + + virtual bool isThreadSafe() const override; + virtual std::unique_ptr clone() override; + + virtual size_t Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) override; +}; + +class NonAvxBitmapFiller : public BitmapFillerBase +{ +public: + NonAvxBitmapFiller(CMemoryBitmap* pB, DSS::ProgressBase* pP, const double redWb, const double greenWb, const double blueWb); + NonAvxBitmapFiller(const NonAvxBitmapFiller&) = default; // For cloning. 
+ virtual ~NonAvxBitmapFiller() {} + + virtual bool isThreadSafe() const override; + virtual std::unique_ptr clone() override; + + virtual size_t Write(const void* source, const size_t bytesPerPixel, const size_t nrPixels, const size_t rowIndex) override; +}; diff --git a/DeepSkyStacker/avx_cfa.cpp b/DeepSkyStackerKernel/avx_cfa.cpp similarity index 97% rename from DeepSkyStacker/avx_cfa.cpp rename to DeepSkyStackerKernel/avx_cfa.cpp index 1c9d23e2..de4a4900 100644 --- a/DeepSkyStacker/avx_cfa.cpp +++ b/DeepSkyStackerKernel/avx_cfa.cpp @@ -1,208 +1,208 @@ -#include "stdafx.h" -#include "avx_cfa.h" -#include "avx_support.h" - -AvxCfaProcessing::AvxCfaProcessing(const size_t lineStart, const size_t lineEnd, CMemoryBitmap& inputbm) : - redPixels{}, - greenPixels{}, - bluePixels{}, - inputBitmap{ inputbm }, - vectorsPerLine{ 0 }, - avxReady{ AvxSupport::checkSimdAvailability() } -{ - init(lineStart, lineEnd); -} - -void AvxCfaProcessing::init(const size_t lineStart, const size_t lineEnd) // You should be sure that lineEnd >= lineStart! -{ - const size_t height = lineEnd - lineStart; - vectorsPerLine = AvxSupport::numberOfAvxVectors(inputBitmap.Width()); - const size_t nrVectors = vectorsPerLine * height; - if (nrVectors != 0 && AvxSupport{ inputBitmap }.isMonochromeCfaBitmapOfType()) - { - redPixels.resize(nrVectors); - greenPixels.resize(nrVectors); - bluePixels.resize(nrVectors); - } -} - -int AvxCfaProcessing::interpolate(const size_t lineStart, const size_t lineEnd, const int pixelSizeMultiplier) -{ - if (!avxReady) - return 1; - if (pixelSizeMultiplier != 1) - return 1; - if (!AvxSupport{ inputBitmap }.isMonochromeCfaBitmapOfType()) - return 1; - - return AvxSupport{ inputBitmap }.getCfaType() == CFATYPE_RGGB - ? 
Avx256CfaProcessing{ *this }.interpolateGrayCFA2Color<0>(lineStart, lineEnd) - : Avx256CfaProcessing{ *this }.interpolateGrayCFA2Color<1>(lineStart, lineEnd); -} - - -// *********************************************** -// ************ AVX-256 interpolation ************ -// *********************************************** - -template -int Avx256CfaProcessing::interpolateGrayCFA2Color(const size_t lineStart, const size_t lineEnd) -{ - if (const auto* const p{ dynamic_cast*>(&avxData.inputBitmap) }) - { - if (!p->IsCFA()) - return 1; - } - else - return 1; - if ((lineStart % 2) != 0) // Must start with an even index (RG-line). - return 2; - - const size_t width = avxData.inputBitmap.Width(); - const size_t height = avxData.inputBitmap.Height(); - if (width < 64 || height < 8) // AVX makes no sense for super-small arrays. - return 2; - - constexpr size_t VecSize = sizeof(__m256i) / sizeof(std::uint16_t); - static_assert(VecSize == 16); - const size_t nrVectors = width / VecSize; - const unsigned int remainingPixels = width % VecSize; - - __m256i thisRowCurrent, thisRowNext; // Current and next vector of current row. - __m256i prevRowCurrent, prevRowNext; // ... of previous row. - __m256i nextRowCurrent, nextRowNext; // ... of following row. - int thisRowLast, prevRowLast, nextRowLast; // Last value of the previous line. - - const AvxSupport avxSupport{ avxData.inputBitmap }; - const std::uint16_t* pGray = avxSupport.grayPixels().data() + lineStart * width; - std::uint16_t* pRed = avxData.redCfaLine(0); - std::uint16_t* pGreen = avxData.greenCfaLine(0); - std::uint16_t* pBlue = avxData.blueCfaLine(0); - std::int16_t prevRowMask = lineStart == 0 ? 
0x0 : -1; - - const auto extract0 = [](const __m256i x) -> int { return _mm256_cvtsi256_si32(x); }; - const auto extract15 = [](const __m256i x) -> int { return _mm256_extract_epi16(x, 15); }; - const auto storePixel = [&pRed, &pGreen, &pBlue](const auto rgbVec) -> void - { - const auto [r, g, b] = rgbVec; - _mm256_storeu_si256((__m256i*)pRed, r); - _mm256_storeu_si256((__m256i*)pGreen, g); - _mm256_storeu_si256((__m256i*)pBlue, b); - }; - const auto loadRemainingPixels = [remainingPixels](const std::uint16_t* pGray, const bool doLoad) -> __m256i - { - if (!doLoad) - return _mm256_setzero_si256(); - __m256i vec = _mm256_setzero_si256(); - memcpy(&vec, pGray, remainingPixels * sizeof(pGray[0])); - return vec; - }; - const auto storeRemainingPixel = [remainingPixels, &pRed, &pGreen, &pBlue](const auto rgbVec) -> void - { - const auto [r, g, b] = rgbVec; - memcpy(pRed, &r, remainingPixels * sizeof(pRed[0])); - memcpy(pGreen, &g, remainingPixels * sizeof(pGreen[0])); - memcpy(pBlue, &b, remainingPixels * sizeof(pBlue[0])); - }; - - for (size_t row = 0, lineNdx = lineStart; lineNdx < lineEnd; ++row, ++lineNdx) - { - // curr. row prev. pix/curr. row curr. vector/curr. row next pix./prev. row prev. pix./prev. row current vec./prev. row next pix. ... - const auto debayer = [row](const int thisP, const __m256i thisCurr, const int thisN, - const int prevP, const __m256i prevCurr, const int prevN, const int nextP, const __m256i nextCurr, const int nextN) -> std::tuple<__m256i, __m256i, __m256i> - { - const __m256i currRight = AvxSupport::shl1Epi16(thisCurr, thisP); // One pixel right is shifting to higher address. This is a shiftLeft(...). 
- const __m256i currLeft = AvxSupport::shr1Epi16(thisCurr, thisN); - const __m256i prevRight = AvxSupport::shl1Epi16(prevCurr, prevP); - const __m256i prevLeft = AvxSupport::shr1Epi16(prevCurr, prevN); - const __m256i nextRight = AvxSupport::shl1Epi16(nextCurr, nextP); - const __m256i nextLeft = AvxSupport::shr1Epi16(nextCurr, nextN); - const __m256i LRinterpol = _mm256_avg_epu16(currRight, currLeft); - const __m256i UDinterpol = _mm256_avg_epu16(prevCurr, nextCurr); - const __m256i crossInterpol = _mm256_avg_epu16(_mm256_avg_epu16(prevRight, prevLeft), _mm256_avg_epu16(nextRight, nextLeft)); - const __m256i greenInterpol = _mm256_avg_epu16(UDinterpol, LRinterpol); - - // RGGB pattern: RG_ROW==0 -> even row -> RG-line - // GBRG pattern: RG_ROW==1 -> odd row -> RG-line - if ((row % 2) == RG_ROW) - { - const __m256i red = _mm256_blend_epi16(thisCurr, LRinterpol, 0xaa); // 0b10101010 = 0xaa - const __m256i green = _mm256_blend_epi16(greenInterpol, thisCurr, 0xaa); - const __m256i blue = _mm256_blend_epi16(crossInterpol, UDinterpol, 0xaa); - return { red, green, blue }; - } - else // GB-line - { - const __m256i red = _mm256_blend_epi16(UDinterpol, crossInterpol, 0xaa); - const __m256i green = _mm256_blend_epi16(thisCurr, greenInterpol, 0xaa); - const __m256i blue = _mm256_blend_epi16(LRinterpol, thisCurr, 0xaa); - return { red, green, blue }; - } - }; - - const std::int16_t nextRowMask = lineNdx == height - 1 ? 0x0 : -1; // Prevent loading pixels of the following row if the current one is already the last. - - thisRowCurrent = _mm256_setzero_si256(); - thisRowNext = _mm256_loadu_si256((__m256i*)pGray); - thisRowLast = 0; - prevRowCurrent = _mm256_setzero_si256(); - prevRowNext = _mm256_maskload_epi32((int*)(pGray - width), _mm256_set1_epi32(prevRowMask)); // Load entire vector or nothing. 
- prevRowLast = 0; - nextRowCurrent = _mm256_setzero_si256(); - nextRowNext = _mm256_maskload_epi32((int*)(pGray + width), _mm256_set1_epi32(nextRowMask)); // Load entire vector or nothing. - nextRowLast = 0; - - pRed = avxData.redCfaLine(row); - pGreen = avxData.greenCfaLine(row); - pBlue = avxData.blueCfaLine(row); - - for (size_t n = 1; n < nrVectors; ++n, pGray += VecSize, pRed += VecSize, pGreen += VecSize, pBlue += VecSize) // nrVectors - 1 iterations - { - thisRowLast = extract15(thisRowCurrent); - thisRowCurrent = thisRowNext; - thisRowNext = _mm256_loadu_si256((__m256i*)(pGray + VecSize)); - prevRowLast = extract15(prevRowCurrent); - prevRowCurrent = prevRowNext; - prevRowNext = _mm256_maskload_epi32((int*)(pGray + VecSize - width), _mm256_set1_epi32(prevRowMask)); - nextRowLast = extract15(nextRowCurrent); - nextRowCurrent = nextRowNext; - nextRowNext = _mm256_maskload_epi32((int*)(pGray + VecSize + width), _mm256_set1_epi32(nextRowMask)); - - storePixel(debayer(thisRowLast, thisRowCurrent, extract0(thisRowNext), prevRowLast, prevRowCurrent, extract0(prevRowNext), nextRowLast, nextRowCurrent, extract0(nextRowNext))); - } - - thisRowLast = extract15(thisRowCurrent); - thisRowCurrent = thisRowNext; - prevRowLast = extract15(prevRowCurrent); - prevRowCurrent = prevRowNext; - nextRowLast = extract15(nextRowCurrent); - nextRowCurrent = nextRowNext; - - if (remainingPixels == 0) - { - // Last full vector - storePixel(debayer(thisRowLast, thisRowCurrent, 0, prevRowLast, prevRowCurrent, 0, nextRowLast, nextRowCurrent, 0)); - pGray += VecSize; - } - else - { - thisRowNext = loadRemainingPixels(pGray + VecSize, true); - prevRowNext = loadRemainingPixels(pGray + VecSize - width, prevRowMask != 0); - nextRowNext = loadRemainingPixels(pGray + VecSize + width, nextRowMask != 0); - - // Last full vector - storePixel(debayer(thisRowLast, thisRowCurrent, extract0(thisRowNext), prevRowLast, prevRowCurrent, extract0(prevRowNext), nextRowLast, nextRowCurrent, 
extract0(nextRowNext))); - - pGray += VecSize + remainingPixels; - pRed += VecSize; - pGreen += VecSize; - pBlue += VecSize; - - // Process last few pixels that are less than a full vector. - storeRemainingPixel(debayer(extract15(thisRowCurrent), thisRowNext, 0, extract15(prevRowCurrent), prevRowNext, 0, extract15(nextRowCurrent), nextRowNext, 0)); - } - - prevRowMask = -1; - } - return 0; -} +#include "stdafx.h" +#include "avx_cfa.h" +#include "avx_support.h" + +AvxCfaProcessing::AvxCfaProcessing(const size_t lineStart, const size_t lineEnd, CMemoryBitmap& inputbm) : + redPixels{}, + greenPixels{}, + bluePixels{}, + inputBitmap{ inputbm }, + vectorsPerLine{ 0 }, + avxReady{ AvxSupport::checkSimdAvailability() } +{ + init(lineStart, lineEnd); +} + +void AvxCfaProcessing::init(const size_t lineStart, const size_t lineEnd) // You should be sure that lineEnd >= lineStart! +{ + const size_t height = lineEnd - lineStart; + vectorsPerLine = AvxSupport::numberOfAvxVectors(inputBitmap.Width()); + const size_t nrVectors = vectorsPerLine * height; + if (nrVectors != 0 && AvxSupport{ inputBitmap }.isMonochromeCfaBitmapOfType()) + { + redPixels.resize(nrVectors); + greenPixels.resize(nrVectors); + bluePixels.resize(nrVectors); + } +} + +int AvxCfaProcessing::interpolate(const size_t lineStart, const size_t lineEnd, const int pixelSizeMultiplier) +{ + if (!avxReady) + return 1; + if (pixelSizeMultiplier != 1) + return 1; + if (!AvxSupport{ inputBitmap }.isMonochromeCfaBitmapOfType()) + return 1; + + return AvxSupport{ inputBitmap }.getCfaType() == CFATYPE_RGGB + ? 
Avx256CfaProcessing{ *this }.interpolateGrayCFA2Color<0>(lineStart, lineEnd) + : Avx256CfaProcessing{ *this }.interpolateGrayCFA2Color<1>(lineStart, lineEnd); +} + + +// *********************************************** +// ************ AVX-256 interpolation ************ +// *********************************************** + +template +int Avx256CfaProcessing::interpolateGrayCFA2Color(const size_t lineStart, const size_t lineEnd) +{ + if (const auto* const p{ dynamic_cast*>(&avxData.inputBitmap) }) + { + if (!p->IsCFA()) + return 1; + } + else + return 1; + if ((lineStart % 2) != 0) // Must start with an even index (RG-line). + return 2; + + const size_t width = avxData.inputBitmap.Width(); + const size_t height = avxData.inputBitmap.Height(); + if (width < 64 || height < 8) // AVX makes no sense for super-small arrays. + return 2; + + constexpr size_t VecSize = sizeof(__m256i) / sizeof(std::uint16_t); + static_assert(VecSize == 16); + const size_t nrVectors = width / VecSize; + const unsigned int remainingPixels = width % VecSize; + + __m256i thisRowCurrent, thisRowNext; // Current and next vector of current row. + __m256i prevRowCurrent, prevRowNext; // ... of previous row. + __m256i nextRowCurrent, nextRowNext; // ... of following row. + int thisRowLast, prevRowLast, nextRowLast; // Last value of the previous line. + + const AvxSupport avxSupport{ avxData.inputBitmap }; + const std::uint16_t* pGray = avxSupport.grayPixels().data() + lineStart * width; + std::uint16_t* pRed = avxData.redCfaLine(0); + std::uint16_t* pGreen = avxData.greenCfaLine(0); + std::uint16_t* pBlue = avxData.blueCfaLine(0); + std::int16_t prevRowMask = lineStart == 0 ? 
0x0 : -1; + + const auto extract0 = [](const __m256i x) -> int { return _mm256_cvtsi256_si32(x); }; + const auto extract15 = [](const __m256i x) -> int { return _mm256_extract_epi16(x, 15); }; + const auto storePixel = [&pRed, &pGreen, &pBlue](const auto rgbVec) -> void + { + const auto [r, g, b] = rgbVec; + _mm256_storeu_si256((__m256i*)pRed, r); + _mm256_storeu_si256((__m256i*)pGreen, g); + _mm256_storeu_si256((__m256i*)pBlue, b); + }; + const auto loadRemainingPixels = [remainingPixels](const std::uint16_t* pGray, const bool doLoad) -> __m256i + { + if (!doLoad) + return _mm256_setzero_si256(); + __m256i vec = _mm256_setzero_si256(); + memcpy(&vec, pGray, remainingPixels * sizeof(pGray[0])); + return vec; + }; + const auto storeRemainingPixel = [remainingPixels, &pRed, &pGreen, &pBlue](const auto rgbVec) -> void + { + const auto [r, g, b] = rgbVec; + memcpy(pRed, &r, remainingPixels * sizeof(pRed[0])); + memcpy(pGreen, &g, remainingPixels * sizeof(pGreen[0])); + memcpy(pBlue, &b, remainingPixels * sizeof(pBlue[0])); + }; + + for (size_t row = 0, lineNdx = lineStart; lineNdx < lineEnd; ++row, ++lineNdx) + { + // curr. row prev. pix/curr. row curr. vector/curr. row next pix./prev. row prev. pix./prev. row current vec./prev. row next pix. ... + const auto debayer = [row](const int thisP, const __m256i thisCurr, const int thisN, + const int prevP, const __m256i prevCurr, const int prevN, const int nextP, const __m256i nextCurr, const int nextN) -> std::tuple<__m256i, __m256i, __m256i> + { + const __m256i currRight = AvxSupport::shl1Epi16(thisCurr, thisP); // One pixel right is shifting to higher address. This is a shiftLeft(...). 
+ const __m256i currLeft = AvxSupport::shr1Epi16(thisCurr, thisN); + const __m256i prevRight = AvxSupport::shl1Epi16(prevCurr, prevP); + const __m256i prevLeft = AvxSupport::shr1Epi16(prevCurr, prevN); + const __m256i nextRight = AvxSupport::shl1Epi16(nextCurr, nextP); + const __m256i nextLeft = AvxSupport::shr1Epi16(nextCurr, nextN); + const __m256i LRinterpol = _mm256_avg_epu16(currRight, currLeft); + const __m256i UDinterpol = _mm256_avg_epu16(prevCurr, nextCurr); + const __m256i crossInterpol = _mm256_avg_epu16(_mm256_avg_epu16(prevRight, prevLeft), _mm256_avg_epu16(nextRight, nextLeft)); + const __m256i greenInterpol = _mm256_avg_epu16(UDinterpol, LRinterpol); + + // RGGB pattern: RG_ROW==0 -> even row -> RG-line + // GBRG pattern: RG_ROW==1 -> odd row -> RG-line + if ((row % 2) == RG_ROW) + { + const __m256i red = _mm256_blend_epi16(thisCurr, LRinterpol, 0xaa); // 0b10101010 = 0xaa + const __m256i green = _mm256_blend_epi16(greenInterpol, thisCurr, 0xaa); + const __m256i blue = _mm256_blend_epi16(crossInterpol, UDinterpol, 0xaa); + return { red, green, blue }; + } + else // GB-line + { + const __m256i red = _mm256_blend_epi16(UDinterpol, crossInterpol, 0xaa); + const __m256i green = _mm256_blend_epi16(thisCurr, greenInterpol, 0xaa); + const __m256i blue = _mm256_blend_epi16(LRinterpol, thisCurr, 0xaa); + return { red, green, blue }; + } + }; + + const std::int16_t nextRowMask = lineNdx == height - 1 ? 0x0 : -1; // Prevent loading pixels of the following row if the current one is already the last. + + thisRowCurrent = _mm256_setzero_si256(); + thisRowNext = _mm256_loadu_si256((__m256i*)pGray); + thisRowLast = 0; + prevRowCurrent = _mm256_setzero_si256(); + prevRowNext = _mm256_maskload_epi32((int*)(pGray - width), _mm256_set1_epi32(prevRowMask)); // Load entire vector or nothing. 
+ prevRowLast = 0; + nextRowCurrent = _mm256_setzero_si256(); + nextRowNext = _mm256_maskload_epi32((int*)(pGray + width), _mm256_set1_epi32(nextRowMask)); // Load entire vector or nothing. + nextRowLast = 0; + + pRed = avxData.redCfaLine(row); + pGreen = avxData.greenCfaLine(row); + pBlue = avxData.blueCfaLine(row); + + for (size_t n = 1; n < nrVectors; ++n, pGray += VecSize, pRed += VecSize, pGreen += VecSize, pBlue += VecSize) // nrVectors - 1 iterations + { + thisRowLast = extract15(thisRowCurrent); + thisRowCurrent = thisRowNext; + thisRowNext = _mm256_loadu_si256((__m256i*)(pGray + VecSize)); + prevRowLast = extract15(prevRowCurrent); + prevRowCurrent = prevRowNext; + prevRowNext = _mm256_maskload_epi32((int*)(pGray + VecSize - width), _mm256_set1_epi32(prevRowMask)); + nextRowLast = extract15(nextRowCurrent); + nextRowCurrent = nextRowNext; + nextRowNext = _mm256_maskload_epi32((int*)(pGray + VecSize + width), _mm256_set1_epi32(nextRowMask)); + + storePixel(debayer(thisRowLast, thisRowCurrent, extract0(thisRowNext), prevRowLast, prevRowCurrent, extract0(prevRowNext), nextRowLast, nextRowCurrent, extract0(nextRowNext))); + } + + thisRowLast = extract15(thisRowCurrent); + thisRowCurrent = thisRowNext; + prevRowLast = extract15(prevRowCurrent); + prevRowCurrent = prevRowNext; + nextRowLast = extract15(nextRowCurrent); + nextRowCurrent = nextRowNext; + + if (remainingPixels == 0) + { + // Last full vector + storePixel(debayer(thisRowLast, thisRowCurrent, 0, prevRowLast, prevRowCurrent, 0, nextRowLast, nextRowCurrent, 0)); + pGray += VecSize; + } + else + { + thisRowNext = loadRemainingPixels(pGray + VecSize, true); + prevRowNext = loadRemainingPixels(pGray + VecSize - width, prevRowMask != 0); + nextRowNext = loadRemainingPixels(pGray + VecSize + width, nextRowMask != 0); + + // Last full vector + storePixel(debayer(thisRowLast, thisRowCurrent, extract0(thisRowNext), prevRowLast, prevRowCurrent, extract0(prevRowNext), nextRowLast, nextRowCurrent, 
extract0(nextRowNext))); + + pGray += VecSize + remainingPixels; + pRed += VecSize; + pGreen += VecSize; + pBlue += VecSize; + + // Process last few pixels that are less than a full vector. + storeRemainingPixel(debayer(extract15(thisRowCurrent), thisRowNext, 0, extract15(prevRowCurrent), prevRowNext, 0, extract15(nextRowCurrent), nextRowNext, 0)); + } + + prevRowMask = -1; + } + return 0; +} diff --git a/DeepSkyStacker/avx_cfa.h b/DeepSkyStackerKernel/avx_cfa.h similarity index 97% rename from DeepSkyStacker/avx_cfa.h rename to DeepSkyStackerKernel/avx_cfa.h index 65a58db6..4a18383a 100644 --- a/DeepSkyStacker/avx_cfa.h +++ b/DeepSkyStackerKernel/avx_cfa.h @@ -1,79 +1,79 @@ -#pragma once - -#include "BitmapBase.h" - -class AvxCfaProcessing -{ -private: - typedef __m512i VectorElementType; - typedef std::vector VectorType; - - friend class Avx256CfaProcessing; - - VectorType redPixels; - VectorType greenPixels; - VectorType bluePixels; - CMemoryBitmap& inputBitmap; - size_t vectorsPerLine; - bool avxReady; -public: - AvxCfaProcessing() = delete; - AvxCfaProcessing(const size_t lineStart, const size_t lineEnd, CMemoryBitmap& inputbm); - AvxCfaProcessing(const AvxCfaProcessing&) = default; - AvxCfaProcessing(AvxCfaProcessing&&) = delete; - AvxCfaProcessing& operator=(const AvxCfaProcessing&) = delete; - - void init(const size_t lineStart, const size_t lineEnd); - int interpolate(const size_t lineStart, const size_t lineEnd, const int pixelSizeMultiplier); - - inline size_t nrVectorsPerLine() const { return this->vectorsPerLine; } - - template - inline const T* redCfaLine(const size_t rowIndex) const - { - if constexpr (std::is_same::value) - return reinterpret_cast(&this->redPixels[rowIndex * vectorsPerLine]); - else - return nullptr; - } - template - inline const T* greenCfaLine(const size_t rowIndex) const - { - if constexpr (std::is_same::value) - return reinterpret_cast(&this->greenPixels[rowIndex * vectorsPerLine]); - else - return nullptr; - } - template - 
inline const T* blueCfaLine(const size_t rowIndex) const - { - if constexpr (std::is_same::value) - return reinterpret_cast(&this->bluePixels[rowIndex * vectorsPerLine]); - else - return nullptr; - } - - inline const VectorElementType* redCfaBlock() const { return this->redPixels.data(); } - inline const VectorElementType* greenCfaBlock() const { return this->greenPixels.data(); } - inline const VectorElementType* blueCfaBlock() const { return this->bluePixels.data(); } -private: - std::uint16_t* redCfaLine(const size_t rowIndex) { return const_cast(static_cast(this)->redCfaLine(rowIndex)); } - std::uint16_t* greenCfaLine(const size_t rowIndex) { return const_cast(static_cast(this)->greenCfaLine(rowIndex)); } - std::uint16_t* blueCfaLine(const size_t rowIndex) { return const_cast(static_cast(this)->blueCfaLine(rowIndex)); } -}; - -// ********************************* -// ************ AVX-256 ************ -// ********************************* - -class Avx256CfaProcessing -{ -private: - friend class AvxCfaProcessing; - - AvxCfaProcessing& avxData; - Avx256CfaProcessing(AvxCfaProcessing& ad) : avxData{ ad } {} - - template // RG_ROW==0 for RGGB pattern, RG_ROW==1 for GBRG pattern. 
- int interpolateGrayCFA2Color(const size_t lineStart, const size_t lineEnd); -}; +#pragma once + +#include "BitmapBase.h" + +class AvxCfaProcessing +{ +private: + typedef __m512i VectorElementType; + typedef std::vector VectorType; + + friend class Avx256CfaProcessing; + + VectorType redPixels; + VectorType greenPixels; + VectorType bluePixels; + CMemoryBitmap& inputBitmap; + size_t vectorsPerLine; + bool avxReady; +public: + AvxCfaProcessing() = delete; + AvxCfaProcessing(const size_t lineStart, const size_t lineEnd, CMemoryBitmap& inputbm); + AvxCfaProcessing(const AvxCfaProcessing&) = default; + AvxCfaProcessing(AvxCfaProcessing&&) = delete; + AvxCfaProcessing& operator=(const AvxCfaProcessing&) = delete; + + void init(const size_t lineStart, const size_t lineEnd); + int interpolate(const size_t lineStart, const size_t lineEnd, const int pixelSizeMultiplier); + + inline size_t nrVectorsPerLine() const { return this->vectorsPerLine; } + + template + inline const T* redCfaLine(const size_t rowIndex) const + { + if constexpr (std::is_same::value) + return reinterpret_cast(&this->redPixels[rowIndex * vectorsPerLine]); + else + return nullptr; + } + template + inline const T* greenCfaLine(const size_t rowIndex) const + { + if constexpr (std::is_same::value) + return reinterpret_cast(&this->greenPixels[rowIndex * vectorsPerLine]); + else + return nullptr; + } + template + inline const T* blueCfaLine(const size_t rowIndex) const + { + if constexpr (std::is_same::value) + return reinterpret_cast(&this->bluePixels[rowIndex * vectorsPerLine]); + else + return nullptr; + } + + inline const VectorElementType* redCfaBlock() const { return this->redPixels.data(); } + inline const VectorElementType* greenCfaBlock() const { return this->greenPixels.data(); } + inline const VectorElementType* blueCfaBlock() const { return this->bluePixels.data(); } +private: + std::uint16_t* redCfaLine(const size_t rowIndex) { return const_cast(static_cast(this)->redCfaLine(rowIndex)); } + 
std::uint16_t* greenCfaLine(const size_t rowIndex) { return const_cast(static_cast(this)->greenCfaLine(rowIndex)); } + std::uint16_t* blueCfaLine(const size_t rowIndex) { return const_cast(static_cast(this)->blueCfaLine(rowIndex)); } +}; + +// ********************************* +// ************ AVX-256 ************ +// ********************************* + +class Avx256CfaProcessing +{ +private: + friend class AvxCfaProcessing; + + AvxCfaProcessing& avxData; + Avx256CfaProcessing(AvxCfaProcessing& ad) : avxData{ ad } {} + + template // RG_ROW==0 for RGGB pattern, RG_ROW==1 for GBRG pattern. + int interpolateGrayCFA2Color(const size_t lineStart, const size_t lineEnd); +}; diff --git a/DeepSkyStacker/avx_entropy.cpp b/DeepSkyStackerKernel/avx_entropy.cpp similarity index 97% rename from DeepSkyStacker/avx_entropy.cpp rename to DeepSkyStackerKernel/avx_entropy.cpp index 4b7b64c2..7e75a30f 100644 --- a/DeepSkyStacker/avx_entropy.cpp +++ b/DeepSkyStackerKernel/avx_entropy.cpp @@ -1,171 +1,171 @@ -#include "stdafx.h" -#include "avx_entropy.h" -#include "avx_support.h" -#include "avx_cfa.h" -#include "avx_histogram.h" -#include "Multitask.h" - -AvxEntropy::AvxEntropy(CMemoryBitmap& inputbm, const CEntropyInfo& entrinfo, CMemoryBitmap* entropycov) : - inputBitmap{ inputbm }, - entropyInfo{ entrinfo }, - pEntropyCoverage{ entropycov }, - avxReady{ AvxSupport::checkSimdAvailability() } -{ - if (pEntropyCoverage != nullptr && avxReady) - { - const size_t width = pEntropyCoverage->Width(); - const size_t height = pEntropyCoverage->Height(); - static_assert(std::is_same_v && std::is_same_v); - const size_t nrVectors = AvxSupport::numberOfAvxVectors(width); - redEntropyLayer.resize(height * nrVectors); - if (AvxSupport{ *pEntropyCoverage }.isColorBitmap()) - { - greenEntropyLayer.resize(height * nrVectors); - blueEntropyLayer.resize(height * nrVectors); - } - } -} - -int AvxEntropy::calcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& 
redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies) -{ - if (!avxReady) - return 1; - - int rval = 1; - if (doCalcEntropies(squareSize, nSquaresX, nSquaresY, redEntropies, greenEntropies, blueEntropies) == 0 - || doCalcEntropies(squareSize, nSquaresX, nSquaresY, redEntropies, greenEntropies, blueEntropies) == 0 - || doCalcEntropies(squareSize, nSquaresX, nSquaresY, redEntropies, greenEntropies, blueEntropies) == 0) - { - rval = 0; - } - return AvxSupport::zeroUpper(rval); -} - -template -int AvxEntropy::doCalcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies) -{ - // Check input bitmap. - const AvxSupport avxInputSupport{ inputBitmap }; - if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) // Monochrome includes CFA - return 1; - - constexpr int vectorLen = 16; - const int width = inputBitmap.Width(); - const int height = inputBitmap.Height(); - - const auto getDistribution = [](const auto& histogram, T value) -> float - { - constexpr size_t Unsigned_short_max = size_t{ std::numeric_limits::max() }; - if constexpr (std::is_integral::value && sizeof(T) == 4) // 32 bit integral type - value >>= 16; - return static_cast(histogram[std::min(static_cast(value), Unsigned_short_max)]); - }; - - const auto calcEntropyOfSquare = [squareSize, width, height, vectorLen, &getDistribution](const int col, const int row, const T* const pColor, auto& histogram) -> EntropyVectorType::value_type - { - const int xmin = col * squareSize; - const int xmax = std::min(xmin + squareSize, width); - const int nx = xmax - xmin; - const int ymin = row * squareSize; - const int ymax = std::min(ymin + squareSize, height); - const int nrVectors = nx / vectorLen; - memset(histogram.data(), 0, histogram.size() * sizeof(histogram[0])); - - for (int y = ymin; y < ymax; ++y) - { - const T* p = pColor + y * 
width + xmin; - for (int n = 0; n < nrVectors; ++n, p += vectorLen) - { - const auto [lo, hi] = AvxSupport::read16PackedInt(p); - AvxHistogram::calcHistoOfVectorEpi32(lo, histogram); - AvxHistogram::calcHistoOfVectorEpi32(hi, histogram); - } - // Rest of line - for (int x = xmin + nrVectors * vectorLen; x < xmax; ++x, ++p) - AvxHistogram::addToHisto(histogram, *p); - } - - const float N = static_cast(nx * (ymax - ymin)); - const float lnN = std::log(N); - float entropy = 0.0f; - __m256 avxEntropy = _mm256_setzero_ps(); - const int* const pHisto = histogram.data(); - - for (int y = ymin; y < ymax; ++y) - { - const T* p = pColor + y * width + xmin; - for (int n = 0; n < nrVectors; ++n, p += vectorLen) - { - const auto [lo, hi] = AvxSupport::read16PackedInt(p); - const __m256 lh = _mm256_cvtepi32_ps(_mm256_i32gather_epi32(pHisto, lo, 4)); - const __m256 hh = _mm256_cvtepi32_ps(_mm256_i32gather_epi32(pHisto, hi, 4)); - const __m256 r0 = _mm256_fmadd_ps(lh, _mm256_sub_ps(_mm256_set1_ps(lnN), _mm256_log_ps(lh)), avxEntropy); - avxEntropy = _mm256_fmadd_ps(hh, _mm256_sub_ps(_mm256_set1_ps(lnN), _mm256_log_ps(hh)), r0); - } - // Rest of line adds to float entropy. - for (int x = xmin + nrVectors * vectorLen; x < xmax; ++x, ++p) - { - const float d = getDistribution(histogram, *p); - entropy += d * (lnN - std::log(d)); - } - } - // Accumulate float entropy and horizontal sum of avxEntropy. 
- const __m256 r0 = _mm256_hadd_ps(_mm256_hadd_ps(avxEntropy, _mm256_setzero_ps()), _mm256_setzero_ps()); // ., ., ., e4+e5+e6+e7, ., ., ., e0+e1+e2+e3 - entropy += _mm_cvtss_f32(_mm_add_ps(_mm256_castps256_ps128(r0), _mm256_extractf128_ps(r0, 1))); - - return entropy / (N * std::log(2.0f)); - }; - - const auto calcEntropy = [nSquaresX, nSquaresY, &calcEntropyOfSquare](const T* const pColor, EntropyVectorType& entropyVector) -> void - { -#pragma warning (suppress: 4189) - const int nrEnabledThreads = CMultitask::GetNrProcessors(); // Returns 1 if multithreading disabled by user, otherwise # HW threads - constexpr size_t HistoSize = std::numeric_limits::max() + size_t{ 1 }; - std::vector histogram(HistoSize, 0); - -#pragma omp parallel for default(none) firstprivate(histogram) schedule(dynamic, 50) if(nrEnabledThreads - 1) - for (int y = 0; y < nSquaresY; ++y) - { - for (int x = 0, ndx = y * nSquaresX; x < nSquaresX; ++x, ++ndx) - { - entropyVector[ndx] = calcEntropyOfSquare(x, y, pColor, histogram); - } - } - }; - - const bool isCFA = avxInputSupport.isMonochromeCfaBitmapOfType(); - - if (avxInputSupport.isColorBitmapOfType() || isCFA) - { - AvxCfaProcessing avxCfa{ 0, 0, inputBitmap }; - if (isCFA) - { - const size_t lineEnd = inputBitmap.Height(); - avxCfa.init(0, lineEnd); - avxCfa.interpolate(0, lineEnd, 1); - } - - const T* pRedPixels = isCFA ? avxCfa.redCfaLine(0) : &avxInputSupport.redPixels().at(0); - const T* pGreenPixels = isCFA ? avxCfa.greenCfaLine(0) : &avxInputSupport.greenPixels().at(0); - const T* pBluePixels = isCFA ? 
avxCfa.blueCfaLine(0) : &avxInputSupport.bluePixels().at(0); - - calcEntropy(pRedPixels, redEntropies); - calcEntropy(pGreenPixels, greenEntropies); - calcEntropy(pBluePixels, blueEntropies); - - return 0; - } - - if (avxInputSupport.isMonochromeBitmapOfType()) - { - const T* pGrayPixels = &avxInputSupport.grayPixels().at(0); - calcEntropy(pGrayPixels, redEntropies); - - memcpy(&greenEntropies[0], &redEntropies[0], redEntropies.size() * sizeof(EntropyVectorType::value_type)); - memcpy(&blueEntropies[0], &redEntropies[0], redEntropies.size() * sizeof(EntropyVectorType::value_type)); - - return 0; - } - - return 1; -} +#include "stdafx.h" +#include "avx_entropy.h" +#include "avx_support.h" +#include "avx_cfa.h" +#include "avx_histogram.h" +#include "Multitask.h" + +AvxEntropy::AvxEntropy(CMemoryBitmap& inputbm, const CEntropyInfo& entrinfo, CMemoryBitmap* entropycov) : + inputBitmap{ inputbm }, + entropyInfo{ entrinfo }, + pEntropyCoverage{ entropycov }, + avxReady{ AvxSupport::checkSimdAvailability() } +{ + if (pEntropyCoverage != nullptr && avxReady) + { + const size_t width = pEntropyCoverage->Width(); + const size_t height = pEntropyCoverage->Height(); + static_assert(std::is_same_v && std::is_same_v); + const size_t nrVectors = AvxSupport::numberOfAvxVectors(width); + redEntropyLayer.resize(height * nrVectors); + if (AvxSupport{ *pEntropyCoverage }.isColorBitmap()) + { + greenEntropyLayer.resize(height * nrVectors); + blueEntropyLayer.resize(height * nrVectors); + } + } +} + +int AvxEntropy::calcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies) +{ + if (!avxReady) + return 1; + + int rval = 1; + if (doCalcEntropies(squareSize, nSquaresX, nSquaresY, redEntropies, greenEntropies, blueEntropies) == 0 + || doCalcEntropies(squareSize, nSquaresX, nSquaresY, redEntropies, greenEntropies, blueEntropies) == 0 + || doCalcEntropies(squareSize, 
nSquaresX, nSquaresY, redEntropies, greenEntropies, blueEntropies) == 0) + { + rval = 0; + } + return AvxSupport::zeroUpper(rval); +} + +template +int AvxEntropy::doCalcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies) +{ + // Check input bitmap. + const AvxSupport avxInputSupport{ inputBitmap }; + if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) // Monochrome includes CFA + return 1; + + constexpr int vectorLen = 16; + const int width = inputBitmap.Width(); + const int height = inputBitmap.Height(); + + const auto getDistribution = [](const auto& histogram, T value) -> float + { + constexpr size_t Unsigned_short_max = size_t{ std::numeric_limits::max() }; + if constexpr (std::is_integral::value && sizeof(T) == 4) // 32 bit integral type + value >>= 16; + return static_cast(histogram[std::min(static_cast(value), Unsigned_short_max)]); + }; + + const auto calcEntropyOfSquare = [squareSize, width, height, vectorLen, &getDistribution](const int col, const int row, const T* const pColor, auto& histogram) -> EntropyVectorType::value_type + { + const int xmin = col * squareSize; + const int xmax = std::min(xmin + squareSize, width); + const int nx = xmax - xmin; + const int ymin = row * squareSize; + const int ymax = std::min(ymin + squareSize, height); + const int nrVectors = nx / vectorLen; + memset(histogram.data(), 0, histogram.size() * sizeof(histogram[0])); + + for (int y = ymin; y < ymax; ++y) + { + const T* p = pColor + y * width + xmin; + for (int n = 0; n < nrVectors; ++n, p += vectorLen) + { + const auto [lo, hi] = AvxSupport::read16PackedInt(p); + AvxHistogram::calcHistoOfVectorEpi32(lo, histogram); + AvxHistogram::calcHistoOfVectorEpi32(hi, histogram); + } + // Rest of line + for (int x = xmin + nrVectors * vectorLen; x < xmax; ++x, ++p) + AvxHistogram::addToHisto(histogram, *p); + } + + const 
float N = static_cast(nx * (ymax - ymin)); + const float lnN = std::log(N); + float entropy = 0.0f; + __m256 avxEntropy = _mm256_setzero_ps(); + const int* const pHisto = histogram.data(); + + for (int y = ymin; y < ymax; ++y) + { + const T* p = pColor + y * width + xmin; + for (int n = 0; n < nrVectors; ++n, p += vectorLen) + { + const auto [lo, hi] = AvxSupport::read16PackedInt(p); + const __m256 lh = _mm256_cvtepi32_ps(_mm256_i32gather_epi32(pHisto, lo, 4)); + const __m256 hh = _mm256_cvtepi32_ps(_mm256_i32gather_epi32(pHisto, hi, 4)); + const __m256 r0 = _mm256_fmadd_ps(lh, _mm256_sub_ps(_mm256_set1_ps(lnN), _mm256_log_ps(lh)), avxEntropy); + avxEntropy = _mm256_fmadd_ps(hh, _mm256_sub_ps(_mm256_set1_ps(lnN), _mm256_log_ps(hh)), r0); + } + // Rest of line adds to float entropy. + for (int x = xmin + nrVectors * vectorLen; x < xmax; ++x, ++p) + { + const float d = getDistribution(histogram, *p); + entropy += d * (lnN - std::log(d)); + } + } + // Accumulate float entropy and horizontal sum of avxEntropy. 
+ const __m256 r0 = _mm256_hadd_ps(_mm256_hadd_ps(avxEntropy, _mm256_setzero_ps()), _mm256_setzero_ps()); // ., ., ., e4+e5+e6+e7, ., ., ., e0+e1+e2+e3 + entropy += _mm_cvtss_f32(_mm_add_ps(_mm256_castps256_ps128(r0), _mm256_extractf128_ps(r0, 1))); + + return entropy / (N * std::log(2.0f)); + }; + + const auto calcEntropy = [nSquaresX, nSquaresY, &calcEntropyOfSquare](const T* const pColor, EntropyVectorType& entropyVector) -> void + { +#pragma warning (suppress: 4189) + const int nrEnabledThreads = CMultitask::GetNrProcessors(); // Returns 1 if multithreading disabled by user, otherwise # HW threads + constexpr size_t HistoSize = std::numeric_limits::max() + size_t{ 1 }; + std::vector histogram(HistoSize, 0); + +#pragma omp parallel for default(none) firstprivate(histogram) schedule(dynamic, 50) if(nrEnabledThreads - 1) + for (int y = 0; y < nSquaresY; ++y) + { + for (int x = 0, ndx = y * nSquaresX; x < nSquaresX; ++x, ++ndx) + { + entropyVector[ndx] = calcEntropyOfSquare(x, y, pColor, histogram); + } + } + }; + + const bool isCFA = avxInputSupport.isMonochromeCfaBitmapOfType(); + + if (avxInputSupport.isColorBitmapOfType() || isCFA) + { + AvxCfaProcessing avxCfa{ 0, 0, inputBitmap }; + if (isCFA) + { + const size_t lineEnd = inputBitmap.Height(); + avxCfa.init(0, lineEnd); + avxCfa.interpolate(0, lineEnd, 1); + } + + const T* pRedPixels = isCFA ? avxCfa.redCfaLine(0) : &avxInputSupport.redPixels().at(0); + const T* pGreenPixels = isCFA ? avxCfa.greenCfaLine(0) : &avxInputSupport.greenPixels().at(0); + const T* pBluePixels = isCFA ? 
avxCfa.blueCfaLine(0) : &avxInputSupport.bluePixels().at(0); + + calcEntropy(pRedPixels, redEntropies); + calcEntropy(pGreenPixels, greenEntropies); + calcEntropy(pBluePixels, blueEntropies); + + return 0; + } + + if (avxInputSupport.isMonochromeBitmapOfType()) + { + const T* pGrayPixels = &avxInputSupport.grayPixels().at(0); + calcEntropy(pGrayPixels, redEntropies); + + memcpy(&greenEntropies[0], &redEntropies[0], redEntropies.size() * sizeof(EntropyVectorType::value_type)); + memcpy(&blueEntropies[0], &redEntropies[0], redEntropies.size() * sizeof(EntropyVectorType::value_type)); + + return 0; + } + + return 1; +} diff --git a/DeepSkyStacker/avx_entropy.h b/DeepSkyStackerKernel/avx_entropy.h similarity index 97% rename from DeepSkyStacker/avx_entropy.h rename to DeepSkyStackerKernel/avx_entropy.h index 30930797..0d12a71b 100644 --- a/DeepSkyStacker/avx_entropy.h +++ b/DeepSkyStackerKernel/avx_entropy.h @@ -1,40 +1,40 @@ -#pragma once - -/* -* Class for -* 1. Calculation of the entropy data of the squares, -* 2. Managing the entropy coverage (the coverage bitmap itself is calculated while stacking with the class AvxStacking). 
-*/ -class CMemoryBitmap; -class CEntropyInfo; -class AvxEntropy -{ -private: - friend class AvxStacking; - friend class AvxAccumulation; - - typedef std::vector EntropyVectorType; - typedef std::vector<__m256> EntropyLayerVectorType; - - CMemoryBitmap& inputBitmap; - const CEntropyInfo& entropyInfo; - CMemoryBitmap* pEntropyCoverage; - EntropyLayerVectorType redEntropyLayer; - EntropyLayerVectorType greenEntropyLayer; - EntropyLayerVectorType blueEntropyLayer; - bool avxReady; -public: - AvxEntropy() = delete; - AvxEntropy(CMemoryBitmap& inputbm, const CEntropyInfo& entrinfo, CMemoryBitmap* entropycov); - AvxEntropy(const AvxEntropy&) = default; - AvxEntropy(AvxEntropy&&) = delete; - AvxEntropy& operator=(const AvxEntropy&) = delete; -#if defined(UNIT_TESTS) - float* getRedEntropyLayer() { return reinterpret_cast(redEntropyLayer.data()); } -#endif - - int calcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies); -private: - template - int doCalcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies); -}; +#pragma once + +/* +* Class for +* 1. Calculation of the entropy data of the squares, +* 2. Managing the entropy coverage (the coverage bitmap itself is calculated while stacking with the class AvxStacking). 
+*/ +class CMemoryBitmap; +class CEntropyInfo; +class AvxEntropy +{ +private: + friend class AvxStacking; + friend class AvxAccumulation; + + typedef std::vector EntropyVectorType; + typedef std::vector<__m256> EntropyLayerVectorType; + + CMemoryBitmap& inputBitmap; + const CEntropyInfo& entropyInfo; + CMemoryBitmap* pEntropyCoverage; + EntropyLayerVectorType redEntropyLayer; + EntropyLayerVectorType greenEntropyLayer; + EntropyLayerVectorType blueEntropyLayer; + bool avxReady; +public: + AvxEntropy() = delete; + AvxEntropy(CMemoryBitmap& inputbm, const CEntropyInfo& entrinfo, CMemoryBitmap* entropycov); + AvxEntropy(const AvxEntropy&) = default; + AvxEntropy(AvxEntropy&&) = delete; + AvxEntropy& operator=(const AvxEntropy&) = delete; +#if defined(UNIT_TESTS) + float* getRedEntropyLayer() { return reinterpret_cast(redEntropyLayer.data()); } +#endif + + int calcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies); +private: + template + int doCalcEntropies(const int squareSize, const int nSquaresX, const int nSquaresY, EntropyVectorType& redEntropies, EntropyVectorType& greenEntropies, EntropyVectorType& blueEntropies); +}; diff --git a/DeepSkyStacker/avx_filter.cpp b/DeepSkyStackerKernel/avx_filter.cpp similarity index 97% rename from DeepSkyStacker/avx_filter.cpp rename to DeepSkyStackerKernel/avx_filter.cpp index eca1145b..f6b544c9 100644 --- a/DeepSkyStacker/avx_filter.cpp +++ b/DeepSkyStackerKernel/avx_filter.cpp @@ -1,191 +1,191 @@ -#include "stdafx.h" -#include "avx_filter.h" -#include "avx_support.h" -#include "MedianFilterEngine.h" - -template <> -AvxImageFilter::AvxImageFilter(CInternalMedianFilterEngineT* filEng) : - filterEngine{ filEng } -{ -} - -template -AvxImageFilter::AvxImageFilter(CInternalMedianFilterEngineT*) : - filterEngine{ nullptr } -{} - -template -int AvxImageFilter::filter(const size_t lineStart, const size_t lineEnd) 
-{ - if constexpr (!std::is_same::value) - return 1; - if (!AvxSupport::checkSimdAvailability()) - return 1; - if (filterEngine == nullptr) - return 1; - if (filterEngine->m_lFilterSize != 1) - return 1; - if (filterEngine->m_CFAType != CFATYPE_NONE) - return 1; - - const auto cmpLT = [](__m256& a, __m256& b) -> void - { - const __m256 mask = _mm256_cmp_ps(a, b, 17); // 17: CMP_LT_OQ -> IF (a < b) mask=1 ELSE mask=0 - const __m256 smaller = _mm256_blendv_ps(b, a, mask); // IF (mask==1) TAKE second arg -> a < b ? a : b -> the smaller elements - b = _mm256_blendv_ps(a, b, mask); // a < b ? b : a -> the larger elements - a = smaller; - }; - - const auto median9 = [&cmpLT](__m256 x0, __m256 x1, __m256 x2, __m256 x3, __m256 x4, __m256 x5, __m256 x6, __m256 x7, __m256 x8) -> __m256 - { - cmpLT(x0, x1); - - cmpLT(x0, x2); - cmpLT(x1, x2); - - cmpLT(x0, x3); - cmpLT(x1, x3); - cmpLT(x2, x3); - - cmpLT(x0, x4); - cmpLT(x1, x4); - cmpLT(x2, x4); - cmpLT(x3, x4); - - cmpLT(x0, x5); - cmpLT(x1, x5); - cmpLT(x2, x5); - cmpLT(x3, x5); - cmpLT(x4, x5); - - cmpLT(x0, x6); - cmpLT(x1, x6); - cmpLT(x2, x6); - cmpLT(x3, x6); - cmpLT(x4, x6); -// cmpLT(x5, x6); - - cmpLT(x0, x7); - cmpLT(x1, x7); - cmpLT(x2, x7); - cmpLT(x3, x7); - cmpLT(x4, x7); -// cmpLT(x5, x7); -// cmpLT(x6, x7); - - cmpLT(x0, x8); - cmpLT(x1, x8); - cmpLT(x2, x8); - cmpLT(x3, x8); - cmpLT(x4, x8); -// cmpLT(x5, x8); -// cmpLT(x6, x8); -// cmpLT(x7, x8); - - return x4; - }; - - const size_t width = filterEngine->m_lWidth; - const size_t height = filterEngine->m_lHeight; - const size_t nrVectors = width / 8; - - const double* pIn{ filterEngine->m_pvInValues + lineStart * width }; - const double* pInPrev = (lineStart == 0) ? pIn : (pIn - width); - const double* pInNext = (lineStart == height - 1) ? 
pIn : (pIn + width); - double* pOut{ filterEngine->m_pvOutValues + lineStart * width }; - - __m256 thisLine, thisLinePrev, thisLineNext; - __m256 prevLine, prevLinePrev, prevLineNext; - __m256 nextLine, nextLinePrev, nextLineNext; - - const auto load8Ps = [](const double *const pd) -> __m256 - { - const __m128 lo = _mm256_cvtpd_ps(_mm256_loadu_pd(pd)); - const __m128 hi = _mm256_cvtpd_ps(_mm256_loadu_pd(pd + 4)); - return _mm256_set_m128(hi, lo); - }; - const auto getPrevAndNext = [](const __m256 vector, const double* const p, __m256& prev, __m256& next, const __m128i elementIndices) -> void - { - const __m128 newElements = _mm256_cvtpd_ps(_mm256_i32gather_pd(p, elementIndices, 8)); -// const __m128 prevElement = _mm256_cvtpd_ps(_mm256_broadcast_sd(p - 1)); -// const __m128 nextElement = _mm256_cvtpd_ps(_mm256_broadcast_sd(p + 8)); - const __m256 shiftedLeft = _mm256_permutevar8x32_ps(vector, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); - const __m256 shiftedRight = _mm256_permutevar8x32_ps(vector, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 7)); - prev = _mm256_blend_ps(shiftedLeft, _mm256_castps128_ps256(newElements), 0x01); - next = _mm256_blend_ps(shiftedRight, _mm256_insertf128_ps(_mm256_setzero_ps(), newElements, 1), 0x80); - }; - const auto advancePointersAndVectors = [&](const size_t n, const __m128i elementIndices) -> void - { - std::advance(pIn, n); - std::advance(pInPrev, n); - std::advance(pInNext, n); - std::advance(pOut, n); - - thisLine = load8Ps(pIn); - getPrevAndNext(thisLine, pIn, thisLinePrev, thisLineNext, elementIndices); - - prevLine = load8Ps(pInPrev); - getPrevAndNext(prevLine, pInPrev, prevLinePrev, prevLineNext, elementIndices); - - nextLine = load8Ps(pInNext); - getPrevAndNext(nextLine, pInNext, nextLinePrev, nextLineNext, elementIndices); - }; - const auto storeMedian = [&pOut](const __m256 median) -> void - { - _mm256_storeu_pd(pOut, _mm256_cvtps_pd(_mm256_castps256_ps128(median))); - _mm256_storeu_pd(pOut + 4, 
_mm256_cvtps_pd(_mm256_extractf128_ps(median, 1))); - }; - - for (size_t row = lineStart; row < lineEnd; ++row) - { - // First column - thisLine = load8Ps(pIn); - getPrevAndNext(thisLine, pIn, thisLinePrev, thisLineNext, _mm_setr_epi32(0, 0, 0, 8)); - prevLine = load8Ps(pInPrev); - getPrevAndNext(prevLine, pInPrev, prevLinePrev, prevLineNext, _mm_setr_epi32(0, 0, 0, 8)); - nextLine = load8Ps(pInNext); - getPrevAndNext(nextLine, pInNext, nextLinePrev, nextLineNext, _mm_setr_epi32(0, 0, 0, 8)); - - __m256 median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); - storeMedian(median); - - // Main loop - for (size_t counter = 1; counter < nrVectors - 1; ++counter) - { - advancePointersAndVectors(8, _mm_setr_epi32(-1, 0, 0, 8)); - median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); - storeMedian(median); - } - - const size_t remainingPixels = width - nrVectors * 8; - - // Last vector - const __m128i lastIndex = remainingPixels != 0 ? _mm_setr_epi32(-1, 0, 0, 8) : _mm_setr_epi32(-1, 0, 0, 7); // Load following pixel only if there is one. - advancePointersAndVectors(8, lastIndex); - median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); - storeMedian(median); - - // Last few pixels - if (remainingPixels != 0) - { - advancePointersAndVectors(remainingPixels, _mm_setr_epi32(-1, 0, 0, 7)); - median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); - for (size_t n = 8 - remainingPixels; n < 8; ++n) - pOut[n] = static_cast(median.m256_f32[n]); - } - - pOut = filterEngine->m_pvOutValues + (row + 1) * width; - pInPrev = filterEngine->m_pvInValues + row * width; - pIn = pInPrev + width; - pInNext = (row == height - 2) ? 
pIn : (pIn + width); // If next round will be the last line of the bitmap -> pNext = pIn - } - - return AvxSupport::zeroUpper(0); -} - -// Explicit template instantiation for the types we need. -template AvxImageFilter; -template AvxImageFilter; -template AvxImageFilter; -template AvxImageFilter; -template AvxImageFilter; +#include "stdafx.h" +#include "avx_filter.h" +#include "avx_support.h" +#include "MedianFilterEngine.h" + +template <> +AvxImageFilter::AvxImageFilter(CInternalMedianFilterEngineT* filEng) : + filterEngine{ filEng } +{ +} + +template +AvxImageFilter::AvxImageFilter(CInternalMedianFilterEngineT*) : + filterEngine{ nullptr } +{} + +template +int AvxImageFilter::filter(const size_t lineStart, const size_t lineEnd) +{ + if constexpr (!std::is_same::value) + return 1; + if (!AvxSupport::checkSimdAvailability()) + return 1; + if (filterEngine == nullptr) + return 1; + if (filterEngine->m_lFilterSize != 1) + return 1; + if (filterEngine->m_CFAType != CFATYPE_NONE) + return 1; + + const auto cmpLT = [](__m256& a, __m256& b) -> void + { + const __m256 mask = _mm256_cmp_ps(a, b, 17); // 17: CMP_LT_OQ -> IF (a < b) mask=1 ELSE mask=0 + const __m256 smaller = _mm256_blendv_ps(b, a, mask); // IF (mask==1) TAKE second arg -> a < b ? a : b -> the smaller elements + b = _mm256_blendv_ps(a, b, mask); // a < b ? 
b : a -> the larger elements + a = smaller; + }; + + const auto median9 = [&cmpLT](__m256 x0, __m256 x1, __m256 x2, __m256 x3, __m256 x4, __m256 x5, __m256 x6, __m256 x7, __m256 x8) -> __m256 + { + cmpLT(x0, x1); + + cmpLT(x0, x2); + cmpLT(x1, x2); + + cmpLT(x0, x3); + cmpLT(x1, x3); + cmpLT(x2, x3); + + cmpLT(x0, x4); + cmpLT(x1, x4); + cmpLT(x2, x4); + cmpLT(x3, x4); + + cmpLT(x0, x5); + cmpLT(x1, x5); + cmpLT(x2, x5); + cmpLT(x3, x5); + cmpLT(x4, x5); + + cmpLT(x0, x6); + cmpLT(x1, x6); + cmpLT(x2, x6); + cmpLT(x3, x6); + cmpLT(x4, x6); +// cmpLT(x5, x6); + + cmpLT(x0, x7); + cmpLT(x1, x7); + cmpLT(x2, x7); + cmpLT(x3, x7); + cmpLT(x4, x7); +// cmpLT(x5, x7); +// cmpLT(x6, x7); + + cmpLT(x0, x8); + cmpLT(x1, x8); + cmpLT(x2, x8); + cmpLT(x3, x8); + cmpLT(x4, x8); +// cmpLT(x5, x8); +// cmpLT(x6, x8); +// cmpLT(x7, x8); + + return x4; + }; + + const size_t width = filterEngine->m_lWidth; + const size_t height = filterEngine->m_lHeight; + const size_t nrVectors = width / 8; + + const double* pIn{ filterEngine->m_pvInValues + lineStart * width }; + const double* pInPrev = (lineStart == 0) ? pIn : (pIn - width); + const double* pInNext = (lineStart == height - 1) ? 
pIn : (pIn + width); + double* pOut{ filterEngine->m_pvOutValues + lineStart * width }; + + __m256 thisLine, thisLinePrev, thisLineNext; + __m256 prevLine, prevLinePrev, prevLineNext; + __m256 nextLine, nextLinePrev, nextLineNext; + + const auto load8Ps = [](const double *const pd) -> __m256 + { + const __m128 lo = _mm256_cvtpd_ps(_mm256_loadu_pd(pd)); + const __m128 hi = _mm256_cvtpd_ps(_mm256_loadu_pd(pd + 4)); + return _mm256_set_m128(hi, lo); + }; + const auto getPrevAndNext = [](const __m256 vector, const double* const p, __m256& prev, __m256& next, const __m128i elementIndices) -> void + { + const __m128 newElements = _mm256_cvtpd_ps(_mm256_i32gather_pd(p, elementIndices, 8)); +// const __m128 prevElement = _mm256_cvtpd_ps(_mm256_broadcast_sd(p - 1)); +// const __m128 nextElement = _mm256_cvtpd_ps(_mm256_broadcast_sd(p + 8)); + const __m256 shiftedLeft = _mm256_permutevar8x32_ps(vector, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); + const __m256 shiftedRight = _mm256_permutevar8x32_ps(vector, _mm256_setr_epi32(1, 2, 3, 4, 5, 6, 7, 7)); + prev = _mm256_blend_ps(shiftedLeft, _mm256_castps128_ps256(newElements), 0x01); + next = _mm256_blend_ps(shiftedRight, _mm256_insertf128_ps(_mm256_setzero_ps(), newElements, 1), 0x80); + }; + const auto advancePointersAndVectors = [&](const size_t n, const __m128i elementIndices) -> void + { + std::advance(pIn, n); + std::advance(pInPrev, n); + std::advance(pInNext, n); + std::advance(pOut, n); + + thisLine = load8Ps(pIn); + getPrevAndNext(thisLine, pIn, thisLinePrev, thisLineNext, elementIndices); + + prevLine = load8Ps(pInPrev); + getPrevAndNext(prevLine, pInPrev, prevLinePrev, prevLineNext, elementIndices); + + nextLine = load8Ps(pInNext); + getPrevAndNext(nextLine, pInNext, nextLinePrev, nextLineNext, elementIndices); + }; + const auto storeMedian = [&pOut](const __m256 median) -> void + { + _mm256_storeu_pd(pOut, _mm256_cvtps_pd(_mm256_castps256_ps128(median))); + _mm256_storeu_pd(pOut + 4, 
_mm256_cvtps_pd(_mm256_extractf128_ps(median, 1))); + }; + + for (size_t row = lineStart; row < lineEnd; ++row) + { + // First column + thisLine = load8Ps(pIn); + getPrevAndNext(thisLine, pIn, thisLinePrev, thisLineNext, _mm_setr_epi32(0, 0, 0, 8)); + prevLine = load8Ps(pInPrev); + getPrevAndNext(prevLine, pInPrev, prevLinePrev, prevLineNext, _mm_setr_epi32(0, 0, 0, 8)); + nextLine = load8Ps(pInNext); + getPrevAndNext(nextLine, pInNext, nextLinePrev, nextLineNext, _mm_setr_epi32(0, 0, 0, 8)); + + __m256 median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); + storeMedian(median); + + // Main loop + for (size_t counter = 1; counter < nrVectors - 1; ++counter) + { + advancePointersAndVectors(8, _mm_setr_epi32(-1, 0, 0, 8)); + median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); + storeMedian(median); + } + + const size_t remainingPixels = width - nrVectors * 8; + + // Last vector + const __m128i lastIndex = remainingPixels != 0 ? _mm_setr_epi32(-1, 0, 0, 8) : _mm_setr_epi32(-1, 0, 0, 7); // Load following pixel only if there is one. + advancePointersAndVectors(8, lastIndex); + median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); + storeMedian(median); + + // Last few pixels + if (remainingPixels != 0) + { + advancePointersAndVectors(remainingPixels, _mm_setr_epi32(-1, 0, 0, 7)); + median = median9(prevLinePrev, prevLine, prevLineNext, thisLinePrev, thisLine, thisLineNext, nextLinePrev, nextLine, nextLineNext); + for (size_t n = 8 - remainingPixels; n < 8; ++n) + pOut[n] = static_cast(median.m256_f32[n]); + } + + pOut = filterEngine->m_pvOutValues + (row + 1) * width; + pInPrev = filterEngine->m_pvInValues + row * width; + pIn = pInPrev + width; + pInNext = (row == height - 2) ? 
pIn : (pIn + width); // If next round will be the last line of the bitmap -> pNext = pIn + } + + return AvxSupport::zeroUpper(0); +} + +// Explicit template instantiation for the types we need. +template AvxImageFilter; +template AvxImageFilter; +template AvxImageFilter; +template AvxImageFilter; +template AvxImageFilter; diff --git a/DeepSkyStacker/avx_filter.h b/DeepSkyStackerKernel/avx_filter.h similarity index 96% rename from DeepSkyStacker/avx_filter.h rename to DeepSkyStackerKernel/avx_filter.h index 91d20cd1..1dee5019 100644 --- a/DeepSkyStacker/avx_filter.h +++ b/DeepSkyStackerKernel/avx_filter.h @@ -1,19 +1,19 @@ -#pragma once - -template -class CInternalMedianFilterEngineT; - -template -class AvxImageFilter -{ -private: - CInternalMedianFilterEngineT* filterEngine; -public: - AvxImageFilter() = delete; - AvxImageFilter(CInternalMedianFilterEngineT* filEng); - AvxImageFilter(const AvxImageFilter&) = default; - AvxImageFilter(AvxImageFilter&&) = delete; - AvxImageFilter& operator=(const AvxImageFilter&) = delete; - - int filter(const size_t lineStart, const size_t lineEnd); -}; +#pragma once + +template +class CInternalMedianFilterEngineT; + +template +class AvxImageFilter +{ +private: + CInternalMedianFilterEngineT* filterEngine; +public: + AvxImageFilter() = delete; + AvxImageFilter(CInternalMedianFilterEngineT* filEng); + AvxImageFilter(const AvxImageFilter&) = default; + AvxImageFilter(AvxImageFilter&&) = delete; + AvxImageFilter& operator=(const AvxImageFilter&) = delete; + + int filter(const size_t lineStart, const size_t lineEnd); +}; diff --git a/DeepSkyStacker/avx_histogram.cpp b/DeepSkyStackerKernel/avx_histogram.cpp similarity index 97% rename from DeepSkyStacker/avx_histogram.cpp rename to DeepSkyStackerKernel/avx_histogram.cpp index d3e11737..f99eae2c 100644 --- a/DeepSkyStacker/avx_histogram.cpp +++ b/DeepSkyStackerKernel/avx_histogram.cpp @@ -1,172 +1,172 @@ -#include "stdafx.h" -#include "avx_histogram.h" - 
-AvxHistogram::AvxHistogram(CMemoryBitmap& inputbm) : - avxReady{ AvxSupport::checkSimdAvailability() }, - allRunsSuccessful{ true }, - redHisto(avxReady ? HistogramSize() : 0, 0), - greenHisto{}, - blueHisto{}, - avxCfa{ 0, 0, inputbm }, - inputBitmap{ inputbm } -{ - if (avxReady && AvxSupport{ inputBitmap }.isColorBitmapOrCfa()) - { - greenHisto.resize(HistogramSize(), 0); - blueHisto.resize(HistogramSize(), 0); - } - - static_assert(sizeof(HistogramVectorType::value_type) == sizeof(int)); -} - -int AvxHistogram::calcHistogram(const size_t lineStart, const size_t lineEnd) -{ - const auto rval = [this](const int rv) -> int - { - if (rv != 0) - this->allRunsSuccessful = false; - return rv; - }; - - if (!avxReady) - return rval(1); - - int rv = 1; - if (doCalcHistogram(lineStart, lineEnd) == 0 - || doCalcHistogram(lineStart, lineEnd) == 0 - || doCalcHistogram(lineStart, lineEnd) == 0 - || doCalcHistogram(lineStart, lineEnd) == 0) - { - rv = 0; - } - return AvxSupport::zeroUpper(rval(rv)); -} - -template -int AvxHistogram::doCalcHistogram(const size_t lineStart, const size_t lineEnd) -{ - // Check input bitmap. - const AvxSupport avxInputSupport{ inputBitmap }; - if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) // Monochrome includes CFA - return 1; - - constexpr size_t vectorLen = 16; - const size_t width = inputBitmap.Width(); - const size_t nrVectors = width / vectorLen; - - // AVX makes no sense for super-small arrays. - if (width < 256 || inputBitmap.Height() < 32) - return 1; - - const auto calcHistoOfTwoVectorsEpi32 = [](const std::tuple<__m256i, __m256i>& twoVectors, auto& histogram) -> void - { - const auto [lo, hi] = twoVectors; - calcHistoOfVectorEpi32(lo, histogram); - calcHistoOfVectorEpi32(hi, histogram); - }; - - const bool isCFA = avxInputSupport.isMonochromeCfaBitmapOfType(); - - // Color bitmap (incl. 
CFA) - // ------------------------ - if (avxInputSupport.isColorBitmapOfType() || isCFA) - { - if constexpr (std::is_same::value) // color-double not supported. - return 1; - else - { - if (isCFA) - { - avxCfa.init(lineStart, lineEnd); - avxCfa.interpolate(lineStart, lineEnd, 1); - } - - for (size_t row = lineStart, lineNdx = 0; row < lineEnd; ++row, ++lineNdx) - { - const T* pRedPixels = isCFA ? avxCfa.redCfaLine(lineNdx) : &avxInputSupport.redPixels().at(row * width); - const T* pGreenPixels = isCFA ? avxCfa.greenCfaLine(lineNdx) : &avxInputSupport.greenPixels().at(row * width); - const T* pBluePixels = isCFA ? avxCfa.blueCfaLine(lineNdx) : &avxInputSupport.bluePixels().at(row * width); - - for (size_t counter = 0; counter < nrVectors; ++counter, pRedPixels += vectorLen, pGreenPixels += vectorLen, pBluePixels += vectorLen) - { - calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pRedPixels), redHisto); - calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pGreenPixels), greenHisto); - calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pBluePixels), blueHisto); - } - for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pRedPixels, ++pGreenPixels, ++pBluePixels) - { - addToHisto(redHisto, *pRedPixels); - addToHisto(greenHisto, *pGreenPixels); - addToHisto(blueHisto, *pBluePixels); - } - } - return 0; - } - } - - // Note: - // Gray input bitmaps of type double use a fix scaling factor of 256. - // This is for the histogram in the registering process, where the color values come from the luminance calculation. They are in the range [0, 256). - // Thus, they need up-scaling by a factor of 256. 
- if (avxInputSupport.isMonochromeBitmapOfType()) - { - for (size_t row = lineStart; row < lineEnd; ++row) - { - const T* pGrayPixels = &avxInputSupport.grayPixels().at(row * width); - for (size_t counter = 0; counter < nrVectors; ++counter, pGrayPixels += vectorLen) - { - if constexpr (std::is_same::value) - calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pGrayPixels, _mm256_set1_pd(256.0)), redHisto); - else - calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pGrayPixels), redHisto); - } - for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pGrayPixels) - { - addToHisto(redHisto, *pGrayPixels); - } - } - return 0; - } - - return 1; -} - -int AvxHistogram::mergeHistograms(HistogramVectorType& red, HistogramVectorType& green, HistogramVectorType& blue) -{ - if (!avxReady) - return 1; - - const auto mergeHisto = [](HistogramVectorType& targetHisto, const HistogramVectorType& sourceHisto) -> void - { - if (targetHisto.size() == HistogramSize() && sourceHisto.size() == HistogramSize()) - { - constexpr size_t nrVectors = HistogramSize() / 8; - auto* pTarget{ &*targetHisto.begin() }; - const auto* pSource{ &*sourceHisto.begin() }; - - for (size_t n = 0; n < nrVectors; ++n, pTarget += 8, pSource += 8) - { - const __m256i tgt = _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)pTarget), _mm256_loadu_si256((const __m256i*)pSource)); - _mm256_storeu_si256((__m256i*)pTarget, tgt); - } - for (size_t n = nrVectors * 8; n < HistogramSize(); ++n) - targetHisto[n] += sourceHisto[n]; - } - else // Why do we get here? - { - for (size_t n = 0; n < sourceHisto.size(); ++n) // Let's hope, the targetHisto is larger in size than the sourceHisto. - targetHisto[n] += sourceHisto[n]; - } - }; - - mergeHisto(red, redHisto); - mergeHisto(green, greenHisto.empty() ? redHisto : greenHisto); - mergeHisto(blue, blueHisto.empty() ? 
redHisto : blueHisto); - - return AvxSupport::zeroUpper(0); -} - -bool AvxHistogram::histogramSuccessful() const -{ - return allRunsSuccessful; -}; +#include "stdafx.h" +#include "avx_histogram.h" + +AvxHistogram::AvxHistogram(CMemoryBitmap& inputbm) : + avxReady{ AvxSupport::checkSimdAvailability() }, + allRunsSuccessful{ true }, + redHisto(avxReady ? HistogramSize() : 0, 0), + greenHisto{}, + blueHisto{}, + avxCfa{ 0, 0, inputbm }, + inputBitmap{ inputbm } +{ + if (avxReady && AvxSupport{ inputBitmap }.isColorBitmapOrCfa()) + { + greenHisto.resize(HistogramSize(), 0); + blueHisto.resize(HistogramSize(), 0); + } + + static_assert(sizeof(HistogramVectorType::value_type) == sizeof(int)); +} + +int AvxHistogram::calcHistogram(const size_t lineStart, const size_t lineEnd) +{ + const auto rval = [this](const int rv) -> int + { + if (rv != 0) + this->allRunsSuccessful = false; + return rv; + }; + + if (!avxReady) + return rval(1); + + int rv = 1; + if (doCalcHistogram(lineStart, lineEnd) == 0 + || doCalcHistogram(lineStart, lineEnd) == 0 + || doCalcHistogram(lineStart, lineEnd) == 0 + || doCalcHistogram(lineStart, lineEnd) == 0) + { + rv = 0; + } + return AvxSupport::zeroUpper(rval(rv)); +} + +template +int AvxHistogram::doCalcHistogram(const size_t lineStart, const size_t lineEnd) +{ + // Check input bitmap. + const AvxSupport avxInputSupport{ inputBitmap }; + if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) // Monochrome includes CFA + return 1; + + constexpr size_t vectorLen = 16; + const size_t width = inputBitmap.Width(); + const size_t nrVectors = width / vectorLen; + + // AVX makes no sense for super-small arrays. 
+ if (width < 256 || inputBitmap.Height() < 32) + return 1; + + const auto calcHistoOfTwoVectorsEpi32 = [](const std::tuple<__m256i, __m256i>& twoVectors, auto& histogram) -> void + { + const auto [lo, hi] = twoVectors; + calcHistoOfVectorEpi32(lo, histogram); + calcHistoOfVectorEpi32(hi, histogram); + }; + + const bool isCFA = avxInputSupport.isMonochromeCfaBitmapOfType(); + + // Color bitmap (incl. CFA) + // ------------------------ + if (avxInputSupport.isColorBitmapOfType() || isCFA) + { + if constexpr (std::is_same::value) // color-double not supported. + return 1; + else + { + if (isCFA) + { + avxCfa.init(lineStart, lineEnd); + avxCfa.interpolate(lineStart, lineEnd, 1); + } + + for (size_t row = lineStart, lineNdx = 0; row < lineEnd; ++row, ++lineNdx) + { + const T* pRedPixels = isCFA ? avxCfa.redCfaLine(lineNdx) : &avxInputSupport.redPixels().at(row * width); + const T* pGreenPixels = isCFA ? avxCfa.greenCfaLine(lineNdx) : &avxInputSupport.greenPixels().at(row * width); + const T* pBluePixels = isCFA ? avxCfa.blueCfaLine(lineNdx) : &avxInputSupport.bluePixels().at(row * width); + + for (size_t counter = 0; counter < nrVectors; ++counter, pRedPixels += vectorLen, pGreenPixels += vectorLen, pBluePixels += vectorLen) + { + calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pRedPixels), redHisto); + calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pGreenPixels), greenHisto); + calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pBluePixels), blueHisto); + } + for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pRedPixels, ++pGreenPixels, ++pBluePixels) + { + addToHisto(redHisto, *pRedPixels); + addToHisto(greenHisto, *pGreenPixels); + addToHisto(blueHisto, *pBluePixels); + } + } + return 0; + } + } + + // Note: + // Gray input bitmaps of type double use a fix scaling factor of 256. + // This is for the histogram in the registering process, where the color values come from the luminance calculation. They are in the range [0, 256). 
+ // Thus, they need up-scaling by a factor of 256. + if (avxInputSupport.isMonochromeBitmapOfType()) + { + for (size_t row = lineStart; row < lineEnd; ++row) + { + const T* pGrayPixels = &avxInputSupport.grayPixels().at(row * width); + for (size_t counter = 0; counter < nrVectors; ++counter, pGrayPixels += vectorLen) + { + if constexpr (std::is_same::value) + calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pGrayPixels, _mm256_set1_pd(256.0)), redHisto); + else + calcHistoOfTwoVectorsEpi32(AvxSupport::read16PackedInt(pGrayPixels), redHisto); + } + for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pGrayPixels) + { + addToHisto(redHisto, *pGrayPixels); + } + } + return 0; + } + + return 1; +} + +int AvxHistogram::mergeHistograms(HistogramVectorType& red, HistogramVectorType& green, HistogramVectorType& blue) +{ + if (!avxReady) + return 1; + + const auto mergeHisto = [](HistogramVectorType& targetHisto, const HistogramVectorType& sourceHisto) -> void + { + if (targetHisto.size() == HistogramSize() && sourceHisto.size() == HistogramSize()) + { + constexpr size_t nrVectors = HistogramSize() / 8; + auto* pTarget{ &*targetHisto.begin() }; + const auto* pSource{ &*sourceHisto.begin() }; + + for (size_t n = 0; n < nrVectors; ++n, pTarget += 8, pSource += 8) + { + const __m256i tgt = _mm256_add_epi32(_mm256_loadu_si256((const __m256i*)pTarget), _mm256_loadu_si256((const __m256i*)pSource)); + _mm256_storeu_si256((__m256i*)pTarget, tgt); + } + for (size_t n = nrVectors * 8; n < HistogramSize(); ++n) + targetHisto[n] += sourceHisto[n]; + } + else // Why do we get here? + { + for (size_t n = 0; n < sourceHisto.size(); ++n) // Let's hope, the targetHisto is larger in size than the sourceHisto. + targetHisto[n] += sourceHisto[n]; + } + }; + + mergeHisto(red, redHisto); + mergeHisto(green, greenHisto.empty() ? redHisto : greenHisto); + mergeHisto(blue, blueHisto.empty() ? 
redHisto : blueHisto); + + return AvxSupport::zeroUpper(0); +} + +bool AvxHistogram::histogramSuccessful() const +{ + return allRunsSuccessful; +}; diff --git a/DeepSkyStacker/avx_histogram.h b/DeepSkyStackerKernel/avx_histogram.h similarity index 97% rename from DeepSkyStacker/avx_histogram.h rename to DeepSkyStackerKernel/avx_histogram.h index 089bda6f..595f352f 100644 --- a/DeepSkyStacker/avx_histogram.h +++ b/DeepSkyStackerKernel/avx_histogram.h @@ -1,115 +1,115 @@ -#pragma once -#include "avx_cfa.h" -#include "avx_support.h" - -class AvxHistogram -{ -public: - typedef std::vector HistogramVectorType; -private: - bool avxReady; - bool allRunsSuccessful; - HistogramVectorType redHisto; - HistogramVectorType greenHisto; - HistogramVectorType blueHisto; - AvxCfaProcessing avxCfa; - CMemoryBitmap& inputBitmap; -public: - AvxHistogram() = delete; - AvxHistogram(CMemoryBitmap& inputbm); - AvxHistogram(const AvxHistogram&) = default; - AvxHistogram(AvxHistogram&&) = delete; - AvxHistogram& operator=(const AvxHistogram&) = delete; - - int calcHistogram(const size_t lineStart, const size_t lineEnd); - int mergeHistograms(HistogramVectorType& red, HistogramVectorType& green, HistogramVectorType& blue); - bool histogramSuccessful() const; - inline bool isAvxReady() const { return this->avxReady; } -private: - static constexpr size_t HistogramSize() { return std::numeric_limits::max() + size_t{1}; } - template - int doCalcHistogram(const size_t lineStart, const size_t lineEnd); - - // Conflict detection: Number of equal elements + blocking mask. 
- inline static std::tuple<__m256i, std::uint32_t> detectConflictsEpi32(const __m256i a) noexcept - { - __m256i counter = _mm256_set1_epi32(1); - std::uint32_t bitMask = 0; - - __m256i shifted = AvxSupport::shiftRightEpi32<1>(a); - __m256i c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0x80); // Set highest mask to "no conflict" (=0) - counter = _mm256_sub_epi32(counter, c); // Add one where there is a conflict with the element left from it (by one position). - bitMask |= (_mm256_movemask_epi8(c) << 4); // If there is a conflict with the element left from it -> set the mask of that element to 1. - - shifted = AvxSupport::shiftRightEpi32<1>(shifted); - c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xc0); - counter = _mm256_sub_epi32(counter, c); - bitMask |= (_mm256_movemask_epi8(c) << 8); - - shifted = AvxSupport::shiftRightEpi32<1>(shifted); - c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xe0); - counter = _mm256_sub_epi32(counter, c); - bitMask |= (_mm256_movemask_epi8(c) << 12); - - shifted = AvxSupport::shiftRightEpi32<1>(shifted); - c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xf0); - counter = _mm256_sub_epi32(counter, c); - bitMask |= (_mm256_movemask_epi8(c) << 16); - - shifted = AvxSupport::shiftRightEpi32<1>(shifted); - c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xf8); - counter = _mm256_sub_epi32(counter, c); - bitMask |= (_mm256_movemask_epi8(c) << 20); - - shifted = AvxSupport::shiftRightEpi32<1>(shifted); - c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xfc); - counter = _mm256_sub_epi32(counter, c); - bitMask |= (_mm256_movemask_epi8(c) << 24); - - shifted = AvxSupport::shiftRightEpi32<1>(shifted); - c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xfe); - counter = _mm256_sub_epi32(counter, c); - bitMask |= 
(_mm256_movemask_epi8(c) << 28); - - return { counter, bitMask }; - } -public: - template - inline static void calcHistoOfVectorEpi32(const __m256i colorVec, T& histogram) - { - const auto [nrEqualColors, bitMask] = detectConflictsEpi32(colorVec); - - const __m256i sourceHisto = _mm256_i32gather_epi32((const int*)&*histogram.begin(), colorVec, 4); - const __m256i updatedHisto = _mm256_add_epi32(sourceHisto, nrEqualColors); - - if ((bitMask & 1) == 0) // No conflict - histogram[_mm256_cvtsi256_si32(colorVec)] = _mm256_cvtsi256_si32(updatedHisto); - if ((bitMask & (1 << 4)) == 0) - histogram[_mm256_extract_epi32(colorVec, 1)] = _mm256_extract_epi32(updatedHisto, 1); - if ((bitMask & (1 << 8)) == 0) - histogram[_mm256_extract_epi32(colorVec, 2)] = _mm256_extract_epi32(updatedHisto, 2); - if ((bitMask & (1 << 12)) == 0) - histogram[_mm256_extract_epi32(colorVec, 3)] = _mm256_extract_epi32(updatedHisto, 3); - const __m128i colorHiLane = _mm256_extracti128_si256(colorVec, 1); - const __m128i histoHiLane = _mm256_extracti128_si256(updatedHisto, 1); - if ((bitMask & (1 << 16)) == 0) - histogram[_mm_cvtsi128_si32(colorHiLane)] = _mm_cvtsi128_si32(histoHiLane); - if ((bitMask & (1 << 20)) == 0) - histogram[_mm_extract_epi32(colorHiLane, 1)] = _mm_extract_epi32(histoHiLane, 1); - if ((bitMask & (1 << 24)) == 0) - histogram[_mm_extract_epi32(colorHiLane, 2)] = _mm_extract_epi32(histoHiLane, 2); - if ((bitMask & (1 << 28)) == 0) - histogram[_mm_extract_epi32(colorHiLane, 3)] = _mm_extract_epi32(histoHiLane, 3); - }; - - template - inline static void addToHisto(H& histo, T grayValue) - { - if constexpr (std::is_same::value) - grayValue *= 256.0; - if constexpr (std::is_integral::value && sizeof(T) == 4) // 32 bit integral type - grayValue >>= 16; - constexpr size_t Unsigned_short_max = size_t{ std::numeric_limits::max() }; - ++histo[std::min(static_cast(grayValue), Unsigned_short_max)]; - }; -}; +#pragma once +#include "avx_cfa.h" +#include "avx_support.h" + +class AvxHistogram 
+{ +public: + typedef std::vector HistogramVectorType; +private: + bool avxReady; + bool allRunsSuccessful; + HistogramVectorType redHisto; + HistogramVectorType greenHisto; + HistogramVectorType blueHisto; + AvxCfaProcessing avxCfa; + CMemoryBitmap& inputBitmap; +public: + AvxHistogram() = delete; + AvxHistogram(CMemoryBitmap& inputbm); + AvxHistogram(const AvxHistogram&) = default; + AvxHistogram(AvxHistogram&&) = delete; + AvxHistogram& operator=(const AvxHistogram&) = delete; + + int calcHistogram(const size_t lineStart, const size_t lineEnd); + int mergeHistograms(HistogramVectorType& red, HistogramVectorType& green, HistogramVectorType& blue); + bool histogramSuccessful() const; + inline bool isAvxReady() const { return this->avxReady; } +private: + static constexpr size_t HistogramSize() { return std::numeric_limits::max() + size_t{1}; } + template + int doCalcHistogram(const size_t lineStart, const size_t lineEnd); + + // Conflict detection: Number of equal elements + blocking mask. + inline static std::tuple<__m256i, std::uint32_t> detectConflictsEpi32(const __m256i a) noexcept + { + __m256i counter = _mm256_set1_epi32(1); + std::uint32_t bitMask = 0; + + __m256i shifted = AvxSupport::shiftRightEpi32<1>(a); + __m256i c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0x80); // Set highest mask to "no conflict" (=0) + counter = _mm256_sub_epi32(counter, c); // Add one where there is a conflict with the element left from it (by one position). + bitMask |= (_mm256_movemask_epi8(c) << 4); // If there is a conflict with the element left from it -> set the mask of that element to 1. 
+ + shifted = AvxSupport::shiftRightEpi32<1>(shifted); + c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xc0); + counter = _mm256_sub_epi32(counter, c); + bitMask |= (_mm256_movemask_epi8(c) << 8); + + shifted = AvxSupport::shiftRightEpi32<1>(shifted); + c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xe0); + counter = _mm256_sub_epi32(counter, c); + bitMask |= (_mm256_movemask_epi8(c) << 12); + + shifted = AvxSupport::shiftRightEpi32<1>(shifted); + c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xf0); + counter = _mm256_sub_epi32(counter, c); + bitMask |= (_mm256_movemask_epi8(c) << 16); + + shifted = AvxSupport::shiftRightEpi32<1>(shifted); + c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xf8); + counter = _mm256_sub_epi32(counter, c); + bitMask |= (_mm256_movemask_epi8(c) << 20); + + shifted = AvxSupport::shiftRightEpi32<1>(shifted); + c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xfc); + counter = _mm256_sub_epi32(counter, c); + bitMask |= (_mm256_movemask_epi8(c) << 24); + + shifted = AvxSupport::shiftRightEpi32<1>(shifted); + c = _mm256_blend_epi32(_mm256_cmpeq_epi32(a, shifted), _mm256_setzero_si256(), 0xfe); + counter = _mm256_sub_epi32(counter, c); + bitMask |= (_mm256_movemask_epi8(c) << 28); + + return { counter, bitMask }; + } +public: + template + inline static void calcHistoOfVectorEpi32(const __m256i colorVec, T& histogram) + { + const auto [nrEqualColors, bitMask] = detectConflictsEpi32(colorVec); + + const __m256i sourceHisto = _mm256_i32gather_epi32((const int*)&*histogram.begin(), colorVec, 4); + const __m256i updatedHisto = _mm256_add_epi32(sourceHisto, nrEqualColors); + + if ((bitMask & 1) == 0) // No conflict + histogram[_mm256_cvtsi256_si32(colorVec)] = _mm256_cvtsi256_si32(updatedHisto); + if ((bitMask & (1 << 4)) == 0) + histogram[_mm256_extract_epi32(colorVec, 1)] = 
_mm256_extract_epi32(updatedHisto, 1); + if ((bitMask & (1 << 8)) == 0) + histogram[_mm256_extract_epi32(colorVec, 2)] = _mm256_extract_epi32(updatedHisto, 2); + if ((bitMask & (1 << 12)) == 0) + histogram[_mm256_extract_epi32(colorVec, 3)] = _mm256_extract_epi32(updatedHisto, 3); + const __m128i colorHiLane = _mm256_extracti128_si256(colorVec, 1); + const __m128i histoHiLane = _mm256_extracti128_si256(updatedHisto, 1); + if ((bitMask & (1 << 16)) == 0) + histogram[_mm_cvtsi128_si32(colorHiLane)] = _mm_cvtsi128_si32(histoHiLane); + if ((bitMask & (1 << 20)) == 0) + histogram[_mm_extract_epi32(colorHiLane, 1)] = _mm_extract_epi32(histoHiLane, 1); + if ((bitMask & (1 << 24)) == 0) + histogram[_mm_extract_epi32(colorHiLane, 2)] = _mm_extract_epi32(histoHiLane, 2); + if ((bitMask & (1 << 28)) == 0) + histogram[_mm_extract_epi32(colorHiLane, 3)] = _mm_extract_epi32(histoHiLane, 3); + }; + + template + inline static void addToHisto(H& histo, T grayValue) + { + if constexpr (std::is_same::value) + grayValue *= 256.0; + if constexpr (std::is_integral::value && sizeof(T) == 4) // 32 bit integral type + grayValue >>= 16; + constexpr size_t Unsigned_short_max = size_t{ std::numeric_limits::max() }; + ++histo[std::min(static_cast(grayValue), Unsigned_short_max)]; + }; +}; diff --git a/DeepSkyStacker/avx_luminance.cpp b/DeepSkyStackerKernel/avx_luminance.cpp similarity index 97% rename from DeepSkyStacker/avx_luminance.cpp rename to DeepSkyStackerKernel/avx_luminance.cpp index 3781820f..bfcaab9b 100644 --- a/DeepSkyStacker/avx_luminance.cpp +++ b/DeepSkyStackerKernel/avx_luminance.cpp @@ -1,145 +1,145 @@ -#include "stdafx.h" -#include "avx_luminance.h" -#include "avx_cfa.h" -#include "avx_support.h" - -AvxLuminance::AvxLuminance(CMemoryBitmap& inputbm, CMemoryBitmap& outbm) : - inputBitmap{ inputbm }, - outputBitmap{ outbm }, - avxReady{ true } -{ - if (!AvxSupport::checkSimdAvailability()) - avxReady = false; - - // Check output bitmap (must be monochrome-double). 
- if (!AvxSupport{ outputBitmap }.isMonochromeBitmapOfType()) - avxReady = false; -} - -int AvxLuminance::computeLuminanceBitmap(const size_t lineStart, const size_t lineEnd) -{ - if (!avxReady) - return 1; - - int rval = 1; - if (doComputeLuminance(lineStart, lineEnd) == 0 - || doComputeLuminance(lineStart, lineEnd) == 0 - || doComputeLuminance(lineStart, lineEnd) == 0) - { - rval = 0; - } - return AvxSupport::zeroUpper(rval); -} - -template -int AvxLuminance::doComputeLuminance(const size_t lineStart, const size_t lineEnd) -{ - constexpr double scalingFactor = 1.0 / 256.0; - - // Check input bitmap. - const AvxSupport avxInputSupport{ inputBitmap }; - if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) // Monochrome includes CFA - return 1; - - AvxSupport avxOutputSupport{ outputBitmap }; - const size_t width = inputBitmap.Width(); - constexpr size_t vectorLen = 16; - const size_t nrVectors = width / vectorLen; - - const auto scaleAndStoreLuminance = [scalingFactor](const __m256d d0, const __m256d d1, const __m256d d2, const __m256d d3, double *const pOut) -> void - { - const __m256d vScalingFactor = _mm256_set1_pd(scalingFactor); - _mm256_storeu_pd(pOut, _mm256_mul_pd(d0, vScalingFactor)); - _mm256_storeu_pd(pOut + 4, _mm256_mul_pd(d1, vScalingFactor)); - _mm256_storeu_pd(pOut + 8, _mm256_mul_pd(d2, vScalingFactor)); - _mm256_storeu_pd(pOut + 12, _mm256_mul_pd(d3, vScalingFactor)); - }; - - const auto readColorValue = [](const T* const pColor) -> T - { - if constexpr (std::is_integral::value && sizeof(T) == 4) // 32 bit integral type - return (*pColor) >> 16; - else - return *pColor; - }; - - const bool isCFA = avxInputSupport.isMonochromeCfaBitmapOfType(); - - if (avxInputSupport.isColorBitmapOfType() || isCFA) - { - AvxCfaProcessing avxCfa{ 0, 0, inputBitmap }; - if (isCFA) - { - avxCfa.init(lineStart, lineEnd); - avxCfa.interpolate(lineStart, lineEnd, 1); - } - - for (size_t row = lineStart, lineNdx = 0; row < lineEnd; 
++row, ++lineNdx) - { - const T* pRedPixels = isCFA ? avxCfa.redCfaLine(lineNdx) : &avxInputSupport.redPixels().at(row * width); - const T* pGreenPixels = isCFA ? avxCfa.greenCfaLine(lineNdx) : &avxInputSupport.greenPixels().at(row * width); - const T* pBluePixels = isCFA ? avxCfa.blueCfaLine(lineNdx) : &avxInputSupport.bluePixels().at(row * width); - double* pOut = &avxOutputSupport.grayPixels().at(row * width); - - for (size_t counter = 0; counter < nrVectors; ++counter, pRedPixels += vectorLen, pGreenPixels += vectorLen, pBluePixels += vectorLen, pOut += vectorLen) - { - const auto [d0, d1, d2, d3] = colorLuminance(pRedPixels, pGreenPixels, pBluePixels); - scaleAndStoreLuminance(d0, d1, d2, d3, pOut); - } - // Remaining pixels of line. - for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pRedPixels, ++pGreenPixels, ++pBluePixels, ++pOut) - { - const T red = readColorValue(pRedPixels); - const T green = readColorValue(pGreenPixels); - const T blue = readColorValue(pBluePixels); - const T minColor = std::min(std::min(red, green), blue); - const T maxColor = std::max(std::max(red, green), blue); - *pOut = (static_cast(minColor) + static_cast(maxColor)) * (0.5 * scalingFactor); - } - } - return 0; - } - - if (avxInputSupport.isMonochromeBitmapOfType()) - { - for (size_t row = lineStart; row < lineEnd; ++row) - { - const T* pGreyPixels = &avxInputSupport.grayPixels().at(row * width); - double* pOut = &avxOutputSupport.grayPixels().at(row * width); - - for (size_t counter = 0; counter < nrVectors; ++counter, pGreyPixels += vectorLen, pOut += vectorLen) - { - const auto [d0, d1, d2, d3] = greyLuminance(pGreyPixels); - scaleAndStoreLuminance(d0, d1, d2, d3, pOut); - } - // Remaining pixels of line. 
- for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pGreyPixels, ++pOut) - { - const T grey = readColorValue(pGreyPixels); - *pOut = static_cast(grey) * scalingFactor; - } - } - return 0; - } - - return 1; -} - -template -std::tuple<__m256d, __m256d, __m256d, __m256d> AvxLuminance::colorLuminance(const T *const pRed, const T *const pGreen, const T *const pBlue) -{ - const __m256i red = AvxSupport::read16PackedShort(pRed); - const __m256i green = AvxSupport::read16PackedShort(pGreen); - const __m256i blue = AvxSupport::read16PackedShort(pBlue); - const __m256i minColor = _mm256_min_epu16(_mm256_min_epu16(red, green), blue); - const __m256i maxColor = _mm256_max_epu16(_mm256_max_epu16(red, green), blue); - const __m256i minMaxAvg = _mm256_avg_epu16(minColor, maxColor); - return AvxSupport::wordToPackedDouble(minMaxAvg); -} - -template -std::tuple<__m256d, __m256d, __m256d, __m256d> AvxLuminance::greyLuminance(const T* const pGray) -{ - const __m256i gray = AvxSupport::read16PackedShort(pGray); - return AvxSupport::wordToPackedDouble(gray); -} +#include "stdafx.h" +#include "avx_luminance.h" +#include "avx_cfa.h" +#include "avx_support.h" + +AvxLuminance::AvxLuminance(CMemoryBitmap& inputbm, CMemoryBitmap& outbm) : + inputBitmap{ inputbm }, + outputBitmap{ outbm }, + avxReady{ true } +{ + if (!AvxSupport::checkSimdAvailability()) + avxReady = false; + + // Check output bitmap (must be monochrome-double). 
+ if (!AvxSupport{ outputBitmap }.isMonochromeBitmapOfType()) + avxReady = false; +} + +int AvxLuminance::computeLuminanceBitmap(const size_t lineStart, const size_t lineEnd) +{ + if (!avxReady) + return 1; + + int rval = 1; + if (doComputeLuminance(lineStart, lineEnd) == 0 + || doComputeLuminance(lineStart, lineEnd) == 0 + || doComputeLuminance(lineStart, lineEnd) == 0) + { + rval = 0; + } + return AvxSupport::zeroUpper(rval); +} + +template +int AvxLuminance::doComputeLuminance(const size_t lineStart, const size_t lineEnd) +{ + constexpr double scalingFactor = 1.0 / 256.0; + + // Check input bitmap. + const AvxSupport avxInputSupport{ inputBitmap }; + if (!avxInputSupport.isColorBitmapOfType() && !avxInputSupport.isMonochromeBitmapOfType()) // Monochrome includes CFA + return 1; + + AvxSupport avxOutputSupport{ outputBitmap }; + const size_t width = inputBitmap.Width(); + constexpr size_t vectorLen = 16; + const size_t nrVectors = width / vectorLen; + + const auto scaleAndStoreLuminance = [scalingFactor](const __m256d d0, const __m256d d1, const __m256d d2, const __m256d d3, double *const pOut) -> void + { + const __m256d vScalingFactor = _mm256_set1_pd(scalingFactor); + _mm256_storeu_pd(pOut, _mm256_mul_pd(d0, vScalingFactor)); + _mm256_storeu_pd(pOut + 4, _mm256_mul_pd(d1, vScalingFactor)); + _mm256_storeu_pd(pOut + 8, _mm256_mul_pd(d2, vScalingFactor)); + _mm256_storeu_pd(pOut + 12, _mm256_mul_pd(d3, vScalingFactor)); + }; + + const auto readColorValue = [](const T* const pColor) -> T + { + if constexpr (std::is_integral::value && sizeof(T) == 4) // 32 bit integral type + return (*pColor) >> 16; + else + return *pColor; + }; + + const bool isCFA = avxInputSupport.isMonochromeCfaBitmapOfType(); + + if (avxInputSupport.isColorBitmapOfType() || isCFA) + { + AvxCfaProcessing avxCfa{ 0, 0, inputBitmap }; + if (isCFA) + { + avxCfa.init(lineStart, lineEnd); + avxCfa.interpolate(lineStart, lineEnd, 1); + } + + for (size_t row = lineStart, lineNdx = 0; row < lineEnd; 
++row, ++lineNdx) + { + const T* pRedPixels = isCFA ? avxCfa.redCfaLine(lineNdx) : &avxInputSupport.redPixels().at(row * width); + const T* pGreenPixels = isCFA ? avxCfa.greenCfaLine(lineNdx) : &avxInputSupport.greenPixels().at(row * width); + const T* pBluePixels = isCFA ? avxCfa.blueCfaLine(lineNdx) : &avxInputSupport.bluePixels().at(row * width); + double* pOut = &avxOutputSupport.grayPixels().at(row * width); + + for (size_t counter = 0; counter < nrVectors; ++counter, pRedPixels += vectorLen, pGreenPixels += vectorLen, pBluePixels += vectorLen, pOut += vectorLen) + { + const auto [d0, d1, d2, d3] = colorLuminance(pRedPixels, pGreenPixels, pBluePixels); + scaleAndStoreLuminance(d0, d1, d2, d3, pOut); + } + // Remaining pixels of line. + for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pRedPixels, ++pGreenPixels, ++pBluePixels, ++pOut) + { + const T red = readColorValue(pRedPixels); + const T green = readColorValue(pGreenPixels); + const T blue = readColorValue(pBluePixels); + const T minColor = std::min(std::min(red, green), blue); + const T maxColor = std::max(std::max(red, green), blue); + *pOut = (static_cast(minColor) + static_cast(maxColor)) * (0.5 * scalingFactor); + } + } + return 0; + } + + if (avxInputSupport.isMonochromeBitmapOfType()) + { + for (size_t row = lineStart; row < lineEnd; ++row) + { + const T* pGreyPixels = &avxInputSupport.grayPixels().at(row * width); + double* pOut = &avxOutputSupport.grayPixels().at(row * width); + + for (size_t counter = 0; counter < nrVectors; ++counter, pGreyPixels += vectorLen, pOut += vectorLen) + { + const auto [d0, d1, d2, d3] = greyLuminance(pGreyPixels); + scaleAndStoreLuminance(d0, d1, d2, d3, pOut); + } + // Remaining pixels of line. 
+ for (size_t n = nrVectors * vectorLen; n < width; ++n, ++pGreyPixels, ++pOut) + { + const T grey = readColorValue(pGreyPixels); + *pOut = static_cast(grey) * scalingFactor; + } + } + return 0; + } + + return 1; +} + +template +std::tuple<__m256d, __m256d, __m256d, __m256d> AvxLuminance::colorLuminance(const T *const pRed, const T *const pGreen, const T *const pBlue) +{ + const __m256i red = AvxSupport::read16PackedShort(pRed); + const __m256i green = AvxSupport::read16PackedShort(pGreen); + const __m256i blue = AvxSupport::read16PackedShort(pBlue); + const __m256i minColor = _mm256_min_epu16(_mm256_min_epu16(red, green), blue); + const __m256i maxColor = _mm256_max_epu16(_mm256_max_epu16(red, green), blue); + const __m256i minMaxAvg = _mm256_avg_epu16(minColor, maxColor); + return AvxSupport::wordToPackedDouble(minMaxAvg); +} + +template +std::tuple<__m256d, __m256d, __m256d, __m256d> AvxLuminance::greyLuminance(const T* const pGray) +{ + const __m256i gray = AvxSupport::read16PackedShort(pGray); + return AvxSupport::wordToPackedDouble(gray); +} diff --git a/DeepSkyStacker/avx_luminance.h b/DeepSkyStackerKernel/avx_luminance.h similarity index 96% rename from DeepSkyStacker/avx_luminance.h rename to DeepSkyStackerKernel/avx_luminance.h index c2afcdfd..dcfee492 100644 --- a/DeepSkyStacker/avx_luminance.h +++ b/DeepSkyStackerKernel/avx_luminance.h @@ -1,28 +1,28 @@ -#pragma once - -#include "BitmapBase.h" - -class AvxLuminance -{ -private: - CMemoryBitmap& inputBitmap; - CMemoryBitmap& outputBitmap; - bool avxReady; -public: - AvxLuminance() = delete; - AvxLuminance(CMemoryBitmap& inputbm, CMemoryBitmap& outbm); - AvxLuminance(const AvxLuminance&) = default; - AvxLuminance(AvxLuminance&&) = delete; - AvxLuminance& operator=(const AvxLuminance&) = delete; - - int computeLuminanceBitmap(const size_t lineStart, const size_t lineEnd); -private: - template - int doComputeLuminance(const size_t lineStart, const size_t lineEnd); - - template - static std::tuple<__m256d, 
__m256d, __m256d, __m256d> colorLuminance(const T *const pRed, const T *const pGreen, const T *const pBlue); - - template - static std::tuple<__m256d, __m256d, __m256d, __m256d> greyLuminance(const T* const pGray); -}; +#pragma once + +#include "BitmapBase.h" + +class AvxLuminance +{ +private: + CMemoryBitmap& inputBitmap; + CMemoryBitmap& outputBitmap; + bool avxReady; +public: + AvxLuminance() = delete; + AvxLuminance(CMemoryBitmap& inputbm, CMemoryBitmap& outbm); + AvxLuminance(const AvxLuminance&) = default; + AvxLuminance(AvxLuminance&&) = delete; + AvxLuminance& operator=(const AvxLuminance&) = delete; + + int computeLuminanceBitmap(const size_t lineStart, const size_t lineEnd); +private: + template + int doComputeLuminance(const size_t lineStart, const size_t lineEnd); + + template + static std::tuple<__m256d, __m256d, __m256d, __m256d> colorLuminance(const T *const pRed, const T *const pGreen, const T *const pBlue); + + template + static std::tuple<__m256d, __m256d, __m256d, __m256d> greyLuminance(const T* const pGray); +}; diff --git a/DeepSkyStacker/avx_median.h b/DeepSkyStackerKernel/avx_median.h similarity index 95% rename from DeepSkyStacker/avx_median.h rename to DeepSkyStackerKernel/avx_median.h index 5638660f..3607d0bf 100644 --- a/DeepSkyStacker/avx_median.h +++ b/DeepSkyStackerKernel/avx_median.h @@ -1,84 +1,84 @@ -#pragma once - -namespace Medianhelper -{ - - template - inline void swap(T* a, T* b) - { - T temp = *a; - *a = *b; - *b = temp; - } - - // Returns the index i of the pivot element, so that (arr[n] < pivot) for (n < i). 
- template - inline int partition(T arr[], const int l, const int r) - { - const T pivot = arr[r]; - int i = l, j = l; - while (j < r) - { - if (arr[j++] < pivot) - i++; - else - break; - } - while (j < r) - { - if (arr[j] < pivot) - { - swap(&arr[i], &arr[j]); - i++; - } - j++; - } - swap(&arr[i], &arr[r]); - return i; - } - - template - inline void MedianUtil(T arr[], const int l, const int r, const int quantile, T& a, T& b, bool& x, bool& y) - { - if (l <= r) - { - int partitionIndex = partition(arr, l, r); // Left of partitionIndex are only smaller elements. - - // If partion index = quantile, then we found the median of odd number element in arr[] - if (partitionIndex == quantile) - { - b = arr[partitionIndex]; - y = true; - if (x) - return; - } - // If index = quantile - 1, then we get a & b as middle element of arr[] - else if (partitionIndex == quantile - 1) - { - a = arr[partitionIndex]; - x = true; - if (y) - return; - } - - // If partitionIndex >= quantile then find the index in first half of arr[] - if (partitionIndex >= quantile) - return MedianUtil(arr, l, partitionIndex - 1, quantile, a, b, x, y); - // If partitionIndex <= quantile then find the index in second half of arr[] - else - return MedianUtil(arr, partitionIndex + 1, r, quantile, a, b, x, y); - } - } - -}; - -template -inline T qMedian(T arr[], const int n, const int quantile) -{ - T a, b; - bool x = false, y = false; - Medianhelper::MedianUtil(arr, 0, n - 1, quantile, a, b, x, y); - - // If n is even -> (a+b)/2 ELSE b - return ((n & 1) == 0) ? ((a + b) / 2) : b; -} +#pragma once + +namespace Medianhelper +{ + + template + inline void swap(T* a, T* b) + { + T temp = *a; + *a = *b; + *b = temp; + } + + // Returns the index i of the pivot element, so that (arr[n] < pivot) for (n < i). 
+ template + inline int partition(T arr[], const int l, const int r) + { + const T pivot = arr[r]; + int i = l, j = l; + while (j < r) + { + if (arr[j++] < pivot) + i++; + else + break; + } + while (j < r) + { + if (arr[j] < pivot) + { + swap(&arr[i], &arr[j]); + i++; + } + j++; + } + swap(&arr[i], &arr[r]); + return i; + } + + template + inline void MedianUtil(T arr[], const int l, const int r, const int quantile, T& a, T& b, bool& x, bool& y) + { + if (l <= r) + { + int partitionIndex = partition(arr, l, r); // Left of partitionIndex are only smaller elements. + + // If partion index = quantile, then we found the median of odd number element in arr[] + if (partitionIndex == quantile) + { + b = arr[partitionIndex]; + y = true; + if (x) + return; + } + // If index = quantile - 1, then we get a & b as middle element of arr[] + else if (partitionIndex == quantile - 1) + { + a = arr[partitionIndex]; + x = true; + if (y) + return; + } + + // If partitionIndex >= quantile then find the index in first half of arr[] + if (partitionIndex >= quantile) + return MedianUtil(arr, l, partitionIndex - 1, quantile, a, b, x, y); + // If partitionIndex <= quantile then find the index in second half of arr[] + else + return MedianUtil(arr, partitionIndex + 1, r, quantile, a, b, x, y); + } + } + +}; + +template +inline T qMedian(T arr[], const int n, const int quantile) +{ + T a, b; + bool x = false, y = false; + Medianhelper::MedianUtil(arr, 0, n - 1, quantile, a, b, x, y); + + // If n is even -> (a+b)/2 ELSE b + return ((n & 1) == 0) ? 
((a + b) / 2) : b; +} diff --git a/DeepSkyStacker/avx_output.cpp b/DeepSkyStackerKernel/avx_output.cpp similarity index 97% rename from DeepSkyStacker/avx_output.cpp rename to DeepSkyStackerKernel/avx_output.cpp index 1f031334..7c75c5ec 100644 --- a/DeepSkyStacker/avx_output.cpp +++ b/DeepSkyStackerKernel/avx_output.cpp @@ -1,572 +1,572 @@ -#include "stdafx.h" -#include "avx_output.h" -#include "avx_support.h" -#include "avx_median.h" -#include "MultiBitmap.h" -#include "ColorMultiBitmap.h" -#include "GreyMultiBitmap.h" - -AvxOutputComposition::AvxOutputComposition(CMultiBitmap& mBitmap, CMemoryBitmap& outputbm) : - inputBitmap{ mBitmap }, - outputBitmap{ outputbm }, - avxReady{ true } -{ - if (!AvxSupport::checkSimdAvailability()) - avxReady = false; - // Homogenization not implemented with AVX - if (inputBitmap.GetHomogenization()) - avxReady = false; - // Output must be float values - if (AvxSupport{outputBitmap}.bitmapHasCorrectType() == false) - avxReady = false; -} - -template -static bool AvxOutputComposition::bitmapColorOrGray(const CMultiBitmap& bitmap) noexcept -{ - return - (dynamic_cast*>(&bitmap) != nullptr) || // dynamic_cast for pointers does not throw - (dynamic_cast*>(&bitmap) != nullptr); // (for references it could). -} - -template -inline static float AvxOutputComposition::convertToFloat(const T value) noexcept -{ - if constexpr (std::is_integral_v && sizeof(T) == 4) // 32 bit integral type - return static_cast(value >> 16); - else - return static_cast(value); -} - -int AvxOutputComposition::compose(const int line, std::vector const& lineAddresses) -{ - if (!avxReady) - return 1; - // If this is not equal, something went wrong and we cannot continue without risking access violations. - if (lineAddresses.size() != inputBitmap.GetNrAddedBitmaps()) - return 1; - // No line addresses? 
- if (lineAddresses.empty()) - return 1; - - int rval = 2; - switch (inputBitmap.GetProcessingMethod()) - { - case MBP_MEDIAN: rval = processMedianKappaSigma(line, lineAddresses); break; - case MBP_SIGMACLIP: rval = processMedianKappaSigma(line, lineAddresses); break; - case MBP_AUTOADAPTIVE: rval = processAutoAdaptiveWeightedAverage(line, lineAddresses); break; - case MBP_MEDIANSIGMACLIP: rval = processMedianKappaSigma(line, lineAddresses); break; - default: rval = 2; break; - } - - return AvxSupport::zeroUpper(rval); -} - -template -int AvxOutputComposition::processMedianKappaSigma(const int line, std::vector const& lineAddresses) -{ - if (doProcessMedianKappaSigma(line, lineAddresses) == 0) - return 0; - if (doProcessMedianKappaSigma(line, lineAddresses) == 0) - return 0; - if (doProcessMedianKappaSigma(line, lineAddresses) == 0) - return 0; - return 1; -} - -#pragma warning( push ) -#pragma warning( disable : 4324 ) // Structure was padded -#pragma warning( disable : 4100 ) // Unreferenced variable - -template -int AvxOutputComposition::doProcessMedianKappaSigma(const int line, std::vector const& lineAddresses) -{ - static_assert(std::is_same_v || (std::is_integral_v && std::is_unsigned_v)); - - // CMultiBitmap - template: Input must be of type T, and output type must be float. 
- if (bitmapColorOrGray(inputBitmap) == false) - return 1; - - const auto parameters = inputBitmap.GetProcessingParameters(); - - const int width = outputBitmap.RealWidth(); - const int nrVectors = width / 16; - const int nrLightframes = static_cast(lineAddresses.size()); - - const auto accumulateSquared = [](const __m256d accumulator, const __m128 colorValue) noexcept -> __m256d - { - const __m256d pd = _mm256_cvtps_pd(colorValue); - return _mm256_fmadd_pd(pd, pd, accumulator); - }; - const auto sigma = [](const __m256 sum, const __m256d sumSqLo, const __m256d sumSqHi, const __m256 N) -> __m256 - { - // Sigma� = sumSquared / N - �� = 1/N * (sumSquared - sum� / N) - const __m256d Nlo = _mm256_cvtps_pd(_mm256_extractf128_ps(N, 0)); - const __m256d Nhi = _mm256_cvtps_pd(_mm256_extractf128_ps(N, 1)); - const __m256d sumLo = _mm256_cvtps_pd(_mm256_extractf128_ps(sum, 0)); - const __m256d sumHi = _mm256_cvtps_pd(_mm256_extractf128_ps(sum, 1)); - const __m256d sigmaSqLoN = _mm256_sub_pd(sumSqLo, _mm256_div_pd(_mm256_mul_pd(sumLo, sumLo), Nlo)); - const __m256d sigmaSqHiN = _mm256_sub_pd(sumSqHi, _mm256_div_pd(_mm256_mul_pd(sumHi, sumHi), Nhi)); - const __m256 sigmaSqN = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm256_cvtpd_ps(sigmaSqLoN)), _mm256_cvtpd_ps(sigmaSqHiN), 1); - return _mm256_sqrt_ps(_mm256_div_ps(sigmaSqN, N)); - }; - const __m256 kappa = _mm256_set1_ps(static_cast(std::get<0>(parameters))); - - - // ************* Median ************* - - std::vector medianData(nrLightframes * size_t{ 16 }); // 16 pixels x nLightframes - int sizes[16]; // Nr of lightframes != 0 for the 16 pixels - - const auto initMedianData = [&lineAddresses, &medianData, &sizes, nrLightframes](const size_t offset, const int nPixels) -> void - { - memset(medianData.data(), 0, medianData.size() * sizeof(T)); - memset(sizes, 0, sizeof(sizes)); - - std::for_each(lineAddresses.cbegin(), lineAddresses.cend(), [&medianData, &sizes, offset, nPixels, nrLightframes](const void* const p) - { - 
constexpr T zero = T{ 0 }; - const T* const pT = static_cast(p) + offset; - for (int n = 0; n < nPixels; ++n) // nPixels is 1..16 - { - auto& N = sizes[n]; - T element = pT[n]; - if constexpr (std::is_same_v) // First divide by scaling factor, then compare with zero. - element >>= 16; - if (element != zero) // Copy all lightframe values that are != 0. - medianData[n * nrLightframes + (N++)] = element; - } - }); - }; - - const auto quickMedian = [&medianData, &sizes, nrLightframes](const size_t pixelIndex, const float lBound, const float uBound, const float currMedian) -> float - { - const auto N = sizes[pixelIndex]; - if (N == 0) - return 0.0f; - - const T lowerBound = static_cast(lBound); - const T upperBound = static_cast(uBound); - const T currentMedian = static_cast(currMedian); - T* const pData = medianData.data() + pixelIndex * nrLightframes; - - if constexpr (std::is_same::value) - { - for (int n = 0; n < N / 8; ++n) - { - const __m128i v = _mm_loadu_si128(((const __m128i*)pData) + n); - const __m256i v32 = _mm256_cvtepu16_epi32(v); - const __m256i outOfRange = _mm256_or_si256(_mm256_cmpgt_epi32(_mm256_set1_epi32(lowerBound), v32), _mm256_cmpgt_epi32(v32, _mm256_set1_epi32(upperBound))); - const __m256i vCorrected = _mm256_blendv_epi8(v32, _mm256_set1_epi32(currentMedian), outOfRange); - _mm_storeu_si128(((__m128i*)pData) + n, AvxSupport::cvtEpi32Epu16(vCorrected)); - } - for (int n = (N / 8) * 8; n < N; ++n) - if (pData[n] < lowerBound || pData[n] > upperBound) - pData[n] = currentMedian; - } - else // uint32, float - { - for (int n = 0; n < N; ++n) // All lightframes != 0 - if (pData[n] < lowerBound || pData[n] > upperBound) - pData[n] = currentMedian; - } - - return static_cast(qMedian(pData, N, N / 2)); - }; - - const auto vectorMedian = [&quickMedian](__m256& loMedian, __m256& hiMedian, const __m256 loLoBound, const __m256 hiLoBound, const __m256 loHiBound, const __m256 hiHiBound) -> void - { - for (size_t n = 0; n < 8; ++n) - loMedian.m256_f32[n] = 
quickMedian(n, loLoBound.m256_f32[n], loHiBound.m256_f32[n], loMedian.m256_f32[n]); - for (size_t n = 0; n < 8; ++n) - hiMedian.m256_f32[n] = quickMedian(n + 8, hiLoBound.m256_f32[n], hiHiBound.m256_f32[n], hiMedian.m256_f32[n]); - }; - - - // ************* Updater for the data sequences ************* - - const auto zeroVectorUpdater = [](__m256& lo8, __m256& hi8, __m256& loN, __m256& hiN, const __m256 loOutOfRangeMask, const __m256 hiOutOfRangeMask) -> void - { - lo8 = _mm256_andnot_ps(loOutOfRangeMask, lo8); // Set to zero where value is outside my +- kappa * sigma. - hi8 = _mm256_andnot_ps(hiOutOfRangeMask, hi8); - loN = _mm256_add_ps(loN, _mm256_and_ps(_mm256_set1_ps(1.0f), _mm256_cmp_ps(lo8, _mm256_setzero_ps(), 12))); // if (value != 0) ++N; (12 means: not equal) - hiN = _mm256_add_ps(hiN, _mm256_and_ps(_mm256_set1_ps(1.0f), _mm256_cmp_ps(hi8, _mm256_setzero_ps(), 12))); - }; - const auto zeroScalarUpdater = [](float& value, float& N, const bool outOfRange) -> void - { - if (value == 0.0f) - return; - if (outOfRange) - value = 0.0f; - else - ++N; // If value != 0 -> ++N - }; -/* const auto medianVectorUpdater = [](__m256& lo8, __m256& hi8, __m256& loN, __m256& hiN, const __m256 loMedian, const __m256 hiMedian, const __m256 loOutOfRangeMask, const __m256 hiOutOfRangeMask) -> void - { - const __m256 m0 = _mm256_cmp_ps(lo8, _mm256_setzero_ps(), 12); // != 0 - const __m256 m1 = _mm256_cmp_ps(hi8, _mm256_setzero_ps(), 12); - lo8 = _mm256_and_ps(_mm256_blendv_ps(lo8, loMedian, loOutOfRangeMask), m0); // Set to median where value is out of range (outside my +- kappa * sigma). - hi8 = _mm256_and_ps(_mm256_blendv_ps(hi8, hiMedian, hiOutOfRangeMask), m1); // If value was zero -> reset it to zero again. 
- loN = _mm256_add_ps(loN, _mm256_and_ps(_mm256_set1_ps(1.0f), m0)); // if (value != 0) ++N; - hiN = _mm256_add_ps(hiN, _mm256_and_ps(_mm256_set1_ps(1.0f), m1)); - }; - const auto medianScalarUpdater = [](float& value, float& N, const float median, const bool outOfRange) -> void - { - if (value == 0.0f) - return; - if (outOfRange) - value = median; - ++N; - }; -*/ - - constexpr const auto initialUpperBound = []() -> float - { - if constexpr (std::is_floating_point_v) - return static_cast(std::numeric_limits::max()); - else - return static_cast(std::numeric_limits::max()); // We use 65535 for all integers - }; - - // ************* Loops ************* - - const auto kappaSigmaLoop = [&](float* pOut, const int colorOffset) -> void - { - const size_t outputWidth = outputBitmap.Width(); - - for (int counter = 0; counter < nrVectors; ++counter, pOut += 16) - { - __m256 lowerBound1{ _mm256_setzero_ps() }; - __m256 lowerBound2{ _mm256_setzero_ps() }; - __m256 upperBound1{ _mm256_set1_ps(initialUpperBound()) }; - __m256 upperBound2{ _mm256_set1_ps(initialUpperBound()) }; - __m256 my1{ _mm256_undefined_ps() }; - __m256 my2{ _mm256_undefined_ps() }; - __m256 loMedian = _mm256_undefined_ps(); - __m256 hiMedian = _mm256_undefined_ps(); - - if constexpr (Method == MedianKappaSigma) - initMedianData(counter * size_t{ 16 } + colorOffset, 16); - - for (int iteration = 0; iteration < std::get<1>(parameters); ++iteration) - { - __m256 sum1 = _mm256_setzero_ps(); - __m256 sum2 = _mm256_setzero_ps(); - __m256 N1 = _mm256_setzero_ps(); - __m256 N2 = _mm256_setzero_ps(); - __m256d sumSq1 = _mm256_setzero_pd(); - __m256d sumSq2 = _mm256_setzero_pd(); - __m256d sumSq3 = _mm256_setzero_pd(); - __m256d sumSq4 = _mm256_setzero_pd(); - - if constexpr (Method == MedianKappaSigma) - { - vectorMedian(loMedian, hiMedian, lowerBound1, lowerBound2, upperBound1, upperBound2); - N1 = _mm256_cvtepi32_ps(_mm256_loadu_si256((const __m256i*)&sizes[0])); - N2 = _mm256_cvtepi32_ps(_mm256_loadu_si256((const 
__m256i*)&sizes[8])); - } - - // Loop over the light frames -// for (auto frameAddress : lineAddresses) - for (int lightFrame = 0; lightFrame < nrLightframes; ++lightFrame) - { - __m256 lo8, hi8; - if constexpr (Method == MedianKappaSigma) - { - const T* const pColor = medianData.data() + lightFrame; - const auto [l, h] = AvxSupport::read16PackedSingleStride(pColor, nrLightframes); - lo8 = l; - hi8 = h; - } - if constexpr (Method == KappaSigma) - { - const T* const pColor = static_cast(lineAddresses[lightFrame]) + counter * 16ULL + colorOffset; - const auto [l, h] = AvxSupport::read16PackedSingle(pColor); - lo8 = l; - hi8 = h; - const __m256 outOfRange1 = _mm256_or_ps(_mm256_cmp_ps(lo8, lowerBound1, 17), _mm256_cmp_ps(lo8, upperBound1, 30)); // 17: _CMP_LT_OQ, 30: _CMP_GT_OQ (x < lo OR x > hi) - const __m256 outOfRange2 = _mm256_or_ps(_mm256_cmp_ps(hi8, lowerBound2, 17), _mm256_cmp_ps(hi8, upperBound2, 30)); - zeroVectorUpdater(lo8, hi8, N1, N2, outOfRange1, outOfRange2); - } - - sum1 = _mm256_add_ps(sum1, lo8); - sum2 = _mm256_add_ps(sum2, hi8); - sumSq1 = accumulateSquared(sumSq1, _mm256_castps256_ps128(lo8)); - sumSq2 = accumulateSquared(sumSq2, _mm256_extractf128_ps(lo8, 1)); - sumSq3 = accumulateSquared(sumSq3, _mm256_castps256_ps128(hi8)); - sumSq4 = accumulateSquared(sumSq4, _mm256_extractf128_ps(hi8, 1)); - } - // Calc the new averages - const __m256 noValuesMask1 = _mm256_cmp_ps(N1, _mm256_setzero_ps(), 0); - const __m256 noValuesMask2 = _mm256_cmp_ps(N2, _mm256_setzero_ps(), 0); - my1 = _mm256_blendv_ps(_mm256_div_ps(sum1, N1), _mm256_setzero_ps(), noValuesMask1); // Low 8 floats. Set 0 where N==0. - my2 = _mm256_blendv_ps(_mm256_div_ps(sum2, N2), _mm256_setzero_ps(), noValuesMask2); // Hi 8 floats. Set 0 where N==0. 
- // Update lower and upper bound with new � +- kappa * sigma - const __m256 sigma1 = sigma(sum1, sumSq1, sumSq2, N1); - const __m256 sigma2 = sigma(sum2, sumSq3, sumSq4, N2); - upperBound1 = _mm256_blendv_ps(_mm256_fmadd_ps(sigma1, kappa, my1), _mm256_setzero_ps(), noValuesMask1); // Set 0 where N==0. - upperBound2 = _mm256_blendv_ps(_mm256_fmadd_ps(sigma2, kappa, my2), _mm256_setzero_ps(), noValuesMask2); - lowerBound1 = _mm256_blendv_ps(_mm256_fnmadd_ps(sigma1, kappa, my1), _mm256_set1_ps(1.0f), noValuesMask1); // Set 1 where N==0. - lowerBound2 = _mm256_blendv_ps(_mm256_fnmadd_ps(sigma2, kappa, my2), _mm256_set1_ps(1.0f), noValuesMask2); - } - _mm256_storeu_ps(pOut + static_cast(line) * outputWidth, my1); - _mm256_storeu_ps(pOut + 8 + static_cast(line) * outputWidth, my2); - } - // Rest of line - for (int n = nrVectors * 16; n < width; ++n, ++pOut) - { - float lowerBound = 1.0f; - float upperBound = static_cast(std::numeric_limits::max()); - float my = 0.0f; -#pragma warning (suppress: 4189) - float median = 0.0f; - - if constexpr (Method == MedianKappaSigma) - initMedianData(n + colorOffset, 1); // 1 = only 1 pixel. - - for (int iteration = 0; iteration < std::get<1>(parameters); ++iteration) - { - float sum{ 0.0f }; - float N{ 0.0f }; - float sumSq{ 0.0f }; - - if constexpr (Method == MedianKappaSigma) - { - median = quickMedian(0, lowerBound, upperBound, median); - N = static_cast(sizes[0]); - } - - for (int lightFrame = 0; lightFrame < nrLightframes; ++lightFrame) - { - float colorValue; - if constexpr (Method == MedianKappaSigma) - { - const T* const pColor = medianData.data() + lightFrame; - colorValue = *pColor; - } - if constexpr (Method == KappaSigma) - { - const T* const pColor = static_cast(lineAddresses[lightFrame]) + n + colorOffset; - colorValue = convertToFloat(*pColor); - zeroScalarUpdater(colorValue, N, colorValue < lowerBound || colorValue > upperBound); - } - - sum += colorValue; - sumSq += colorValue * colorValue; - } - my = (N == 0.0f ? 
0.0f : (sum / N)); - if (N == 0.0f || N == static_cast(nrLightframes)) - break; - const float sig = sqrtf(sumSq / N - my * my); - const float sigmakappa = sig * static_cast(std::get<0>(parameters)); - lowerBound = my - sigmakappa; - upperBound = my + sigmakappa; - } - *(pOut + static_cast(line) * outputWidth) = my; - } - }; - - const auto medianLoop = [&](float* pOut, const int colorOffset) -> void - { - const size_t outputWidth = outputBitmap.Width(); - const float uBound = initialUpperBound(); - - for (int counter = 0; counter < nrVectors; ++counter, pOut += 16) - { - __m256 loMedian = _mm256_setzero_ps(); - __m256 hiMedian = _mm256_setzero_ps(); - - initMedianData(counter * size_t{ 16 } + colorOffset, 16); - vectorMedian(loMedian, hiMedian, _mm256_set1_ps(0.0f), _mm256_set1_ps(0.0f), _mm256_set1_ps(uBound), _mm256_set1_ps(uBound)); - _mm256_storeu_ps(pOut + static_cast(line) * outputWidth, loMedian); - _mm256_storeu_ps(pOut + 8 + static_cast(line) * outputWidth, hiMedian); - } - // Rest of line - for (int n = nrVectors * 16; n < width; ++n, ++pOut) - { - initMedianData(n + colorOffset, 1); - const float median = quickMedian(0, 0.0f, uBound, 0.0f); - *(pOut + static_cast(line) * outputWidth) = median; - } - }; - - const auto methodSelectorLoop = [&](float* pOut, const int colorOffset) -> void - { - if constexpr (Method == MedianOnly) - medianLoop(pOut, colorOffset); - else - kappaSigmaLoop(pOut, colorOffset); - }; - - if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) - { - methodSelectorLoop(&*pOutputBitmap->m_Red.m_vPixels.begin(), 0); - methodSelectorLoop(&*pOutputBitmap->m_Green.m_vPixels.begin(), width); - methodSelectorLoop(&*pOutputBitmap->m_Blue.m_vPixels.begin(), width * 2); - return 0; - } - if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) - { - methodSelectorLoop(&*pOutputBitmap->m_vPixels.begin(), 0); - return 0; - } - - // Neither gray (1 float) nor color (3 floats). 
- return 1; -} - - -int AvxOutputComposition::processAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses) -{ - if (doProcessAutoAdaptiveWeightedAverage(line, lineAddresses) == 0) - return 0; - if (doProcessAutoAdaptiveWeightedAverage(line, lineAddresses) == 0) - return 0; - if (doProcessAutoAdaptiveWeightedAverage(line, lineAddresses) == 0) - return 0; - return 1; -} - - -template -int AvxOutputComposition::doProcessAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses) -{ - // CMultiBitmap - template: Input must be of type T, and output type must be float. - if (bitmapColorOrGray(inputBitmap) == false) - return 1; - - const int nIterations = std::get<1>(inputBitmap.GetProcessingParameters()); - const int width = outputBitmap.RealWidth(); - const int nrVectors = width / 16; - const __m256 N = _mm256_set1_ps(static_cast(lineAddresses.size())); - const size_t outputWidth = outputBitmap.Width(); - - const auto autoAdaptLoop = [line, &lineAddresses, nIterations, width, nrVectors, N, outputWidth](float* pOut, const int colorOffset) -> void - { - // Loop over the pixels of the row, process 16 at a time. - for (int counter = 0; counter < nrVectors; ++counter, pOut += 16) - { - __m256 my1 = _mm256_setzero_ps(); - __m256 my2 = _mm256_setzero_ps(); - - // Calculate initial (unweighted) mean. - for (auto frameAddress : lineAddresses) - { - const T *const pColor = static_cast(frameAddress) + counter * 16ULL + colorOffset; - const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); - my1 = _mm256_add_ps(my1, lo8); - my2 = _mm256_add_ps(my2, hi8); - } - my1 = _mm256_div_ps(my1, N); // N != 0 guaranteed - my2 = _mm256_div_ps(my2, N); - - for (int iteration = 0; iteration < nIterations; ++iteration) - { - __m256 S1 = _mm256_setzero_ps(); - __m256 S2 = _mm256_setzero_ps(); - - // Calculate sigma� related to � of last iteration. 
- for (auto frameAddress : lineAddresses) - { - const T *const pColor = static_cast(frameAddress) + counter * 16ULL + colorOffset; - const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); - const __m256 d1 = _mm256_sub_ps(lo8, my1); - const __m256 d2 = _mm256_sub_ps(hi8, my2); - S1 = _mm256_fmadd_ps(d1, d1, S1); // Sum of (x-�)� - S2 = _mm256_fmadd_ps(d2, d2, S2); - } - const __m256 sigmaSq1 = _mm256_div_ps(S1, N); // sigma� = sum(x-�)� / N - const __m256 sigmaSq2 = _mm256_div_ps(S2, N); - - // Calculate new � using current sigma�. - __m256 W1 = _mm256_setzero_ps(); - __m256 W2 = _mm256_setzero_ps(); - S1 = _mm256_setzero_ps(); - S2 = _mm256_setzero_ps(); - for (auto frameAddress : lineAddresses) - { - const T *const pColor = static_cast(frameAddress) + counter * 16ULL + colorOffset; - const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); - const __m256 d1 = _mm256_sub_ps(lo8, my1); // x-� - const __m256 d2 = _mm256_sub_ps(hi8, my2); - const __m256 denominator1 = _mm256_fmadd_ps(d1, d1, sigmaSq1); // sigma� + (x-�)� - const __m256 denominator2 = _mm256_fmadd_ps(d2, d2, sigmaSq2); - const __m256 weight1 = _mm256_blendv_ps(_mm256_div_ps(sigmaSq1, denominator1), _mm256_set1_ps(1.0f), _mm256_cmp_ps(denominator1, _mm256_setzero_ps(), 0)); // sigma� / (sigma� + (x-�)�) = 1 / (1 + (x-�)�/sigma�) - const __m256 weight2 = _mm256_blendv_ps(_mm256_div_ps(sigmaSq2, denominator2), _mm256_set1_ps(1.0f), _mm256_cmp_ps(denominator2, _mm256_setzero_ps(), 0)); // Set weight to 1 when sigma==0. - W1 = _mm256_add_ps(W1, weight1); // W = sum(weights) - W2 = _mm256_add_ps(W2, weight2); - S1 = _mm256_fmadd_ps(lo8, weight1, S1); // S = sum(x * weight) - S2 = _mm256_fmadd_ps(hi8, weight2, S2); - } - - my1 = _mm256_div_ps(S1, W1); // W == 0 (sum of weights) cannot happen. 
- my2 = _mm256_div_ps(S2, W2); - } - _mm256_storeu_ps(pOut + static_cast(line) * outputWidth, my1); - _mm256_storeu_ps(pOut + 8 + static_cast(line) * outputWidth, my2); - } - - // Rest of line - const float nLineAddresses = static_cast(lineAddresses.size()); - for (int n = nrVectors * 16; n < width; ++n, ++pOut) - { - float my = 0.0f; - // Calculate initial (unweighted) mean. - for (auto frameAddress : lineAddresses) - { - const T *const pColor = static_cast(frameAddress) + n + colorOffset; - my += convertToFloat(*pColor); - } - my /= nLineAddresses; - - for (int iteration = 0; iteration < nIterations; ++iteration) - { - float S{ 0.0f }; - - // Calculate sigma� related to � of last iteration. - for (auto frameAddress : lineAddresses) - { - const T *const pColor = static_cast(frameAddress) + n + colorOffset; - const float d = convertToFloat(*pColor) - my; - S += (d * d); - } - const float sigmaSq = S / nLineAddresses; - - // Calculate new � using current sigma�. - float W{ 0.0f }; - S = 0.0f; - for (auto frameAddress : lineAddresses) - { - const T *const pColor = static_cast(frameAddress) + n + colorOffset; - const float color = convertToFloat(*pColor); - const float d = color - my; - const float denominator = sigmaSq + d * d; - const float w = denominator == 0.0f ? 1.0f : (sigmaSq / denominator); - W += w; - S += color * w; - } - my = S / W; // W cannot be zero. - } - - *(pOut + static_cast(line) * outputWidth) = my; - } - }; - - if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) - { - autoAdaptLoop(&*pOutputBitmap->m_Red.m_vPixels.begin(), 0); - autoAdaptLoop(&*pOutputBitmap->m_Green.m_vPixels.begin(), width); - autoAdaptLoop(&*pOutputBitmap->m_Blue.m_vPixels.begin(), width * 2); - return 0; - } - if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) - { - autoAdaptLoop(&*pOutputBitmap->m_vPixels.begin(), 0); - return 0; - } - - // Neither gray (1 float) nor color (3 floats). 
- return 1; -} - -#pragma warning( pop ) +#include "stdafx.h" +#include "avx_output.h" +#include "avx_support.h" +#include "avx_median.h" +#include "MultiBitmap.h" +#include "ColorMultiBitmap.h" +#include "GreyMultiBitmap.h" + +AvxOutputComposition::AvxOutputComposition(CMultiBitmap& mBitmap, CMemoryBitmap& outputbm) : + inputBitmap{ mBitmap }, + outputBitmap{ outputbm }, + avxReady{ true } +{ + if (!AvxSupport::checkSimdAvailability()) + avxReady = false; + // Homogenization not implemented with AVX + if (inputBitmap.GetHomogenization()) + avxReady = false; + // Output must be float values + if (AvxSupport{outputBitmap}.bitmapHasCorrectType() == false) + avxReady = false; +} + +template +static bool AvxOutputComposition::bitmapColorOrGray(const CMultiBitmap& bitmap) noexcept +{ + return + (dynamic_cast*>(&bitmap) != nullptr) || // dynamic_cast for pointers does not throw + (dynamic_cast*>(&bitmap) != nullptr); // (for references it could). +} + +template +inline static float AvxOutputComposition::convertToFloat(const T value) noexcept +{ + if constexpr (std::is_integral_v && sizeof(T) == 4) // 32 bit integral type + return static_cast(value >> 16); + else + return static_cast(value); +} + +int AvxOutputComposition::compose(const int line, std::vector const& lineAddresses) +{ + if (!avxReady) + return 1; + // If this is not equal, something went wrong and we cannot continue without risking access violations. + if (lineAddresses.size() != inputBitmap.GetNrAddedBitmaps()) + return 1; + // No line addresses? 
+ if (lineAddresses.empty()) + return 1; + + int rval = 2; + switch (inputBitmap.GetProcessingMethod()) + { + case MBP_MEDIAN: rval = processMedianKappaSigma(line, lineAddresses); break; + case MBP_SIGMACLIP: rval = processMedianKappaSigma(line, lineAddresses); break; + case MBP_AUTOADAPTIVE: rval = processAutoAdaptiveWeightedAverage(line, lineAddresses); break; + case MBP_MEDIANSIGMACLIP: rval = processMedianKappaSigma(line, lineAddresses); break; + default: rval = 2; break; + } + + return AvxSupport::zeroUpper(rval); +} + +template +int AvxOutputComposition::processMedianKappaSigma(const int line, std::vector const& lineAddresses) +{ + if (doProcessMedianKappaSigma(line, lineAddresses) == 0) + return 0; + if (doProcessMedianKappaSigma(line, lineAddresses) == 0) + return 0; + if (doProcessMedianKappaSigma(line, lineAddresses) == 0) + return 0; + return 1; +} + +#pragma warning( push ) +#pragma warning( disable : 4324 ) // Structure was padded +#pragma warning( disable : 4100 ) // Unreferenced variable + +template +int AvxOutputComposition::doProcessMedianKappaSigma(const int line, std::vector const& lineAddresses) +{ + static_assert(std::is_same_v || (std::is_integral_v && std::is_unsigned_v)); + + // CMultiBitmap - template: Input must be of type T, and output type must be float. 
+ if (bitmapColorOrGray(inputBitmap) == false) + return 1; + + const auto parameters = inputBitmap.GetProcessingParameters(); + + const int width = outputBitmap.RealWidth(); + const int nrVectors = width / 16; + const int nrLightframes = static_cast(lineAddresses.size()); + + const auto accumulateSquared = [](const __m256d accumulator, const __m128 colorValue) noexcept -> __m256d + { + const __m256d pd = _mm256_cvtps_pd(colorValue); + return _mm256_fmadd_pd(pd, pd, accumulator); + }; + const auto sigma = [](const __m256 sum, const __m256d sumSqLo, const __m256d sumSqHi, const __m256 N) -> __m256 + { + // Sigma� = sumSquared / N - �� = 1/N * (sumSquared - sum� / N) + const __m256d Nlo = _mm256_cvtps_pd(_mm256_extractf128_ps(N, 0)); + const __m256d Nhi = _mm256_cvtps_pd(_mm256_extractf128_ps(N, 1)); + const __m256d sumLo = _mm256_cvtps_pd(_mm256_extractf128_ps(sum, 0)); + const __m256d sumHi = _mm256_cvtps_pd(_mm256_extractf128_ps(sum, 1)); + const __m256d sigmaSqLoN = _mm256_sub_pd(sumSqLo, _mm256_div_pd(_mm256_mul_pd(sumLo, sumLo), Nlo)); + const __m256d sigmaSqHiN = _mm256_sub_pd(sumSqHi, _mm256_div_pd(_mm256_mul_pd(sumHi, sumHi), Nhi)); + const __m256 sigmaSqN = _mm256_insertf128_ps(_mm256_castps128_ps256(_mm256_cvtpd_ps(sigmaSqLoN)), _mm256_cvtpd_ps(sigmaSqHiN), 1); + return _mm256_sqrt_ps(_mm256_div_ps(sigmaSqN, N)); + }; + const __m256 kappa = _mm256_set1_ps(static_cast(std::get<0>(parameters))); + + + // ************* Median ************* + + std::vector medianData(nrLightframes * size_t{ 16 }); // 16 pixels x nLightframes + int sizes[16]; // Nr of lightframes != 0 for the 16 pixels + + const auto initMedianData = [&lineAddresses, &medianData, &sizes, nrLightframes](const size_t offset, const int nPixels) -> void + { + memset(medianData.data(), 0, medianData.size() * sizeof(T)); + memset(sizes, 0, sizeof(sizes)); + + std::for_each(lineAddresses.cbegin(), lineAddresses.cend(), [&medianData, &sizes, offset, nPixels, nrLightframes](const void* const p) + { + 
constexpr T zero = T{ 0 }; + const T* const pT = static_cast(p) + offset; + for (int n = 0; n < nPixels; ++n) // nPixels is 1..16 + { + auto& N = sizes[n]; + T element = pT[n]; + if constexpr (std::is_same_v) // First divide by scaling factor, then compare with zero. + element >>= 16; + if (element != zero) // Copy all lightframe values that are != 0. + medianData[n * nrLightframes + (N++)] = element; + } + }); + }; + + const auto quickMedian = [&medianData, &sizes, nrLightframes](const size_t pixelIndex, const float lBound, const float uBound, const float currMedian) -> float + { + const auto N = sizes[pixelIndex]; + if (N == 0) + return 0.0f; + + const T lowerBound = static_cast(lBound); + const T upperBound = static_cast(uBound); + const T currentMedian = static_cast(currMedian); + T* const pData = medianData.data() + pixelIndex * nrLightframes; + + if constexpr (std::is_same::value) + { + for (int n = 0; n < N / 8; ++n) + { + const __m128i v = _mm_loadu_si128(((const __m128i*)pData) + n); + const __m256i v32 = _mm256_cvtepu16_epi32(v); + const __m256i outOfRange = _mm256_or_si256(_mm256_cmpgt_epi32(_mm256_set1_epi32(lowerBound), v32), _mm256_cmpgt_epi32(v32, _mm256_set1_epi32(upperBound))); + const __m256i vCorrected = _mm256_blendv_epi8(v32, _mm256_set1_epi32(currentMedian), outOfRange); + _mm_storeu_si128(((__m128i*)pData) + n, AvxSupport::cvtEpi32Epu16(vCorrected)); + } + for (int n = (N / 8) * 8; n < N; ++n) + if (pData[n] < lowerBound || pData[n] > upperBound) + pData[n] = currentMedian; + } + else // uint32, float + { + for (int n = 0; n < N; ++n) // All lightframes != 0 + if (pData[n] < lowerBound || pData[n] > upperBound) + pData[n] = currentMedian; + } + + return static_cast(qMedian(pData, N, N / 2)); + }; + + const auto vectorMedian = [&quickMedian](__m256& loMedian, __m256& hiMedian, const __m256 loLoBound, const __m256 hiLoBound, const __m256 loHiBound, const __m256 hiHiBound) -> void + { + for (size_t n = 0; n < 8; ++n) + loMedian.m256_f32[n] = 
quickMedian(n, loLoBound.m256_f32[n], loHiBound.m256_f32[n], loMedian.m256_f32[n]); + for (size_t n = 0; n < 8; ++n) + hiMedian.m256_f32[n] = quickMedian(n + 8, hiLoBound.m256_f32[n], hiHiBound.m256_f32[n], hiMedian.m256_f32[n]); + }; + + + // ************* Updater for the data sequences ************* + + const auto zeroVectorUpdater = [](__m256& lo8, __m256& hi8, __m256& loN, __m256& hiN, const __m256 loOutOfRangeMask, const __m256 hiOutOfRangeMask) -> void + { + lo8 = _mm256_andnot_ps(loOutOfRangeMask, lo8); // Set to zero where value is outside my +- kappa * sigma. + hi8 = _mm256_andnot_ps(hiOutOfRangeMask, hi8); + loN = _mm256_add_ps(loN, _mm256_and_ps(_mm256_set1_ps(1.0f), _mm256_cmp_ps(lo8, _mm256_setzero_ps(), 12))); // if (value != 0) ++N; (12 means: not equal) + hiN = _mm256_add_ps(hiN, _mm256_and_ps(_mm256_set1_ps(1.0f), _mm256_cmp_ps(hi8, _mm256_setzero_ps(), 12))); + }; + const auto zeroScalarUpdater = [](float& value, float& N, const bool outOfRange) -> void + { + if (value == 0.0f) + return; + if (outOfRange) + value = 0.0f; + else + ++N; // If value != 0 -> ++N + }; +/* const auto medianVectorUpdater = [](__m256& lo8, __m256& hi8, __m256& loN, __m256& hiN, const __m256 loMedian, const __m256 hiMedian, const __m256 loOutOfRangeMask, const __m256 hiOutOfRangeMask) -> void + { + const __m256 m0 = _mm256_cmp_ps(lo8, _mm256_setzero_ps(), 12); // != 0 + const __m256 m1 = _mm256_cmp_ps(hi8, _mm256_setzero_ps(), 12); + lo8 = _mm256_and_ps(_mm256_blendv_ps(lo8, loMedian, loOutOfRangeMask), m0); // Set to median where value is out of range (outside my +- kappa * sigma). + hi8 = _mm256_and_ps(_mm256_blendv_ps(hi8, hiMedian, hiOutOfRangeMask), m1); // If value was zero -> reset it to zero again. 
+ loN = _mm256_add_ps(loN, _mm256_and_ps(_mm256_set1_ps(1.0f), m0)); // if (value != 0) ++N; + hiN = _mm256_add_ps(hiN, _mm256_and_ps(_mm256_set1_ps(1.0f), m1)); + }; + const auto medianScalarUpdater = [](float& value, float& N, const float median, const bool outOfRange) -> void + { + if (value == 0.0f) + return; + if (outOfRange) + value = median; + ++N; + }; +*/ + + constexpr const auto initialUpperBound = []() -> float + { + if constexpr (std::is_floating_point_v) + return static_cast(std::numeric_limits::max()); + else + return static_cast(std::numeric_limits::max()); // We use 65535 for all integers + }; + + // ************* Loops ************* + + const auto kappaSigmaLoop = [&](float* pOut, const int colorOffset) -> void + { + const size_t outputWidth = outputBitmap.Width(); + + for (int counter = 0; counter < nrVectors; ++counter, pOut += 16) + { + __m256 lowerBound1{ _mm256_setzero_ps() }; + __m256 lowerBound2{ _mm256_setzero_ps() }; + __m256 upperBound1{ _mm256_set1_ps(initialUpperBound()) }; + __m256 upperBound2{ _mm256_set1_ps(initialUpperBound()) }; + __m256 my1{ _mm256_undefined_ps() }; + __m256 my2{ _mm256_undefined_ps() }; + __m256 loMedian = _mm256_undefined_ps(); + __m256 hiMedian = _mm256_undefined_ps(); + + if constexpr (Method == MedianKappaSigma) + initMedianData(counter * size_t{ 16 } + colorOffset, 16); + + for (int iteration = 0; iteration < std::get<1>(parameters); ++iteration) + { + __m256 sum1 = _mm256_setzero_ps(); + __m256 sum2 = _mm256_setzero_ps(); + __m256 N1 = _mm256_setzero_ps(); + __m256 N2 = _mm256_setzero_ps(); + __m256d sumSq1 = _mm256_setzero_pd(); + __m256d sumSq2 = _mm256_setzero_pd(); + __m256d sumSq3 = _mm256_setzero_pd(); + __m256d sumSq4 = _mm256_setzero_pd(); + + if constexpr (Method == MedianKappaSigma) + { + vectorMedian(loMedian, hiMedian, lowerBound1, lowerBound2, upperBound1, upperBound2); + N1 = _mm256_cvtepi32_ps(_mm256_loadu_si256((const __m256i*)&sizes[0])); + N2 = _mm256_cvtepi32_ps(_mm256_loadu_si256((const 
__m256i*)&sizes[8])); + } + + // Loop over the light frames +// for (auto frameAddress : lineAddresses) + for (int lightFrame = 0; lightFrame < nrLightframes; ++lightFrame) + { + __m256 lo8, hi8; + if constexpr (Method == MedianKappaSigma) + { + const T* const pColor = medianData.data() + lightFrame; + const auto [l, h] = AvxSupport::read16PackedSingleStride(pColor, nrLightframes); + lo8 = l; + hi8 = h; + } + if constexpr (Method == KappaSigma) + { + const T* const pColor = static_cast(lineAddresses[lightFrame]) + counter * 16ULL + colorOffset; + const auto [l, h] = AvxSupport::read16PackedSingle(pColor); + lo8 = l; + hi8 = h; + const __m256 outOfRange1 = _mm256_or_ps(_mm256_cmp_ps(lo8, lowerBound1, 17), _mm256_cmp_ps(lo8, upperBound1, 30)); // 17: _CMP_LT_OQ, 30: _CMP_GT_OQ (x < lo OR x > hi) + const __m256 outOfRange2 = _mm256_or_ps(_mm256_cmp_ps(hi8, lowerBound2, 17), _mm256_cmp_ps(hi8, upperBound2, 30)); + zeroVectorUpdater(lo8, hi8, N1, N2, outOfRange1, outOfRange2); + } + + sum1 = _mm256_add_ps(sum1, lo8); + sum2 = _mm256_add_ps(sum2, hi8); + sumSq1 = accumulateSquared(sumSq1, _mm256_castps256_ps128(lo8)); + sumSq2 = accumulateSquared(sumSq2, _mm256_extractf128_ps(lo8, 1)); + sumSq3 = accumulateSquared(sumSq3, _mm256_castps256_ps128(hi8)); + sumSq4 = accumulateSquared(sumSq4, _mm256_extractf128_ps(hi8, 1)); + } + // Calc the new averages + const __m256 noValuesMask1 = _mm256_cmp_ps(N1, _mm256_setzero_ps(), 0); + const __m256 noValuesMask2 = _mm256_cmp_ps(N2, _mm256_setzero_ps(), 0); + my1 = _mm256_blendv_ps(_mm256_div_ps(sum1, N1), _mm256_setzero_ps(), noValuesMask1); // Low 8 floats. Set 0 where N==0. + my2 = _mm256_blendv_ps(_mm256_div_ps(sum2, N2), _mm256_setzero_ps(), noValuesMask2); // Hi 8 floats. Set 0 where N==0. 
+ // Update lower and upper bound with new � +- kappa * sigma + const __m256 sigma1 = sigma(sum1, sumSq1, sumSq2, N1); + const __m256 sigma2 = sigma(sum2, sumSq3, sumSq4, N2); + upperBound1 = _mm256_blendv_ps(_mm256_fmadd_ps(sigma1, kappa, my1), _mm256_setzero_ps(), noValuesMask1); // Set 0 where N==0. + upperBound2 = _mm256_blendv_ps(_mm256_fmadd_ps(sigma2, kappa, my2), _mm256_setzero_ps(), noValuesMask2); + lowerBound1 = _mm256_blendv_ps(_mm256_fnmadd_ps(sigma1, kappa, my1), _mm256_set1_ps(1.0f), noValuesMask1); // Set 1 where N==0. + lowerBound2 = _mm256_blendv_ps(_mm256_fnmadd_ps(sigma2, kappa, my2), _mm256_set1_ps(1.0f), noValuesMask2); + } + _mm256_storeu_ps(pOut + static_cast(line) * outputWidth, my1); + _mm256_storeu_ps(pOut + 8 + static_cast(line) * outputWidth, my2); + } + // Rest of line + for (int n = nrVectors * 16; n < width; ++n, ++pOut) + { + float lowerBound = 1.0f; + float upperBound = static_cast(std::numeric_limits::max()); + float my = 0.0f; +#pragma warning (suppress: 4189) + float median = 0.0f; + + if constexpr (Method == MedianKappaSigma) + initMedianData(n + colorOffset, 1); // 1 = only 1 pixel. + + for (int iteration = 0; iteration < std::get<1>(parameters); ++iteration) + { + float sum{ 0.0f }; + float N{ 0.0f }; + float sumSq{ 0.0f }; + + if constexpr (Method == MedianKappaSigma) + { + median = quickMedian(0, lowerBound, upperBound, median); + N = static_cast(sizes[0]); + } + + for (int lightFrame = 0; lightFrame < nrLightframes; ++lightFrame) + { + float colorValue; + if constexpr (Method == MedianKappaSigma) + { + const T* const pColor = medianData.data() + lightFrame; + colorValue = *pColor; + } + if constexpr (Method == KappaSigma) + { + const T* const pColor = static_cast(lineAddresses[lightFrame]) + n + colorOffset; + colorValue = convertToFloat(*pColor); + zeroScalarUpdater(colorValue, N, colorValue < lowerBound || colorValue > upperBound); + } + + sum += colorValue; + sumSq += colorValue * colorValue; + } + my = (N == 0.0f ? 
0.0f : (sum / N)); + if (N == 0.0f || N == static_cast(nrLightframes)) + break; + const float sig = sqrtf(sumSq / N - my * my); + const float sigmakappa = sig * static_cast(std::get<0>(parameters)); + lowerBound = my - sigmakappa; + upperBound = my + sigmakappa; + } + *(pOut + static_cast(line) * outputWidth) = my; + } + }; + + const auto medianLoop = [&](float* pOut, const int colorOffset) -> void + { + const size_t outputWidth = outputBitmap.Width(); + const float uBound = initialUpperBound(); + + for (int counter = 0; counter < nrVectors; ++counter, pOut += 16) + { + __m256 loMedian = _mm256_setzero_ps(); + __m256 hiMedian = _mm256_setzero_ps(); + + initMedianData(counter * size_t{ 16 } + colorOffset, 16); + vectorMedian(loMedian, hiMedian, _mm256_set1_ps(0.0f), _mm256_set1_ps(0.0f), _mm256_set1_ps(uBound), _mm256_set1_ps(uBound)); + _mm256_storeu_ps(pOut + static_cast(line) * outputWidth, loMedian); + _mm256_storeu_ps(pOut + 8 + static_cast(line) * outputWidth, hiMedian); + } + // Rest of line + for (int n = nrVectors * 16; n < width; ++n, ++pOut) + { + initMedianData(n + colorOffset, 1); + const float median = quickMedian(0, 0.0f, uBound, 0.0f); + *(pOut + static_cast(line) * outputWidth) = median; + } + }; + + const auto methodSelectorLoop = [&](float* pOut, const int colorOffset) -> void + { + if constexpr (Method == MedianOnly) + medianLoop(pOut, colorOffset); + else + kappaSigmaLoop(pOut, colorOffset); + }; + + if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) + { + methodSelectorLoop(&*pOutputBitmap->m_Red.m_vPixels.begin(), 0); + methodSelectorLoop(&*pOutputBitmap->m_Green.m_vPixels.begin(), width); + methodSelectorLoop(&*pOutputBitmap->m_Blue.m_vPixels.begin(), width * 2); + return 0; + } + if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) + { + methodSelectorLoop(&*pOutputBitmap->m_vPixels.begin(), 0); + return 0; + } + + // Neither gray (1 float) nor color (3 floats). 
+ return 1; +} + + +int AvxOutputComposition::processAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses) +{ + if (doProcessAutoAdaptiveWeightedAverage(line, lineAddresses) == 0) + return 0; + if (doProcessAutoAdaptiveWeightedAverage(line, lineAddresses) == 0) + return 0; + if (doProcessAutoAdaptiveWeightedAverage(line, lineAddresses) == 0) + return 0; + return 1; +} + + +template +int AvxOutputComposition::doProcessAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses) +{ + // CMultiBitmap - template: Input must be of type T, and output type must be float. + if (bitmapColorOrGray(inputBitmap) == false) + return 1; + + const int nIterations = std::get<1>(inputBitmap.GetProcessingParameters()); + const int width = outputBitmap.RealWidth(); + const int nrVectors = width / 16; + const __m256 N = _mm256_set1_ps(static_cast(lineAddresses.size())); + const size_t outputWidth = outputBitmap.Width(); + + const auto autoAdaptLoop = [line, &lineAddresses, nIterations, width, nrVectors, N, outputWidth](float* pOut, const int colorOffset) -> void + { + // Loop over the pixels of the row, process 16 at a time. + for (int counter = 0; counter < nrVectors; ++counter, pOut += 16) + { + __m256 my1 = _mm256_setzero_ps(); + __m256 my2 = _mm256_setzero_ps(); + + // Calculate initial (unweighted) mean. + for (auto frameAddress : lineAddresses) + { + const T *const pColor = static_cast(frameAddress) + counter * 16ULL + colorOffset; + const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); + my1 = _mm256_add_ps(my1, lo8); + my2 = _mm256_add_ps(my2, hi8); + } + my1 = _mm256_div_ps(my1, N); // N != 0 guaranteed + my2 = _mm256_div_ps(my2, N); + + for (int iteration = 0; iteration < nIterations; ++iteration) + { + __m256 S1 = _mm256_setzero_ps(); + __m256 S2 = _mm256_setzero_ps(); + + // Calculate sigma� related to � of last iteration. 
+ for (auto frameAddress : lineAddresses) + { + const T *const pColor = static_cast(frameAddress) + counter * 16ULL + colorOffset; + const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); + const __m256 d1 = _mm256_sub_ps(lo8, my1); + const __m256 d2 = _mm256_sub_ps(hi8, my2); + S1 = _mm256_fmadd_ps(d1, d1, S1); // Sum of (x-�)� + S2 = _mm256_fmadd_ps(d2, d2, S2); + } + const __m256 sigmaSq1 = _mm256_div_ps(S1, N); // sigma� = sum(x-�)� / N + const __m256 sigmaSq2 = _mm256_div_ps(S2, N); + + // Calculate new � using current sigma�. + __m256 W1 = _mm256_setzero_ps(); + __m256 W2 = _mm256_setzero_ps(); + S1 = _mm256_setzero_ps(); + S2 = _mm256_setzero_ps(); + for (auto frameAddress : lineAddresses) + { + const T *const pColor = static_cast(frameAddress) + counter * 16ULL + colorOffset; + const auto [lo8, hi8] = AvxSupport::read16PackedSingle(pColor); + const __m256 d1 = _mm256_sub_ps(lo8, my1); // x-� + const __m256 d2 = _mm256_sub_ps(hi8, my2); + const __m256 denominator1 = _mm256_fmadd_ps(d1, d1, sigmaSq1); // sigma� + (x-�)� + const __m256 denominator2 = _mm256_fmadd_ps(d2, d2, sigmaSq2); + const __m256 weight1 = _mm256_blendv_ps(_mm256_div_ps(sigmaSq1, denominator1), _mm256_set1_ps(1.0f), _mm256_cmp_ps(denominator1, _mm256_setzero_ps(), 0)); // sigma� / (sigma� + (x-�)�) = 1 / (1 + (x-�)�/sigma�) + const __m256 weight2 = _mm256_blendv_ps(_mm256_div_ps(sigmaSq2, denominator2), _mm256_set1_ps(1.0f), _mm256_cmp_ps(denominator2, _mm256_setzero_ps(), 0)); // Set weight to 1 when sigma==0. + W1 = _mm256_add_ps(W1, weight1); // W = sum(weights) + W2 = _mm256_add_ps(W2, weight2); + S1 = _mm256_fmadd_ps(lo8, weight1, S1); // S = sum(x * weight) + S2 = _mm256_fmadd_ps(hi8, weight2, S2); + } + + my1 = _mm256_div_ps(S1, W1); // W == 0 (sum of weights) cannot happen. 
+ my2 = _mm256_div_ps(S2, W2); + } + _mm256_storeu_ps(pOut + static_cast(line) * outputWidth, my1); + _mm256_storeu_ps(pOut + 8 + static_cast(line) * outputWidth, my2); + } + + // Rest of line + const float nLineAddresses = static_cast(lineAddresses.size()); + for (int n = nrVectors * 16; n < width; ++n, ++pOut) + { + float my = 0.0f; + // Calculate initial (unweighted) mean. + for (auto frameAddress : lineAddresses) + { + const T *const pColor = static_cast(frameAddress) + n + colorOffset; + my += convertToFloat(*pColor); + } + my /= nLineAddresses; + + for (int iteration = 0; iteration < nIterations; ++iteration) + { + float S{ 0.0f }; + + // Calculate sigma� related to � of last iteration. + for (auto frameAddress : lineAddresses) + { + const T *const pColor = static_cast(frameAddress) + n + colorOffset; + const float d = convertToFloat(*pColor) - my; + S += (d * d); + } + const float sigmaSq = S / nLineAddresses; + + // Calculate new � using current sigma�. + float W{ 0.0f }; + S = 0.0f; + for (auto frameAddress : lineAddresses) + { + const T *const pColor = static_cast(frameAddress) + n + colorOffset; + const float color = convertToFloat(*pColor); + const float d = color - my; + const float denominator = sigmaSq + d * d; + const float w = denominator == 0.0f ? 1.0f : (sigmaSq / denominator); + W += w; + S += color * w; + } + my = S / W; // W cannot be zero. + } + + *(pOut + static_cast(line) * outputWidth) = my; + } + }; + + if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) + { + autoAdaptLoop(&*pOutputBitmap->m_Red.m_vPixels.begin(), 0); + autoAdaptLoop(&*pOutputBitmap->m_Green.m_vPixels.begin(), width); + autoAdaptLoop(&*pOutputBitmap->m_Blue.m_vPixels.begin(), width * 2); + return 0; + } + if (auto pOutputBitmap{ dynamic_cast(&outputBitmap) }) + { + autoAdaptLoop(&*pOutputBitmap->m_vPixels.begin(), 0); + return 0; + } + + // Neither gray (1 float) nor color (3 floats). 
+ return 1; +} + +#pragma warning( pop ) diff --git a/DeepSkyStacker/avx_output.h b/DeepSkyStackerKernel/avx_output.h similarity index 96% rename from DeepSkyStacker/avx_output.h rename to DeepSkyStackerKernel/avx_output.h index 59a6d39a..415d0a3f 100644 --- a/DeepSkyStacker/avx_output.h +++ b/DeepSkyStackerKernel/avx_output.h @@ -1,42 +1,42 @@ -#pragma once - -#include "BitmapBase.h" - -class CMultiBitmap; -class AvxOutputComposition -{ -private: - CMultiBitmap& inputBitmap; - CMemoryBitmap& outputBitmap; - bool avxReady; -public: - AvxOutputComposition() = delete; - AvxOutputComposition(CMultiBitmap& mBitmap, CMemoryBitmap& outputbm); - AvxOutputComposition(const AvxOutputComposition&) = default; - AvxOutputComposition(AvxOutputComposition&&) = delete; - AvxOutputComposition& operator=(const AvxOutputComposition&) = delete; - - int compose(const int line, std::vector const& lineAddresses); -private: - enum MethodSelection - { - KappaSigma = 0, - MedianKappaSigma = 1, - MedianOnly = 2 - }; - template - static bool bitmapColorOrGray(const CMultiBitmap& bitmap) noexcept; - - template - static float convertToFloat(const T value) noexcept; - - template - int processMedianKappaSigma(const int line, std::vector const& lineAddresses); - - template - int doProcessMedianKappaSigma(const int line, std::vector const& lineAddresses); - - int processAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses); - template - int doProcessAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses); -}; +#pragma once + +#include "BitmapBase.h" + +class CMultiBitmap; +class AvxOutputComposition +{ +private: + CMultiBitmap& inputBitmap; + CMemoryBitmap& outputBitmap; + bool avxReady; +public: + AvxOutputComposition() = delete; + AvxOutputComposition(CMultiBitmap& mBitmap, CMemoryBitmap& outputbm); + AvxOutputComposition(const AvxOutputComposition&) = default; + AvxOutputComposition(AvxOutputComposition&&) = delete; + AvxOutputComposition& 
operator=(const AvxOutputComposition&) = delete; + + int compose(const int line, std::vector const& lineAddresses); +private: + enum MethodSelection + { + KappaSigma = 0, + MedianKappaSigma = 1, + MedianOnly = 2 + }; + template + static bool bitmapColorOrGray(const CMultiBitmap& bitmap) noexcept; + + template + static float convertToFloat(const T value) noexcept; + + template + int processMedianKappaSigma(const int line, std::vector const& lineAddresses); + + template + int doProcessMedianKappaSigma(const int line, std::vector const& lineAddresses); + + int processAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses); + template + int doProcessAutoAdaptiveWeightedAverage(const int line, std::vector const& lineAddresses); +}; diff --git a/DeepSkyStacker/avx_support.cpp b/DeepSkyStackerKernel/avx_support.cpp similarity index 96% rename from DeepSkyStacker/avx_support.cpp rename to DeepSkyStackerKernel/avx_support.cpp index 7942fe95..14355338 100644 --- a/DeepSkyStacker/avx_support.cpp +++ b/DeepSkyStackerKernel/avx_support.cpp @@ -1,220 +1,220 @@ -#include "stdafx.h" -#include "avx_support.h" -#include "BitmapCharacteristics.h" -#include "Multitask.h" -#include "Ztrace.h" - - - -AvxSupport::AvxSupport(CMemoryBitmap& b) noexcept : - bitmap{ b } -{}; - -int AvxSupport::getNrChannels() const -{ - CBitmapCharacteristics bitmapCharacteristics; - const_cast(bitmap).GetCharacteristics(bitmapCharacteristics); - return bitmapCharacteristics.m_lNrChannels; -}; - -bool AvxSupport::isColorBitmap() const -{ - return getNrChannels() == 3; -}; - -template -bool AvxSupport::isColorBitmapOfType() const -{ - auto* const p = const_cast(this)->getColorPtr(); - const bool isColor = p != nullptr && p->isTopDown(); - if constexpr (std::is_same::value) - return isColor && p->IsFloat() && p->GetMultiplier() == 256.0; - else - return isColor; -} - -bool AvxSupport::isMonochromeBitmap() const -{ - return getNrChannels() == 1; -}; - -template -bool 
AvxSupport::isMonochromeBitmapOfType() const -{ - if (auto* const p = const_cast(this)->getGrayPtr()) - { - // Note that Monochrome bitmaps are always topdown -> no extra check required! CF. CGrayBitmap::GetOffset(). - if constexpr (std::is_same_v) - return (p->IsFloat() && !p->IsCFA() && p->GetMultiplier() == 256.0); - else if constexpr (std::is_same_v) - return (!p->IsCFA() || isMonochromeCfaBitmapOfType()); - else - return !p->IsCFA(); - } - else - return false; -} - -template -bool AvxSupport::isMonochromeCfaBitmapOfType() const -{ - // CFA only supported for T=16 bits unsigned - if constexpr (std::is_same::value) - { - auto* const pGray = const_cast(this)->getGrayPtr(); - // We support CFA only for RGGB Bayer matrices with BILINEAR interpolation and no offsets. - return (pGray != nullptr && pGray->IsCFA() && pGray->GetCFATransformation() == CFAT_BILINEAR && pGray->xOffset() == 0 && pGray->yOffset() == 0 - && (pGray->GetCFAType() == CFATYPE_RGGB || pGray->GetCFAType() == CFATYPE_GBRG)); - } - else - return false; -}; - -bool AvxSupport::isColorBitmapOrCfa() const -{ - return isColorBitmap() || isMonochromeCfaBitmapOfType(); -} - -CFATYPE AvxSupport::getCfaType() const -{ - if (auto* pGray = const_cast(this)->getGrayPtr()) // GetCFAType is a non-const funtion :-( - return pGray->GetCFAType(); - else - return CFATYPE_NONE; -} - -const int AvxSupport::width() const { - return bitmap.Width(); -} - -template -bool AvxSupport::bitmapHasCorrectType() const -{ - return (isColorBitmapOfType() || isMonochromeBitmapOfType()); -} - -bool AvxSupport::checkAvx2CpuSupport() -{ -#if defined(_WINDOWS) - SYSTEM_INFO info; - GetNativeSystemInfo(&info); - if (info.wProcessorArchitecture != PROCESSOR_ARCHITECTURE_AMD64) // AVX instructions can only be supported on x64 CPUs. 
- return false; - - int cpuid[4] = { -1 }; - - __cpuidex(cpuid, 1, 0); - const bool FMAsupported = ((cpuid[2] & (1 << 12)) != 0); - const bool XSAVEsupported = ((cpuid[2] & (1 << 26)) != 0); - const bool OSXSAVEsupported = ((cpuid[2] & (1 << 27)) != 0); - - __cpuidex(cpuid, 7, 0); - const bool AVX2supported = ((cpuid[1] & (1 << 5)) != 0); - //const bool BMI1supported = ((cpuid[1] & (1 << 3) != 0); - //const bool BMI2supported = ((cpuid[1] & (1 << 8)) != 0); - - const bool RequiredCpuFlags = FMAsupported && AVX2supported && XSAVEsupported && OSXSAVEsupported; - - // OS supports AVX (YMM registers) - Note: XGETBV may only be executed on CPUs with XSAVE flag set. - const bool AVXenabledOS = RequiredCpuFlags ? ((_xgetbv(0) & 6) == 6) : false; // 6 = SSE (0x2) + YMM (0x4) - - // Additionally set flush to zero and denormals to zero - Note: (S)GETCSR are SSE instructions, so supported by all x64 CPUs. - _mm_setcsr(_mm_getcsr() | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON); - - return (RequiredCpuFlags && AVXenabledOS); -#else - return false; -#endif -}; - -bool AvxSupport::checkSimdAvailability() -{ - // If user has disabled SIMD vectorisation (settings dialog) -> return false; - return CMultitask::GetUseSimd() && checkAvx2CpuSupport(); -} - -void AvxSupport::reportCpuType() -{ -#if defined(_WINDOWS) - char architecture[8]{ 0x00 }; - SYSTEM_INFO info; - - const auto getArchitectureString = [&architecture](const auto architectureId) -> void - { - constexpr auto maxSize = sizeof(architecture); - switch (architectureId) - { - case PROCESSOR_ARCHITECTURE_INTEL: - strcpy_s(architecture, maxSize, "x86"); - break; - case PROCESSOR_ARCHITECTURE_ARM: - strcpy_s(architecture, maxSize, "ARM"); - break; - case PROCESSOR_ARCHITECTURE_IA64: - strcpy_s(architecture, maxSize, "IA64"); - break; - case PROCESSOR_ARCHITECTURE_AMD64: - strcpy_s(architecture, maxSize, "x64"); - break; - case PROCESSOR_ARCHITECTURE_ARM64: - strcpy_s(architecture, maxSize, "ARM64"); - break; - default: - 
strcpy_s(architecture, maxSize, "Unknown"); - } - }; - - GetNativeSystemInfo(&info); - const auto nativeArchitecture = info.wProcessorArchitecture; - getArchitectureString(nativeArchitecture); - - ZTRACE_RUNTIME("Native processor architecture: %s", architecture); - std::cerr << "Native processor architecture: " << architecture << std::endl; - - GetSystemInfo(&info); - if (info.wProcessorArchitecture != nativeArchitecture) - { - getArchitectureString(info.wProcessorArchitecture); - ZTRACE_RUNTIME("Emulated processor architecture: %s", architecture); - std::cerr << "Emulated processor architecture: " << architecture << std::endl; - } -#endif - int cpuid[4] = { -1 }; - __cpuid(cpuid, 0x80000000); - const int nExtIds = cpuid[0]; - char brand[64] = { '\0' }; - if (nExtIds >= 0x80000004) - { - __cpuidex(cpuid, 0x80000002, 0); - memcpy(brand, cpuid, sizeof(cpuid)); - __cpuidex(cpuid, 0x80000003, 0); - memcpy(brand + 16, cpuid, sizeof(cpuid)); - __cpuidex(cpuid, 0x80000004, 0); - memcpy(brand + 32, cpuid, sizeof(cpuid)); - } - else - memcpy(brand, "CPU brand not detected", 22); - - // - // Also report this on stderr so if we get a SIGILL the information - // will be there along with the exception traceback. - // - std::cerr << "CPU Type: " << brand << std::endl; - ZTRACE_RUNTIME("CPU type: %s", brand); -} - -void reportCpuType() -{ - AvxSupport::reportCpuType(); -} - -// Explicit template instantiation for the types we need. 
-template bool AvxSupport::bitmapHasCorrectType() const; -template bool AvxSupport::bitmapHasCorrectType() const; -template bool AvxSupport::bitmapHasCorrectType() const; -template bool AvxSupport::bitmapHasCorrectType() const; - -template bool AvxSupport::isMonochromeCfaBitmapOfType() const; -template bool AvxSupport::isMonochromeCfaBitmapOfType() const; -template bool AvxSupport::isMonochromeCfaBitmapOfType() const; -template bool AvxSupport::isMonochromeCfaBitmapOfType() const; +#include "stdafx.h" +#include "avx_support.h" +#include "BitmapCharacteristics.h" +#include "Multitask.h" +#include "Ztrace.h" + + + +AvxSupport::AvxSupport(CMemoryBitmap& b) noexcept : + bitmap{ b } +{}; + +int AvxSupport::getNrChannels() const +{ + CBitmapCharacteristics bitmapCharacteristics; + const_cast(bitmap).GetCharacteristics(bitmapCharacteristics); + return bitmapCharacteristics.m_lNrChannels; +}; + +bool AvxSupport::isColorBitmap() const +{ + return getNrChannels() == 3; +}; + +template +bool AvxSupport::isColorBitmapOfType() const +{ + auto* const p = const_cast(this)->getColorPtr(); + const bool isColor = p != nullptr && p->isTopDown(); + if constexpr (std::is_same::value) + return isColor && p->IsFloat() && p->GetMultiplier() == 256.0; + else + return isColor; +} + +bool AvxSupport::isMonochromeBitmap() const +{ + return getNrChannels() == 1; +}; + +template +bool AvxSupport::isMonochromeBitmapOfType() const +{ + if (auto* const p = const_cast(this)->getGrayPtr()) + { + // Note that Monochrome bitmaps are always topdown -> no extra check required! CF. CGrayBitmap::GetOffset(). 
+ if constexpr (std::is_same_v) + return (p->IsFloat() && !p->IsCFA() && p->GetMultiplier() == 256.0); + else if constexpr (std::is_same_v) + return (!p->IsCFA() || isMonochromeCfaBitmapOfType()); + else + return !p->IsCFA(); + } + else + return false; +} + +template +bool AvxSupport::isMonochromeCfaBitmapOfType() const +{ + // CFA only supported for T=16 bits unsigned + if constexpr (std::is_same::value) + { + auto* const pGray = const_cast(this)->getGrayPtr(); + // We support CFA only for RGGB Bayer matrices with BILINEAR interpolation and no offsets. + return (pGray != nullptr && pGray->IsCFA() && pGray->GetCFATransformation() == CFAT_BILINEAR && pGray->xOffset() == 0 && pGray->yOffset() == 0 + && (pGray->GetCFAType() == CFATYPE_RGGB || pGray->GetCFAType() == CFATYPE_GBRG)); + } + else + return false; +}; + +bool AvxSupport::isColorBitmapOrCfa() const +{ + return isColorBitmap() || isMonochromeCfaBitmapOfType(); +} + +CFATYPE AvxSupport::getCfaType() const +{ + if (auto* pGray = const_cast(this)->getGrayPtr()) // GetCFAType is a non-const funtion :-( + return pGray->GetCFAType(); + else + return CFATYPE_NONE; +} + +const int AvxSupport::width() const { + return bitmap.Width(); +} + +template +bool AvxSupport::bitmapHasCorrectType() const +{ + return (isColorBitmapOfType() || isMonochromeBitmapOfType()); +} + +bool AvxSupport::checkAvx2CpuSupport() +{ +#if defined(_WINDOWS) + SYSTEM_INFO info; + GetNativeSystemInfo(&info); + if (info.wProcessorArchitecture != PROCESSOR_ARCHITECTURE_AMD64) // AVX instructions can only be supported on x64 CPUs. 
+ return false; + + int cpuid[4] = { -1 }; + + __cpuidex(cpuid, 1, 0); + const bool FMAsupported = ((cpuid[2] & (1 << 12)) != 0); + const bool XSAVEsupported = ((cpuid[2] & (1 << 26)) != 0); + const bool OSXSAVEsupported = ((cpuid[2] & (1 << 27)) != 0); + + __cpuidex(cpuid, 7, 0); + const bool AVX2supported = ((cpuid[1] & (1 << 5)) != 0); + //const bool BMI1supported = ((cpuid[1] & (1 << 3) != 0); + //const bool BMI2supported = ((cpuid[1] & (1 << 8)) != 0); + + const bool RequiredCpuFlags = FMAsupported && AVX2supported && XSAVEsupported && OSXSAVEsupported; + + // OS supports AVX (YMM registers) - Note: XGETBV may only be executed on CPUs with XSAVE flag set. + const bool AVXenabledOS = RequiredCpuFlags ? ((_xgetbv(0) & 6) == 6) : false; // 6 = SSE (0x2) + YMM (0x4) + + // Additionally set flush to zero and denormals to zero - Note: (S)GETCSR are SSE instructions, so supported by all x64 CPUs. + _mm_setcsr(_mm_getcsr() | _MM_FLUSH_ZERO_ON | _MM_DENORMALS_ZERO_ON); + + return (RequiredCpuFlags && AVXenabledOS); +#else + return false; +#endif +}; + +bool AvxSupport::checkSimdAvailability() +{ + // If user has disabled SIMD vectorisation (settings dialog) -> return false; + return CMultitask::GetUseSimd() && checkAvx2CpuSupport(); +} + +void AvxSupport::reportCpuType() +{ +#if defined(_WINDOWS) + char architecture[8]{ 0x00 }; + SYSTEM_INFO info; + + const auto getArchitectureString = [&architecture](const auto architectureId) -> void + { + constexpr auto maxSize = sizeof(architecture); + switch (architectureId) + { + case PROCESSOR_ARCHITECTURE_INTEL: + strcpy_s(architecture, maxSize, "x86"); + break; + case PROCESSOR_ARCHITECTURE_ARM: + strcpy_s(architecture, maxSize, "ARM"); + break; + case PROCESSOR_ARCHITECTURE_IA64: + strcpy_s(architecture, maxSize, "IA64"); + break; + case PROCESSOR_ARCHITECTURE_AMD64: + strcpy_s(architecture, maxSize, "x64"); + break; + case PROCESSOR_ARCHITECTURE_ARM64: + strcpy_s(architecture, maxSize, "ARM64"); + break; + default: + 
strcpy_s(architecture, maxSize, "Unknown"); + } + }; + + GetNativeSystemInfo(&info); + const auto nativeArchitecture = info.wProcessorArchitecture; + getArchitectureString(nativeArchitecture); + + ZTRACE_RUNTIME("Native processor architecture: %s", architecture); + std::cerr << "Native processor architecture: " << architecture << std::endl; + + GetSystemInfo(&info); + if (info.wProcessorArchitecture != nativeArchitecture) + { + getArchitectureString(info.wProcessorArchitecture); + ZTRACE_RUNTIME("Emulated processor architecture: %s", architecture); + std::cerr << "Emulated processor architecture: " << architecture << std::endl; + } +#endif + int cpuid[4] = { -1 }; + __cpuid(cpuid, 0x80000000); + const int nExtIds = cpuid[0]; + char brand[64] = { '\0' }; + if (nExtIds >= 0x80000004) + { + __cpuidex(cpuid, 0x80000002, 0); + memcpy(brand, cpuid, sizeof(cpuid)); + __cpuidex(cpuid, 0x80000003, 0); + memcpy(brand + 16, cpuid, sizeof(cpuid)); + __cpuidex(cpuid, 0x80000004, 0); + memcpy(brand + 32, cpuid, sizeof(cpuid)); + } + else + memcpy(brand, "CPU brand not detected", 22); + + // + // Also report this on stderr so if we get a SIGILL the information + // will be there along with the exception traceback. + // + std::cerr << "CPU Type: " << brand << std::endl; + ZTRACE_RUNTIME("CPU type: %s", brand); +} + +void reportCpuType() +{ + AvxSupport::reportCpuType(); +} + +// Explicit template instantiation for the types we need. 
+template bool AvxSupport::bitmapHasCorrectType() const; +template bool AvxSupport::bitmapHasCorrectType() const; +template bool AvxSupport::bitmapHasCorrectType() const; +template bool AvxSupport::bitmapHasCorrectType() const; + +template bool AvxSupport::isMonochromeCfaBitmapOfType() const; +template bool AvxSupport::isMonochromeCfaBitmapOfType() const; +template bool AvxSupport::isMonochromeCfaBitmapOfType() const; +template bool AvxSupport::isMonochromeCfaBitmapOfType() const; diff --git a/DeepSkyStacker/avx_support.h b/DeepSkyStackerKernel/avx_support.h similarity index 97% rename from DeepSkyStacker/avx_support.h rename to DeepSkyStackerKernel/avx_support.h index 5af538ee..accd7755 100644 --- a/DeepSkyStacker/avx_support.h +++ b/DeepSkyStackerKernel/avx_support.h @@ -1,478 +1,478 @@ -#pragma once - -#include "cfa.h" -#include "ColorBitmap.h" -#include "GrayBitmap.h" - -class AvxSupport -{ -private: - // Unfortunately, we cannot use const here, because the member function are hardly never const declared. 
:-( - CMemoryBitmap& bitmap; - - template - auto* getColorPtr() { return dynamic_cast*>(&bitmap); } - template - auto* getGrayPtr() { return dynamic_cast*>(&bitmap); } - template - const auto* getColorPtr() const { return dynamic_cast*>(&bitmap); } - template - const auto* getGrayPtr() const { return dynamic_cast*>(&bitmap); } - - int getNrChannels() const; -public: - AvxSupport(CMemoryBitmap& b) noexcept; - - bool isColorBitmap() const; - template bool isColorBitmapOfType() const; - bool isMonochromeBitmap() const; - template bool isMonochromeBitmapOfType() const; - template bool isMonochromeCfaBitmapOfType() const; - bool isColorBitmapOrCfa() const; - - CFATYPE getCfaType() const; - - template - const std::vector& redPixels() const { return getColorPtr()->m_Red.m_vPixels; } - template - const std::vector& greenPixels() const { return getColorPtr()->m_Green.m_vPixels; } - template - const std::vector& bluePixels() const { return getColorPtr()->m_Blue.m_vPixels; } - template - const std::vector& grayPixels() const { return getGrayPtr()->m_vPixels; } - - template - std::vector& redPixels() { return getColorPtr()->m_Red.m_vPixels; } - template - std::vector& greenPixels() { return getColorPtr()->m_Green.m_vPixels; } - template - std::vector& bluePixels() { return getColorPtr()->m_Blue.m_vPixels; } - template - std::vector& grayPixels() { return getGrayPtr()->m_vPixels; } - - const int width() const; - - template - bool bitmapHasCorrectType() const; - - static bool checkAvx2CpuSupport(); - static bool checkSimdAvailability(); - static void reportCpuType(); - - template - inline static size_t numberOfAvxVectors(const size_t width) - { - static_assert(sizeof(ElementType) == 1 || sizeof(ElementType) == 2 || sizeof(ElementType) == 4 || sizeof(ElementType) == 8); - return width == 0 ? 0 : ((width - 1) * sizeof(ElementType)) / sizeof(VectorElementType) + 1; - } - - // When returning from AVX-code to non-AVX-code we should zero the upper 128 bits of all ymm registers. 
- // Otherwise old Intel CPUs could suffer from performance degradations. - template - inline static T zeroUpper(const T returnValue) - { - static_assert(std::is_integral::value); - _mm256_zeroupper(); - return returnValue; - } - - // SIMD functions - - inline static __m256 wordToPackedFloat(const __m128i x) noexcept - { - return _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(x)); - } - - inline static std::tuple<__m256d, __m256d, __m256d, __m256d> wordToPackedDouble(const __m256i x) noexcept - { - const __m256i i1 = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(x)); - const __m256i i2 = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(x, 1)); - return { - _mm256_cvtepi32_pd(_mm256_castsi256_si128(i1)), - _mm256_cvtepi32_pd(_mm256_extracti128_si256(i1, 1)), - _mm256_cvtepi32_pd(_mm256_castsi256_si128(i2)), - _mm256_cvtepi32_pd(_mm256_extracti128_si256(i2, 1)) - }; - } - - inline static __m256 cvtEpu32Ps(const __m256i x) noexcept - { - const __m256i mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), x); // 0 > x (= x < 0) - const __m256 ps = _mm256_cvtepi32_ps(x); - const __m256 corr = _mm256_add_ps(_mm256_set1_ps(static_cast(0x100000000ULL)), ps); // UINTMAX - x (Note: 'add_ps' is correct!) - return _mm256_blendv_ps(ps, corr, _mm256_castsi256_ps(mask)); // Take (UINTMAX - x) where x < 0 - } - - inline static std::tuple<__m256d, __m256d> cvtEpu32Pd(const __m256i x) noexcept - { - const __m256i mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), x); // 0 > x (= x < 0) - const __m256d d1 = _mm256_cvtepi32_pd(_mm256_castsi256_si128(x)); - const __m256d d2 = _mm256_cvtepi32_pd(_mm256_extracti128_si256(x, 1)); - const __m256d corr1 = _mm256_add_pd(_mm256_set1_pd(static_cast(0x100000000ULL)), d1); // UINTMAX - x (Note: 'add_pd' is correct!) 
- const __m256d corr2 = _mm256_add_pd(_mm256_set1_pd(static_cast(0x100000000ULL)), d2); - return { - _mm256_blendv_pd(d1, corr1, _mm256_cmp_pd(d1, _mm256_setzero_pd(), 17)), // 17: OP := _CMP_LT_OQ - _mm256_blendv_pd(d2, corr2, _mm256_cmp_pd(d2, _mm256_setzero_pd(), 17)) // Take (UINTMAX - x) where x < 0 - }; - } - - inline static std::tuple<__m256d, __m256d> cvtPsPd(const __m256 x) noexcept - { - return { - _mm256_cvtps_pd(_mm256_castps256_ps128(x)), - _mm256_cvtps_pd(_mm256_extractf128_ps(x, 1)) - }; - } - - inline static __m128i cvtEpi32Epu16(const __m256i epi32) noexcept - { - const __m256i epu16 = _mm256_packus_epi32(epi32, _mm256_castsi128_si256(_mm256_extracti128_si256(epi32, 1))); // (?, ?, ?, ?, a7, a6, a5, a4, a7, a6, a5, a4, a3, a2, a1, a0) - // Upper lane is now wrong and useless. - return _mm256_castsi256_si128(epu16); - } - - inline static __m128i cvtPsEpu16(const __m256 x) noexcept - { - const __m256i epi32 = _mm256_cvtps_epi32(x); - return cvtEpi32Epu16(epi32); - } - - inline static __m128i cvtTruncatePsEpu16(const __m256 x) noexcept - { - const __m256i epi32 = _mm256_cvttps_epi32(x); - return cvtEpi32Epu16(epi32); - } - - inline static __m256i cvtPsEpu32(const __m256 x) noexcept - { - // x >= INTMAX + 1 - const __m256 mask = _mm256_cmp_ps(x, _mm256_set1_ps(2147483648.0f), 29); // 29 = _CMP_GE_OQ (greater or equal, ordered, quiet) - const __m256 corr = _mm256_sub_ps(x, _mm256_set1_ps(4294967296.0f)); - return _mm256_cvttps_epi32(_mm256_blendv_ps(x, corr, mask)); - } - - inline static __m256i cmpGtEpu16(const __m256i a, const __m256i b) noexcept - { - const __m256i highBit = _mm256_set1_epi16(std::uint16_t{ 0x8000 }); - return _mm256_cmpgt_epi16(_mm256_xor_si256(a, highBit), _mm256_xor_si256(b, highBit)); - }; - - // Read color values from T* and return 2 x 8 packed single. 
- inline static std::tuple<__m256, __m256> read16PackedSingle(const std::uint16_t* const pColor) noexcept - { - const __m256i icolor = _mm256_loadu_si256((const __m256i*)pColor); - const __m256 lo8 = wordToPackedFloat(_mm256_castsi256_si128(icolor)); - const __m256 hi8 = wordToPackedFloat(_mm256_extracti128_si256(icolor, 1)); - return { lo8, hi8 }; - }; - inline static std::tuple<__m256, __m256> read16PackedSingle(const std::uint32_t* const pColor) noexcept - { - return { - _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_loadu_epi32(pColor), 16)), // Shift 16 bits right while shifting in zeros. - _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_loadu_epi32(pColor + 8), 16)) - }; - } - inline static std::tuple<__m256, __m256> read16PackedSingle(const float* const pColor) noexcept - { - return { _mm256_loadu_ps(pColor), _mm256_loadu_ps(pColor + 8) }; - } - - // Read 16 color values from T* with stride - inline static std::tuple<__m256, __m256> read16PackedSingleStride(const std::uint16_t* const pColor, const int stride) noexcept - { - const __m256i ndx = _mm256_mullo_epi32(_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7), _mm256_set1_epi32(stride)); - const __m256i v1 = _mm256_i32gather_epi32((const int*)pColor, ndx, 2); - const __m256i v2 = _mm256_i32gather_epi32((const int*)pColor, _mm256_add_epi32(ndx, _mm256_set1_epi32(8 * stride)), 2); // 8, 9, 10, 11, 12, 13, 14, 15 - return { - _mm256_cvtepi32_ps(_mm256_blend_epi16(v1, _mm256_setzero_si256(), 0xaa)), - _mm256_cvtepi32_ps(_mm256_blend_epi16(v2, _mm256_setzero_si256(), 0xaa)) - }; - } - // Note: ***** DOES NOT SHIFT 16 BITS RIGHT! 
***** - inline static std::tuple<__m256, __m256> read16PackedSingleStride(const std::uint32_t* const pColor, const int stride) noexcept - { - const __m256i ndx = _mm256_mullo_epi32(_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7), _mm256_set1_epi32(stride)); - const __m256i v1 = _mm256_i32gather_epi32((const int*)pColor, ndx, 4); - const __m256i v2 = _mm256_i32gather_epi32((const int*)pColor, _mm256_add_epi32(ndx, _mm256_set1_epi32(8 * stride)), 4); - return { - _mm256_cvtepi32_ps(v1), - _mm256_cvtepi32_ps(v2) - }; - } - inline static std::tuple<__m256, __m256> read16PackedSingleStride(const float* const pColor, const int stride) noexcept - { - const __m256i ndx1 = _mm256_mullo_epi32(_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7), _mm256_set1_epi32(stride)); - const __m256i ndx2 = _mm256_add_epi32(ndx1, _mm256_set1_epi32(8 * stride)); - return { _mm256_i32gather_ps(pColor, ndx1, 4), _mm256_i32gather_ps(pColor, ndx2, 4) }; - } - - inline static __m256i cvt2xEpi32Epu16(const __m256i lo, const __m256i hi) - { - return _mm256_packus_epi32(_mm256_permute2x128_si256(lo, hi, 0x20), _mm256_permute2x128_si256(lo, hi, 0x31)); - } - - // Read color values from T* and return 16 x packed short - inline static __m256i read16PackedShort(const std::uint16_t* const pColor) - { - return _mm256_loadu_epi16(pColor); - } - inline static __m256i read16PackedShort(const std::uint32_t* const pColor) - { - const __m256i lo = _mm256_srli_epi32(_mm256_loadu_epi32(pColor), 16); // Shift 16 bits right while shifting in zeros. - const __m256i hi = _mm256_srli_epi32(_mm256_loadu_epi32(pColor + 8), 16); - return cvt2xEpi32Epu16(lo, hi); - } - inline static __m256i read16PackedShort(const float* const pColor) - { - // Min with 65536 not needed, because cvt2xEpi32Epu16 applies unsigned saturation to 16 bits. 
-// const __m256i lo = _mm256_min_epi32(_mm256_cvtps_epi32(_mm256_loadu_ps(pColor)), _mm256_set1_epi32(0x0ffff)); -// const __m256i hi = _mm256_min_epi32(_mm256_cvtps_epi32(_mm256_loadu_ps(pColor + 8)), _mm256_set1_epi32(0x0ffff)); - const __m256i loEpi32 = _mm256_cvtps_epi32(_mm256_loadu_ps(pColor)); - const __m256i hiEpi32 = _mm256_cvtps_epi32(_mm256_loadu_ps(pColor + 8)); - return cvt2xEpi32Epu16(loEpi32, hiEpi32); - } - - // Read color values from T* and return 2 x 8 x packed int - inline static std::tuple<__m256i, __m256i> read16PackedInt(const std::uint16_t* const pColor) - { - const __m256i epi16 = _mm256_loadu_si256((const __m256i*)pColor); - return { - _mm256_cvtepu16_epi32(_mm256_castsi256_si128(epi16)), - _mm256_cvtepu16_epi32(_mm256_extracti128_si256(epi16, 1)) - }; - } - inline static std::tuple<__m256i, __m256i> read16PackedInt(const std::uint32_t* const pColor) - { - return { - _mm256_srli_epi32(_mm256_loadu_si256((const __m256i*)pColor), 16), // Shift 16 bits right while shifting in zeros (divide by 65536). 
- _mm256_srli_epi32(_mm256_loadu_si256(((const __m256i*)pColor) + 1), 16) - }; - } - inline static std::tuple<__m256i, __m256i> read16PackedInt(const float* const pColor) - { - return { - _mm256_min_epi32(_mm256_cvttps_epi32(_mm256_loadu_ps(pColor)), _mm256_set1_epi32(0x0000ffff)), - _mm256_min_epi32(_mm256_cvttps_epi32(_mm256_loadu_ps(pColor + 8)), _mm256_set1_epi32(0x0000ffff)) - }; - } - inline static std::tuple<__m256i, __m256i> read16PackedInt(const double*) - { - throw "read16PackedInt(const double*) is not implemented!"; - } - inline static std::tuple<__m256i, __m256i> read16PackedInt(const double* const pColor, const __m256d scalingFactor) - { - const __m128i lo1 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor), scalingFactor)); - const __m128i hi1 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor + 4), scalingFactor)); - - const __m128i lo2 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor + 8), scalingFactor)); - const __m128i hi2 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor + 12), scalingFactor)); - - return { - _mm256_min_epi32(_mm256_set_m128i(hi1, lo1), _mm256_set1_epi32(0x0000ffff)), - _mm256_min_epi32(_mm256_set_m128i(hi2, lo2), _mm256_set1_epi32(0x0000ffff)) - }; - } - - // Accumulate packed single newColor to T* oldColor - inline static __m256 accumulateColorValues(const __m256i outNdx, const __m256 colorValue, const __m256 fraction, const __m256i mask, const std::uint16_t* const pOutputBitmap, const bool fastload) noexcept - { - __m256i tempColor = _mm256_undefined_si256(); - if (fastload) - tempColor = _mm256_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)))); - else - { - // Gather with scale factor of 2 -> outNdx points to WORDs. Load these 8 WORDs and interpret them as epi32. 
- const __m256i tempColorAsI16 = _mm256_mask_i32gather_epi32(_mm256_setzero_si256(), reinterpret_cast(pOutputBitmap), outNdx, mask, 2); - // The high words of each epi32 color value are wrong -> we null them out. - tempColor = _mm256_blend_epi16(tempColorAsI16, _mm256_setzero_si256(), 0xaa); - } - const __m256 accumulatedColor = _mm256_fmadd_ps(colorValue, fraction, _mm256_cvtepi32_ps(tempColor)); // tempColor = 8 int in the range [0, 65535] - return _mm256_min_ps(accumulatedColor, _mm256_set1_ps(static_cast(0x0000ffff))); - } - - inline static __m256 accumulateColorValues(const __m256i outNdx, const __m256 colorValue, const __m256 fraction, const __m256i mask, const std::uint32_t* const pOutputBitmap, const bool fastload) noexcept - { - static_assert(sizeof(unsigned int) == sizeof(std::uint32_t)); - - const __m256 scalingFactor = _mm256_set1_ps(65536.0f); - - const __m256i tempColor = fastload - ? _mm256_loadu_si256(reinterpret_cast(pOutputBitmap + _mm256_cvtsi256_si32(outNdx))) - : _mm256_mask_i32gather_epi32(_mm256_setzero_si256(), reinterpret_cast(pOutputBitmap), outNdx, mask, 4); - const __m256 accumulatedColor = _mm256_fmadd_ps(colorValue, _mm256_mul_ps(fraction, scalingFactor), cvtEpu32Ps(tempColor)); - return _mm256_min_ps(accumulatedColor, _mm256_set1_ps(4294967040.0f)); // This constant is the next lower float value below UINTMAX. - } - - inline static __m256 accumulateColorValues(const __m256i outNdx, const __m256 colorValue, const __m256 fraction, const __m256i mask, const float* const pOutputBitmap, const bool fastload) noexcept - { - const __m256 tempColor = fastload - ? 
_mm256_loadu_ps(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)) - : _mm256_mask_i32gather_ps(_mm256_setzero_ps(), pOutputBitmap, outNdx, _mm256_castsi256_ps(mask), 4); - return _mm256_fmadd_ps(colorValue, fraction, tempColor); - } - - // Store accumulated color - - inline static void storeColorValue(const __m256i outNdx, const __m256 colorValue, const __m256i mask, std::uint16_t* const pOutputBitmap, const bool faststore) noexcept - { - if (faststore) - _mm_storeu_si128(reinterpret_cast<__m128i*>(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)), cvtPsEpu16(colorValue)); - else - { - const int iMask = _mm256_movemask_epi8(mask); - const auto checkWrite = [pOutputBitmap, iMask](const int mask, const size_t ndx, const float color) -> void - { - if ((iMask & mask) != 0) - pOutputBitmap[ndx] = static_cast(color); - }; - __m128 color = _mm256_castps256_ps128(colorValue); - checkWrite(1, _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); // Note: extract_ps(x, i) returns the bits of the i-th float as int. 
- checkWrite(1 << 4, _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); - checkWrite(1 << 8, _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); - checkWrite(1 << 12, _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); - color = _mm256_extractf128_ps(colorValue, 1); - checkWrite(1 << 16, _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); - checkWrite(1 << 20, _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); - checkWrite(1 << 24, _mm256_extract_epi32(outNdx, 6), AvxSupport::extractPs<2>(color)); - checkWrite(1 << 28, _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); - } - } - - inline static void storeColorValue(const __m256i outNdx, const __m256 colorValue, const __m256i mask, std::uint32_t* const pOutputBitmap, const bool faststore) noexcept - { - static_assert(sizeof(unsigned int) == sizeof(std::uint32_t)); - - if (faststore) - _mm256_storeu_si256(reinterpret_cast<__m256i*>(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)), cvtPsEpu32(colorValue)); - else - { - const int iMask = _mm256_movemask_epi8(mask); - const auto checkWrite = [pOutputBitmap, iMask](const int mask, const size_t ndx, const float color) -> void - { - if ((iMask & mask) != 0) - pOutputBitmap[ndx] = static_cast(color); - }; - __m128 color = _mm256_castps256_ps128(colorValue); - checkWrite(1, _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); - checkWrite(1 << 4, _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); - checkWrite(1 << 8, _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); - checkWrite(1 << 12, _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); - color = _mm256_extractf128_ps(colorValue, 1); - checkWrite(1 << 16, _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); - checkWrite(1 << 20, _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); - checkWrite(1 << 24, _mm256_extract_epi32(outNdx, 6), 
AvxSupport::extractPs<2>(color)); - checkWrite(1 << 28, _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); - } - } - - inline static void storeColorValue(const __m256i outNdx, const __m256 colorValue, const __m256i mask, float* const pOutputBitmap, const bool faststore) noexcept - { - if (faststore) - _mm256_storeu_ps(pOutputBitmap + _mm256_cvtsi256_si32(outNdx), colorValue); - else - { - const int iMask = _mm256_movemask_epi8(mask); - const auto checkWrite = [pOutputBitmap, iMask](const int mask, const size_t ndx, const float color) -> void - { - if ((iMask & mask) != 0) - pOutputBitmap[ndx] = color; - }; - __m128 color = _mm256_castps256_ps128(colorValue); - checkWrite(1, _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); // Note: extract_ps(x, i) returns the bits of the i-th float as int. - checkWrite(1 << 4, _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); - checkWrite(1 << 8, _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); - checkWrite(1 << 12, _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); - color = _mm256_extractf128_ps(colorValue, 1); - checkWrite(1 << 16, _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); - checkWrite(1 << 20, _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); - checkWrite(1 << 24, _mm256_extract_epi32(outNdx, 6), AvxSupport::extractPs<2>(color)); - checkWrite(1 << 28, _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); - } - } - - template - inline static T accumulateSingleColorValue(const size_t outNdx, const float newColor, const int mask, const T* const pOutputBitmap) noexcept - { - if (mask == 0) - return T{ 0 }; - - if constexpr (std::is_same::value) - { - const float accumulatedColor = static_cast(pOutputBitmap[outNdx]) + newColor; - return static_cast(std::min(accumulatedColor, static_cast(std::numeric_limits::max()))); - } - - if constexpr (std::is_same::value) - { - const float accumulatedColor = 
static_cast(pOutputBitmap[outNdx]) + newColor * 65536.0f; - return static_cast(std::min(accumulatedColor, 4294967040.0f)); // The next lower float value below UINTMAX. - } - - if constexpr (std::is_same::value) - return pOutputBitmap[outNdx] + newColor; - } - - // Shift and rotate for whole AVX vectors. - - template - inline static __m256i shiftRightEpi8(const __m256i x) noexcept - { - return _mm256_alignr_epi8(_mm256_zextsi128_si256(_mm256_extracti128_si256(x, 1)), x, N); - } - template - inline static __m256i shiftRightEpi32(const __m256i x) noexcept - { - return shiftRightEpi8<4 * N>(x); - } - - template - inline static __m256i shiftLeftEpi8(const __m256i x) noexcept - { - static_assert(N >= 0 && N <= 16); - return _mm256_alignr_epi8(x, _mm256_permute2x128_si256(x, x, 8), 16 - N); - } - template - inline static __m256i shiftLeftEpi32(const __m256i x) noexcept - { - static_assert(N == 1); - return _mm256_permutevar8x32_epi32(x, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); - } - - inline static __m256i shl1Epi16(const __m256i x, const int value) noexcept // CPU cycles: 2/2 + 3/1 + 1/1 = 6/4 - { - const __m128i src = _mm_insert_epi16(_mm_undefined_si128(), value, 7); // Value to the lowest 128bit-lane [highest element]. - const __m256i b = _mm256_permute2x128_si256(x, _mm256_zextsi128_si256(src), 0x02); // [lo(x), src] - return _mm256_alignr_epi8(x, b, 16 - 2); // [x, b] >> (14 bytes) lane-by-lane - } - inline static __m256i shr1Epi16(const __m256i x, const int value) noexcept // CPU cycles: 2/1 + 3/1 + 1/1 = 6/3 - { - const __m128i src = _mm_cvtsi32_si128(value); // Value to 128-bit-lane [lowest element]. 
- const __m256i a = _mm256_permute2x128_si256(x, _mm256_zextsi128_si256(src), 0x21); // [src, hi(x)] - return _mm256_alignr_epi8(a, x, 2); // [a, x] >> (2 bytes) lane-by-lane - } - - template - inline static __m256i rotateRightEpi8(const __m256i x) noexcept - { - if constexpr (N > 16) - return _mm256_alignr_epi8(x, _mm256_permute2x128_si256(x, x, 1), N - 16); - else - return _mm256_alignr_epi8(_mm256_permute2x128_si256(x, x, 1), x, N); - } - template - inline static __m256i rotateRightEpi32(const __m256i x) noexcept - { - return rotateRightEpi8<4 * N>(x); - } - - // Extract for PS has a strange signature (returns int), so we write an own version. - template - inline static float extractPs(const __m128 ps) - { - static_assert(NDX >= 0 && NDX < 4); - if constexpr (NDX == 0) - return _mm_cvtss_f32(ps); - else - return _mm_cvtss_f32(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(ps), NDX * 4))); - } -}; +#pragma once + +#include "cfa.h" +#include "ColorBitmap.h" +#include "GrayBitmap.h" + +class AvxSupport +{ +private: + // Unfortunately, we cannot use const here, because the member function are hardly never const declared. 
:-( + CMemoryBitmap& bitmap; + + template + auto* getColorPtr() { return dynamic_cast*>(&bitmap); } + template + auto* getGrayPtr() { return dynamic_cast*>(&bitmap); } + template + const auto* getColorPtr() const { return dynamic_cast*>(&bitmap); } + template + const auto* getGrayPtr() const { return dynamic_cast*>(&bitmap); } + + int getNrChannels() const; +public: + AvxSupport(CMemoryBitmap& b) noexcept; + + bool isColorBitmap() const; + template bool isColorBitmapOfType() const; + bool isMonochromeBitmap() const; + template bool isMonochromeBitmapOfType() const; + template bool isMonochromeCfaBitmapOfType() const; + bool isColorBitmapOrCfa() const; + + CFATYPE getCfaType() const; + + template + const std::vector& redPixels() const { return getColorPtr()->m_Red.m_vPixels; } + template + const std::vector& greenPixels() const { return getColorPtr()->m_Green.m_vPixels; } + template + const std::vector& bluePixels() const { return getColorPtr()->m_Blue.m_vPixels; } + template + const std::vector& grayPixels() const { return getGrayPtr()->m_vPixels; } + + template + std::vector& redPixels() { return getColorPtr()->m_Red.m_vPixels; } + template + std::vector& greenPixels() { return getColorPtr()->m_Green.m_vPixels; } + template + std::vector& bluePixels() { return getColorPtr()->m_Blue.m_vPixels; } + template + std::vector& grayPixels() { return getGrayPtr()->m_vPixels; } + + const int width() const; + + template + bool bitmapHasCorrectType() const; + + static bool checkAvx2CpuSupport(); + static bool checkSimdAvailability(); + static void reportCpuType(); + + template + inline static size_t numberOfAvxVectors(const size_t width) + { + static_assert(sizeof(ElementType) == 1 || sizeof(ElementType) == 2 || sizeof(ElementType) == 4 || sizeof(ElementType) == 8); + return width == 0 ? 0 : ((width - 1) * sizeof(ElementType)) / sizeof(VectorElementType) + 1; + } + + // When returning from AVX-code to non-AVX-code we should zero the upper 128 bits of all ymm registers. 
+ // Otherwise old Intel CPUs could suffer from performance degradations. + template + inline static T zeroUpper(const T returnValue) + { + static_assert(std::is_integral::value); + _mm256_zeroupper(); + return returnValue; + } + + // SIMD functions + + inline static __m256 wordToPackedFloat(const __m128i x) noexcept + { + return _mm256_cvtepi32_ps(_mm256_cvtepu16_epi32(x)); + } + + inline static std::tuple<__m256d, __m256d, __m256d, __m256d> wordToPackedDouble(const __m256i x) noexcept + { + const __m256i i1 = _mm256_cvtepu16_epi32(_mm256_castsi256_si128(x)); + const __m256i i2 = _mm256_cvtepu16_epi32(_mm256_extracti128_si256(x, 1)); + return { + _mm256_cvtepi32_pd(_mm256_castsi256_si128(i1)), + _mm256_cvtepi32_pd(_mm256_extracti128_si256(i1, 1)), + _mm256_cvtepi32_pd(_mm256_castsi256_si128(i2)), + _mm256_cvtepi32_pd(_mm256_extracti128_si256(i2, 1)) + }; + } + + inline static __m256 cvtEpu32Ps(const __m256i x) noexcept + { + const __m256i mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), x); // 0 > x (= x < 0) + const __m256 ps = _mm256_cvtepi32_ps(x); + const __m256 corr = _mm256_add_ps(_mm256_set1_ps(static_cast(0x100000000ULL)), ps); // UINTMAX - x (Note: 'add_ps' is correct!) + return _mm256_blendv_ps(ps, corr, _mm256_castsi256_ps(mask)); // Take (UINTMAX - x) where x < 0 + } + + inline static std::tuple<__m256d, __m256d> cvtEpu32Pd(const __m256i x) noexcept + { + const __m256i mask = _mm256_cmpgt_epi32(_mm256_setzero_si256(), x); // 0 > x (= x < 0) + const __m256d d1 = _mm256_cvtepi32_pd(_mm256_castsi256_si128(x)); + const __m256d d2 = _mm256_cvtepi32_pd(_mm256_extracti128_si256(x, 1)); + const __m256d corr1 = _mm256_add_pd(_mm256_set1_pd(static_cast(0x100000000ULL)), d1); // UINTMAX - x (Note: 'add_pd' is correct!) 
+ const __m256d corr2 = _mm256_add_pd(_mm256_set1_pd(static_cast(0x100000000ULL)), d2); + return { + _mm256_blendv_pd(d1, corr1, _mm256_cmp_pd(d1, _mm256_setzero_pd(), 17)), // 17: OP := _CMP_LT_OQ + _mm256_blendv_pd(d2, corr2, _mm256_cmp_pd(d2, _mm256_setzero_pd(), 17)) // Take (UINTMAX - x) where x < 0 + }; + } + + inline static std::tuple<__m256d, __m256d> cvtPsPd(const __m256 x) noexcept + { + return { + _mm256_cvtps_pd(_mm256_castps256_ps128(x)), + _mm256_cvtps_pd(_mm256_extractf128_ps(x, 1)) + }; + } + + inline static __m128i cvtEpi32Epu16(const __m256i epi32) noexcept + { + const __m256i epu16 = _mm256_packus_epi32(epi32, _mm256_castsi128_si256(_mm256_extracti128_si256(epi32, 1))); // (?, ?, ?, ?, a7, a6, a5, a4, a7, a6, a5, a4, a3, a2, a1, a0) + // Upper lane is now wrong and useless. + return _mm256_castsi256_si128(epu16); + } + + inline static __m128i cvtPsEpu16(const __m256 x) noexcept + { + const __m256i epi32 = _mm256_cvtps_epi32(x); + return cvtEpi32Epu16(epi32); + } + + inline static __m128i cvtTruncatePsEpu16(const __m256 x) noexcept + { + const __m256i epi32 = _mm256_cvttps_epi32(x); + return cvtEpi32Epu16(epi32); + } + + inline static __m256i cvtPsEpu32(const __m256 x) noexcept + { + // x >= INTMAX + 1 + const __m256 mask = _mm256_cmp_ps(x, _mm256_set1_ps(2147483648.0f), 29); // 29 = _CMP_GE_OQ (greater or equal, ordered, quiet) + const __m256 corr = _mm256_sub_ps(x, _mm256_set1_ps(4294967296.0f)); + return _mm256_cvttps_epi32(_mm256_blendv_ps(x, corr, mask)); + } + + inline static __m256i cmpGtEpu16(const __m256i a, const __m256i b) noexcept + { + const __m256i highBit = _mm256_set1_epi16(std::uint16_t{ 0x8000 }); + return _mm256_cmpgt_epi16(_mm256_xor_si256(a, highBit), _mm256_xor_si256(b, highBit)); + }; + + // Read color values from T* and return 2 x 8 packed single. 
+ inline static std::tuple<__m256, __m256> read16PackedSingle(const std::uint16_t* const pColor) noexcept + { + const __m256i icolor = _mm256_loadu_si256((const __m256i*)pColor); + const __m256 lo8 = wordToPackedFloat(_mm256_castsi256_si128(icolor)); + const __m256 hi8 = wordToPackedFloat(_mm256_extracti128_si256(icolor, 1)); + return { lo8, hi8 }; + }; + inline static std::tuple<__m256, __m256> read16PackedSingle(const std::uint32_t* const pColor) noexcept + { + return { + _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_loadu_epi32(pColor), 16)), // Shift 16 bits right while shifting in zeros. + _mm256_cvtepi32_ps(_mm256_srli_epi32(_mm256_loadu_epi32(pColor + 8), 16)) + }; + } + inline static std::tuple<__m256, __m256> read16PackedSingle(const float* const pColor) noexcept + { + return { _mm256_loadu_ps(pColor), _mm256_loadu_ps(pColor + 8) }; + } + + // Read 16 color values from T* with stride + inline static std::tuple<__m256, __m256> read16PackedSingleStride(const std::uint16_t* const pColor, const int stride) noexcept + { + const __m256i ndx = _mm256_mullo_epi32(_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7), _mm256_set1_epi32(stride)); + const __m256i v1 = _mm256_i32gather_epi32((const int*)pColor, ndx, 2); + const __m256i v2 = _mm256_i32gather_epi32((const int*)pColor, _mm256_add_epi32(ndx, _mm256_set1_epi32(8 * stride)), 2); // 8, 9, 10, 11, 12, 13, 14, 15 + return { + _mm256_cvtepi32_ps(_mm256_blend_epi16(v1, _mm256_setzero_si256(), 0xaa)), + _mm256_cvtepi32_ps(_mm256_blend_epi16(v2, _mm256_setzero_si256(), 0xaa)) + }; + } + // Note: ***** DOES NOT SHIFT 16 BITS RIGHT! 
***** + inline static std::tuple<__m256, __m256> read16PackedSingleStride(const std::uint32_t* const pColor, const int stride) noexcept + { + const __m256i ndx = _mm256_mullo_epi32(_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7), _mm256_set1_epi32(stride)); + const __m256i v1 = _mm256_i32gather_epi32((const int*)pColor, ndx, 4); + const __m256i v2 = _mm256_i32gather_epi32((const int*)pColor, _mm256_add_epi32(ndx, _mm256_set1_epi32(8 * stride)), 4); + return { + _mm256_cvtepi32_ps(v1), + _mm256_cvtepi32_ps(v2) + }; + } + inline static std::tuple<__m256, __m256> read16PackedSingleStride(const float* const pColor, const int stride) noexcept + { + const __m256i ndx1 = _mm256_mullo_epi32(_mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7), _mm256_set1_epi32(stride)); + const __m256i ndx2 = _mm256_add_epi32(ndx1, _mm256_set1_epi32(8 * stride)); + return { _mm256_i32gather_ps(pColor, ndx1, 4), _mm256_i32gather_ps(pColor, ndx2, 4) }; + } + + inline static __m256i cvt2xEpi32Epu16(const __m256i lo, const __m256i hi) + { + return _mm256_packus_epi32(_mm256_permute2x128_si256(lo, hi, 0x20), _mm256_permute2x128_si256(lo, hi, 0x31)); + } + + // Read color values from T* and return 16 x packed short + inline static __m256i read16PackedShort(const std::uint16_t* const pColor) + { + return _mm256_loadu_epi16(pColor); + } + inline static __m256i read16PackedShort(const std::uint32_t* const pColor) + { + const __m256i lo = _mm256_srli_epi32(_mm256_loadu_epi32(pColor), 16); // Shift 16 bits right while shifting in zeros. + const __m256i hi = _mm256_srli_epi32(_mm256_loadu_epi32(pColor + 8), 16); + return cvt2xEpi32Epu16(lo, hi); + } + inline static __m256i read16PackedShort(const float* const pColor) + { + // Min with 65536 not needed, because cvt2xEpi32Epu16 applies unsigned saturation to 16 bits. 
+// const __m256i lo = _mm256_min_epi32(_mm256_cvtps_epi32(_mm256_loadu_ps(pColor)), _mm256_set1_epi32(0x0ffff)); +// const __m256i hi = _mm256_min_epi32(_mm256_cvtps_epi32(_mm256_loadu_ps(pColor + 8)), _mm256_set1_epi32(0x0ffff)); + const __m256i loEpi32 = _mm256_cvtps_epi32(_mm256_loadu_ps(pColor)); + const __m256i hiEpi32 = _mm256_cvtps_epi32(_mm256_loadu_ps(pColor + 8)); + return cvt2xEpi32Epu16(loEpi32, hiEpi32); + } + + // Read color values from T* and return 2 x 8 x packed int + inline static std::tuple<__m256i, __m256i> read16PackedInt(const std::uint16_t* const pColor) + { + const __m256i epi16 = _mm256_loadu_si256((const __m256i*)pColor); + return { + _mm256_cvtepu16_epi32(_mm256_castsi256_si128(epi16)), + _mm256_cvtepu16_epi32(_mm256_extracti128_si256(epi16, 1)) + }; + } + inline static std::tuple<__m256i, __m256i> read16PackedInt(const std::uint32_t* const pColor) + { + return { + _mm256_srli_epi32(_mm256_loadu_si256((const __m256i*)pColor), 16), // Shift 16 bits right while shifting in zeros (divide by 65536). 
+ _mm256_srli_epi32(_mm256_loadu_si256(((const __m256i*)pColor) + 1), 16) + }; + } + inline static std::tuple<__m256i, __m256i> read16PackedInt(const float* const pColor) + { + return { + _mm256_min_epi32(_mm256_cvttps_epi32(_mm256_loadu_ps(pColor)), _mm256_set1_epi32(0x0000ffff)), + _mm256_min_epi32(_mm256_cvttps_epi32(_mm256_loadu_ps(pColor + 8)), _mm256_set1_epi32(0x0000ffff)) + }; + } + inline static std::tuple<__m256i, __m256i> read16PackedInt(const double*) + { + throw "read16PackedInt(const double*) is not implemented!"; + } + inline static std::tuple<__m256i, __m256i> read16PackedInt(const double* const pColor, const __m256d scalingFactor) + { + const __m128i lo1 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor), scalingFactor)); + const __m128i hi1 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor + 4), scalingFactor)); + + const __m128i lo2 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor + 8), scalingFactor)); + const __m128i hi2 = _mm256_cvttpd_epi32(_mm256_mul_pd(_mm256_loadu_pd(pColor + 12), scalingFactor)); + + return { + _mm256_min_epi32(_mm256_set_m128i(hi1, lo1), _mm256_set1_epi32(0x0000ffff)), + _mm256_min_epi32(_mm256_set_m128i(hi2, lo2), _mm256_set1_epi32(0x0000ffff)) + }; + } + + // Accumulate packed single newColor to T* oldColor + inline static __m256 accumulateColorValues(const __m256i outNdx, const __m256 colorValue, const __m256 fraction, const __m256i mask, const std::uint16_t* const pOutputBitmap, const bool fastload) noexcept + { + __m256i tempColor = _mm256_undefined_si256(); + if (fastload) + tempColor = _mm256_cvtepu16_epi32(_mm_loadu_si128(reinterpret_cast(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)))); + else + { + // Gather with scale factor of 2 -> outNdx points to WORDs. Load these 8 WORDs and interpret them as epi32. 
+ const __m256i tempColorAsI16 = _mm256_mask_i32gather_epi32(_mm256_setzero_si256(), reinterpret_cast(pOutputBitmap), outNdx, mask, 2); + // The high words of each epi32 color value are wrong -> we null them out. + tempColor = _mm256_blend_epi16(tempColorAsI16, _mm256_setzero_si256(), 0xaa); + } + const __m256 accumulatedColor = _mm256_fmadd_ps(colorValue, fraction, _mm256_cvtepi32_ps(tempColor)); // tempColor = 8 int in the range [0, 65535] + return _mm256_min_ps(accumulatedColor, _mm256_set1_ps(static_cast(0x0000ffff))); + } + + inline static __m256 accumulateColorValues(const __m256i outNdx, const __m256 colorValue, const __m256 fraction, const __m256i mask, const std::uint32_t* const pOutputBitmap, const bool fastload) noexcept + { + static_assert(sizeof(unsigned int) == sizeof(std::uint32_t)); + + const __m256 scalingFactor = _mm256_set1_ps(65536.0f); + + const __m256i tempColor = fastload + ? _mm256_loadu_si256(reinterpret_cast(pOutputBitmap + _mm256_cvtsi256_si32(outNdx))) + : _mm256_mask_i32gather_epi32(_mm256_setzero_si256(), reinterpret_cast(pOutputBitmap), outNdx, mask, 4); + const __m256 accumulatedColor = _mm256_fmadd_ps(colorValue, _mm256_mul_ps(fraction, scalingFactor), cvtEpu32Ps(tempColor)); + return _mm256_min_ps(accumulatedColor, _mm256_set1_ps(4294967040.0f)); // This constant is the next lower float value below UINTMAX. + } + + inline static __m256 accumulateColorValues(const __m256i outNdx, const __m256 colorValue, const __m256 fraction, const __m256i mask, const float* const pOutputBitmap, const bool fastload) noexcept + { + const __m256 tempColor = fastload + ? 
_mm256_loadu_ps(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)) + : _mm256_mask_i32gather_ps(_mm256_setzero_ps(), pOutputBitmap, outNdx, _mm256_castsi256_ps(mask), 4); + return _mm256_fmadd_ps(colorValue, fraction, tempColor); + } + + // Store accumulated color + + inline static void storeColorValue(const __m256i outNdx, const __m256 colorValue, const __m256i mask, std::uint16_t* const pOutputBitmap, const bool faststore) noexcept + { + if (faststore) + _mm_storeu_si128(reinterpret_cast<__m128i*>(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)), cvtPsEpu16(colorValue)); + else + { + const int iMask = _mm256_movemask_epi8(mask); + const auto checkWrite = [pOutputBitmap, iMask](const int mask, const size_t ndx, const float color) -> void + { + if ((iMask & mask) != 0) + pOutputBitmap[ndx] = static_cast(color); + }; + __m128 color = _mm256_castps256_ps128(colorValue); + checkWrite(1, _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); // Note: extract_ps(x, i) returns the bits of the i-th float as int. 
+ checkWrite(1 << 4, _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); + checkWrite(1 << 8, _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); + checkWrite(1 << 12, _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); + color = _mm256_extractf128_ps(colorValue, 1); + checkWrite(1 << 16, _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); + checkWrite(1 << 20, _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); + checkWrite(1 << 24, _mm256_extract_epi32(outNdx, 6), AvxSupport::extractPs<2>(color)); + checkWrite(1 << 28, _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); + } + } + + inline static void storeColorValue(const __m256i outNdx, const __m256 colorValue, const __m256i mask, std::uint32_t* const pOutputBitmap, const bool faststore) noexcept + { + static_assert(sizeof(unsigned int) == sizeof(std::uint32_t)); + + if (faststore) + _mm256_storeu_si256(reinterpret_cast<__m256i*>(pOutputBitmap + _mm256_cvtsi256_si32(outNdx)), cvtPsEpu32(colorValue)); + else + { + const int iMask = _mm256_movemask_epi8(mask); + const auto checkWrite = [pOutputBitmap, iMask](const int mask, const size_t ndx, const float color) -> void + { + if ((iMask & mask) != 0) + pOutputBitmap[ndx] = static_cast(color); + }; + __m128 color = _mm256_castps256_ps128(colorValue); + checkWrite(1, _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); + checkWrite(1 << 4, _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); + checkWrite(1 << 8, _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); + checkWrite(1 << 12, _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); + color = _mm256_extractf128_ps(colorValue, 1); + checkWrite(1 << 16, _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); + checkWrite(1 << 20, _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); + checkWrite(1 << 24, _mm256_extract_epi32(outNdx, 6), 
AvxSupport::extractPs<2>(color)); + checkWrite(1 << 28, _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); + } + } + + inline static void storeColorValue(const __m256i outNdx, const __m256 colorValue, const __m256i mask, float* const pOutputBitmap, const bool faststore) noexcept + { + if (faststore) + _mm256_storeu_ps(pOutputBitmap + _mm256_cvtsi256_si32(outNdx), colorValue); + else + { + const int iMask = _mm256_movemask_epi8(mask); + const auto checkWrite = [pOutputBitmap, iMask](const int mask, const size_t ndx, const float color) -> void + { + if ((iMask & mask) != 0) + pOutputBitmap[ndx] = color; + }; + __m128 color = _mm256_castps256_ps128(colorValue); + checkWrite(1, _mm256_cvtsi256_si32(outNdx), AvxSupport::extractPs<0>(color)); // Note: extract_ps(x, i) returns the bits of the i-th float as int. + checkWrite(1 << 4, _mm256_extract_epi32(outNdx, 1), AvxSupport::extractPs<1>(color)); + checkWrite(1 << 8, _mm256_extract_epi32(outNdx, 2), AvxSupport::extractPs<2>(color)); + checkWrite(1 << 12, _mm256_extract_epi32(outNdx, 3), AvxSupport::extractPs<3>(color)); + color = _mm256_extractf128_ps(colorValue, 1); + checkWrite(1 << 16, _mm256_extract_epi32(outNdx, 4), AvxSupport::extractPs<0>(color)); + checkWrite(1 << 20, _mm256_extract_epi32(outNdx, 5), AvxSupport::extractPs<1>(color)); + checkWrite(1 << 24, _mm256_extract_epi32(outNdx, 6), AvxSupport::extractPs<2>(color)); + checkWrite(1 << 28, _mm256_extract_epi32(outNdx, 7), AvxSupport::extractPs<3>(color)); + } + } + + template + inline static T accumulateSingleColorValue(const size_t outNdx, const float newColor, const int mask, const T* const pOutputBitmap) noexcept + { + if (mask == 0) + return T{ 0 }; + + if constexpr (std::is_same::value) + { + const float accumulatedColor = static_cast(pOutputBitmap[outNdx]) + newColor; + return static_cast(std::min(accumulatedColor, static_cast(std::numeric_limits::max()))); + } + + if constexpr (std::is_same::value) + { + const float accumulatedColor = 
static_cast(pOutputBitmap[outNdx]) + newColor * 65536.0f; + return static_cast(std::min(accumulatedColor, 4294967040.0f)); // The next lower float value below UINTMAX. + } + + if constexpr (std::is_same::value) + return pOutputBitmap[outNdx] + newColor; + } + + // Shift and rotate for whole AVX vectors. + + template + inline static __m256i shiftRightEpi8(const __m256i x) noexcept + { + return _mm256_alignr_epi8(_mm256_zextsi128_si256(_mm256_extracti128_si256(x, 1)), x, N); + } + template + inline static __m256i shiftRightEpi32(const __m256i x) noexcept + { + return shiftRightEpi8<4 * N>(x); + } + + template + inline static __m256i shiftLeftEpi8(const __m256i x) noexcept + { + static_assert(N >= 0 && N <= 16); + return _mm256_alignr_epi8(x, _mm256_permute2x128_si256(x, x, 8), 16 - N); + } + template + inline static __m256i shiftLeftEpi32(const __m256i x) noexcept + { + static_assert(N == 1); + return _mm256_permutevar8x32_epi32(x, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); + } + + inline static __m256i shl1Epi16(const __m256i x, const int value) noexcept // CPU cycles: 2/2 + 3/1 + 1/1 = 6/4 + { + const __m128i src = _mm_insert_epi16(_mm_undefined_si128(), value, 7); // Value to the lowest 128bit-lane [highest element]. + const __m256i b = _mm256_permute2x128_si256(x, _mm256_zextsi128_si256(src), 0x02); // [lo(x), src] + return _mm256_alignr_epi8(x, b, 16 - 2); // [x, b] >> (14 bytes) lane-by-lane + } + inline static __m256i shr1Epi16(const __m256i x, const int value) noexcept // CPU cycles: 2/1 + 3/1 + 1/1 = 6/3 + { + const __m128i src = _mm_cvtsi32_si128(value); // Value to 128-bit-lane [lowest element]. 
+ const __m256i a = _mm256_permute2x128_si256(x, _mm256_zextsi128_si256(src), 0x21); // [src, hi(x)] + return _mm256_alignr_epi8(a, x, 2); // [a, x] >> (2 bytes) lane-by-lane + } + + template + inline static __m256i rotateRightEpi8(const __m256i x) noexcept + { + if constexpr (N > 16) + return _mm256_alignr_epi8(x, _mm256_permute2x128_si256(x, x, 1), N - 16); + else + return _mm256_alignr_epi8(_mm256_permute2x128_si256(x, x, 1), x, N); + } + template + inline static __m256i rotateRightEpi32(const __m256i x) noexcept + { + return rotateRightEpi8<4 * N>(x); + } + + // Extract for PS has a strange signature (returns int), so we write an own version. + template + inline static float extractPs(const __m128 ps) + { + static_assert(NDX >= 0 && NDX < 4); + if constexpr (NDX == 0) + return _mm_cvtss_f32(ps); + else + return _mm_cvtss_f32(_mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(ps), NDX * 4))); + } +}; diff --git a/DeepSkyStacker/cfa.h b/DeepSkyStackerKernel/cfa.h similarity index 100% rename from DeepSkyStacker/cfa.h rename to DeepSkyStackerKernel/cfa.h diff --git a/DeepSkyStacker/dss_settings.cpp b/DeepSkyStackerKernel/dss_settings.cpp similarity index 100% rename from DeepSkyStacker/dss_settings.cpp rename to DeepSkyStackerKernel/dss_settings.cpp diff --git a/DeepSkyStacker/dss_settings.h b/DeepSkyStackerKernel/dss_settings.h similarity index 100% rename from DeepSkyStacker/dss_settings.h rename to DeepSkyStackerKernel/dss_settings.h diff --git a/dssbase.h b/DeepSkyStackerKernel/dssbase.h similarity index 100% rename from dssbase.h rename to DeepSkyStackerKernel/dssbase.h diff --git a/DeepSkyStacker/dssrect.h b/DeepSkyStackerKernel/dssrect.h similarity index 100% rename from DeepSkyStacker/dssrect.h rename to DeepSkyStackerKernel/dssrect.h diff --git a/DeepSkyStacker/group.cpp b/DeepSkyStackerKernel/group.cpp similarity index 100% rename from DeepSkyStacker/group.cpp rename to DeepSkyStackerKernel/group.cpp diff --git a/DeepSkyStacker/group.h 
b/DeepSkyStackerKernel/group.h similarity index 100% rename from DeepSkyStacker/group.h rename to DeepSkyStackerKernel/group.h diff --git a/DeepSkyStacker/imageloader.cpp b/DeepSkyStackerKernel/imageloader.cpp similarity index 100% rename from DeepSkyStacker/imageloader.cpp rename to DeepSkyStackerKernel/imageloader.cpp diff --git a/DeepSkyStacker/imageloader.h b/DeepSkyStackerKernel/imageloader.h similarity index 100% rename from DeepSkyStacker/imageloader.h rename to DeepSkyStackerKernel/imageloader.h diff --git a/DeepSkyStacker/matrix.h b/DeepSkyStackerKernel/matrix.h similarity index 100% rename from DeepSkyStacker/matrix.h rename to DeepSkyStackerKernel/matrix.h diff --git a/DeepSkyStackerKernel/stdafx.cpp b/DeepSkyStackerKernel/stdafx.cpp new file mode 100644 index 00000000..ddfdfe84 --- /dev/null +++ b/DeepSkyStackerKernel/stdafx.cpp @@ -0,0 +1,8 @@ +// stdafx.cpp : source file that includes just the standard includes +// DeepSkyStacker.pch will be the pre-compiled header +// stdafx.obj will contain the pre-compiled type information + +#include "stdafx.h" + +// TODO: reference any additional headers you need in STDAFX.H +// and not in this file diff --git a/DeepSkyStackerLive/DeepSkyStackerLive.cpp b/DeepSkyStackerLive/DeepSkyStackerLive.cpp index 48d82f8a..82cd6370 100644 --- a/DeepSkyStackerLive/DeepSkyStackerLive.cpp +++ b/DeepSkyStackerLive/DeepSkyStackerLive.cpp @@ -4,9 +4,8 @@ #include "stdafx.h" #include "DeepSkyStackerLive.h" #include "DeepSkyStackerLiveDlg.h" -#include "commonresource.h" #include "Ztrace.h" -#include "SetUILanguage.h" +#include "./../DeepSkyStacker/SetUILanguage.h" // Explicit include so not to pull over all headers in DSS if we added just a new include path. 
bool g_bShowRefStars = false; diff --git a/DeepSkyStackerLive/DeepSkyStackerLive.vcxproj b/DeepSkyStackerLive/DeepSkyStackerLive.vcxproj index 3e1a14e1..cb84adf8 100644 --- a/DeepSkyStackerLive/DeepSkyStackerLive.vcxproj +++ b/DeepSkyStackerLive/DeepSkyStackerLive.vcxproj @@ -84,7 +84,7 @@ X64 - ..\SMTP;..\ZClass;..\tools;..\LibTIFF;..\Libraw;..\CFitsIO;../;..\DeepSkyStacker;..\ChartCtrl;..\zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + .\..\DeepSkyStackerKernel;.\..\DeepSkyStackerLive;.\..\ChartCtrl;..\SMTP;..\ZClass;..\tools;..\LibTIFF;..\Libraw;..\CFitsIO;..\zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) _UNICODE;UNICODE;_DEBUG;NOMINMAX;LIBRAW_NODLL;WIN32;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;USE_LIBTIFF_STATIC;CPJNSMTP_NOSSL;%(PreprocessorDefinitions) EnableFastChecks MultiThreadedDebugDLL @@ -133,7 +133,7 @@ X64 - ..\SMTP;..\ZClass;..\tools;..\LibTIFF;..\Libraw;..\CFitsIO;../;..\DeepSkyStacker;..\ChartCtrl;..\zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) + .\..\DeepSkyStackerKernel;.\..\DeepSkyStackerLive;.\..\ChartCtrl;..\SMTP;..\ZClass;..\tools;..\LibTIFF;..\Libraw;..\CFitsIO;..\zlib;$(Boost_1_80_0);%(AdditionalIncludeDirectories) _UNICODE;UNICODE;NDEBUG;NOMINMAX;LIBRAW_NODLL;WIN32;_WINDOWS;_CRT_SECURE_NO_WARNINGS;_WINSOCK_DEPRECATED_NO_WARNINGS;USE_LIBTIFF_STATIC;CPJNSMTP_NOSSL;%(PreprocessorDefinitions) MultiThreadedDLL false @@ -182,60 +182,7 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -292,68 +239,8 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @@ -407,6 +294,9 @@
+ + {cb7b75f1-08f4-4c8d-a7ef-2aa33e9a67f1} + {a2f500c6-6903-4c2d-906d-ce86b99ba50d} diff --git a/DeepSkyStackerLive/ImageList.cpp b/DeepSkyStackerLive/ImageList.cpp index b93dbf93..7f5f4b49 100644 --- a/DeepSkyStackerLive/ImageList.cpp +++ b/DeepSkyStackerLive/ImageList.cpp @@ -197,7 +197,7 @@ void CImageListTab::AddImage(LPCTSTR szImage) strDepth.Format(IDS_FORMAT_GRAY, bmpInfo.m_lBitPerChannel); m_ImageList.SetItemText(nItem, COLUMN_DEPTH, (LPCTSTR)strDepth); - CString strText; + CString strText; if (bmpInfo.m_lISOSpeed) ISOToString(bmpInfo.m_lISOSpeed, strText); @@ -207,7 +207,7 @@ void CImageListTab::AddImage(LPCTSTR szImage) ISOToString(0, strText); m_ImageList.SetItemText(nItem, COLUMN_ISO_GAIN, (LPCTSTR)strText); - ExposureToString(bmpInfo.m_fExposure, strText); + strText = exposureToString(bmpInfo.m_fExposure).toStdWString().c_str(); m_ImageList.SetItemText(nItem, COLUMN_EXPOSURE, (LPCTSTR)strText); strText.Format(_T("%.1f"), bmpInfo.m_fAperture); diff --git a/DeepSkyStackerLive/MainBoard.cpp b/DeepSkyStackerLive/MainBoard.cpp index b282a487..5f6db8fb 100644 --- a/DeepSkyStackerLive/MainBoard.cpp +++ b/DeepSkyStackerLive/MainBoard.cpp @@ -432,7 +432,7 @@ void CMainBoard::DrawProgress(CDC * pDC) CString strText; CString strTime; - ExposureToString(m_fTotalExposureTime, strTime); + strTime = exposureToString(m_fTotalExposureTime).toStdWString().c_str(); strText.Format(m_strStatsMask, m_lNrPending, m_lNrRegistered, m_lNrStacked, (LPCTSTR)strTime); m_Stats.GetWindowRect(&rcStats); diff --git a/DeepSkyStackerLive/version.rc b/DeepSkyStackerLive/version.rc index 69e3ebc9..a3f1c236 100644 --- a/DeepSkyStackerLive/version.rc +++ b/DeepSkyStackerLive/version.rc @@ -3,7 +3,7 @@ // Version // -#include "..\DeepSkyStacker\DSSVersion.h" +#include "..\DeepSkyStackerKernel\DSSVersion.h" diff --git a/DeepSkyStackerTest/AvxAccumulateTest.cpp b/DeepSkyStackerTest/AvxAccumulateTest.cpp index 4a945b56..d9c53c06 100644 --- a/DeepSkyStackerTest/AvxAccumulateTest.cpp +++ 
b/DeepSkyStackerTest/AvxAccumulateTest.cpp @@ -1,17 +1,17 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/dssrect.h" +#include "dssrect.h" #define UNIT_TESTS #include "AvxAccumulateTest.h" -#include "../DeepSkyStacker/BitmapBase.h" -#include "../DeepSkyStacker/avx_avg.h" +#include "BitmapBase.h" +#include "avx_avg.h" -#include "../DeepSkyStacker/TaskInfo.h" -#include "../DeepSkyStacker/EntropyInfo.h" -#include "../DeepSkyStacker/ColorBitmap.h" -#include "../DeepSkyStacker/MedianFilterEngine.h" +#include "TaskInfo.h" +#include "EntropyInfo.h" +#include "ColorBitmap.h" +#include "MedianFilterEngine.h" TEST_CASE("AVX Accumulation FASTAVERAGE", "[AVX][Accumulation][FastAverage]") diff --git a/DeepSkyStackerTest/AvxAccumulateTest.h b/DeepSkyStackerTest/AvxAccumulateTest.h index 54de1297..570d0d70 100644 --- a/DeepSkyStackerTest/AvxAccumulateTest.h +++ b/DeepSkyStackerTest/AvxAccumulateTest.h @@ -1,7 +1,7 @@ #pragma once -#include "../DeepSkyStacker/DSSCommon.h" -#include "../DeepSkyStacker/dssrect.h" +#include "DSSCommon.h" +#include "dssrect.h" class CAllStackingTasks { diff --git a/DeepSkyStackerTest/AvxCfaTest.cpp b/DeepSkyStackerTest/AvxCfaTest.cpp index cc3c0d84..b5271120 100644 --- a/DeepSkyStackerTest/AvxCfaTest.cpp +++ b/DeepSkyStackerTest/AvxCfaTest.cpp @@ -1,10 +1,10 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/avx_cfa.h" -#include "../DeepSkyStacker/GrayBitmap.h" -#include "../DeepSkyStacker/Multitask.h" -#include "../DeepSkyStacker/MultiBitmap.h" -#include "../DeepSkyStacker/MedianFilterEngine.h" +#include "avx_cfa.h" +#include "GrayBitmap.h" +#include "Multitask.h" +#include "MultiBitmap.h" +#include "MedianFilterEngine.h" TEST_CASE("AVX CFA", "[AVX][CFA]") { diff --git a/DeepSkyStackerTest/AvxEntropyTest.cpp b/DeepSkyStackerTest/AvxEntropyTest.cpp index eb2306c6..13e0b9b8 100644 --- a/DeepSkyStackerTest/AvxEntropyTest.cpp +++ b/DeepSkyStackerTest/AvxEntropyTest.cpp @@ -1,8 +1,8 @@ #include "stdafx.h" 
#include "catch.h" -#include "../DeepSkyStacker/avx_entropy.h" -#include "../DeepSkyStacker/ColorBitmap.h" -#include "../DeepSkyStacker/Multitask.h" +#include "avx_entropy.h" +#include "ColorBitmap.h" +#include "Multitask.h" #include "AvxEntropyTest.h" std::tuple calcEntropy(const std::vector& incidences) diff --git a/DeepSkyStackerTest/AvxEntropyTest.h b/DeepSkyStackerTest/AvxEntropyTest.h index 0344eaee..c303b92d 100644 --- a/DeepSkyStackerTest/AvxEntropyTest.h +++ b/DeepSkyStackerTest/AvxEntropyTest.h @@ -1,5 +1,5 @@ #pragma once -#include "../DeepSkyStacker/EntropyInfo.h" +#include "EntropyInfo.h" class TestEntropyInfo : public CEntropyInfo { diff --git a/DeepSkyStackerTest/AvxHistogramTest.cpp b/DeepSkyStackerTest/AvxHistogramTest.cpp index cb238f40..2a37f703 100644 --- a/DeepSkyStackerTest/AvxHistogramTest.cpp +++ b/DeepSkyStackerTest/AvxHistogramTest.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/avx_histogram.h" -#include "../DeepSkyStacker/MedianFilterEngine.h" +#include "avx_histogram.h" +#include "MedianFilterEngine.h" TEST_CASE("AVX Histogram", "[AVX][Histogram]") { diff --git a/DeepSkyStackerTest/AvxStackingTest.cpp b/DeepSkyStackerTest/AvxStackingTest.cpp index b25289f7..42324c21 100644 --- a/DeepSkyStackerTest/AvxStackingTest.cpp +++ b/DeepSkyStackerTest/AvxStackingTest.cpp @@ -5,14 +5,14 @@ #include "AvxAccumulateTest.h" #include "AvxEntropyTest.h" -#include "../DeepSkyStacker/avx.h" -#include "../DeepSkyStacker/avx_median.h" -#include "../DeepSkyStacker/GrayBitmap.h" -#include "../DeepSkyStacker/EntropyInfo.h" -#include "../DeepSkyStacker/TaskInfo.h" -#include "../DeepSkyStacker/PixelTransform.h" -#include "../DeepSkyStacker/avx_entropy.h" -#include "../DeepSkyStacker/BackgroundCalibration.h" +#include "avx.h" +#include "avx_median.h" +#include "GrayBitmap.h" +#include "EntropyInfo.h" +#include "TaskInfo.h" +#include "PixelTransform.h" +#include "avx_entropy.h" +#include "BackgroundCalibration.h" 
TEST_CASE("AVX Stacking, no transform, no calib", "[AVX][Stacking][simple]") diff --git a/DeepSkyStackerTest/BitMapFillerTest.cpp b/DeepSkyStackerTest/BitMapFillerTest.cpp index aaf6d2c6..d96edd01 100644 --- a/DeepSkyStackerTest/BitMapFillerTest.cpp +++ b/DeepSkyStackerTest/BitMapFillerTest.cpp @@ -1,9 +1,9 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/avx_bitmap_filler.h" -#include "../DeepSkyStacker/ColorBitmap.h" -#include "../DeepSkyStacker/MedianFilterEngine.h" -#include "../DeepSkyStacker/DSSProgress.h" +#include "avx_bitmap_filler.h" +#include "ColorBitmap.h" +#include "MedianFilterEngine.h" +#include "DSSProgress.h" template void be2le(std::uint16_t(&out)[SZ], const std::uint16_t* pIn) diff --git a/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj b/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj index 13182434..dd6ba546 100644 --- a/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj +++ b/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj @@ -80,7 +80,7 @@ true stdcpp20 Use - ../zclass;../;$(Boost_1_80_0) + ../zclass;./../DeepSkyStackerKernel;$(Boost_1_80_0) /wd4828 /wd4702 %(AdditionalOptions) Level3 false @@ -103,7 +103,7 @@ true stdcpp20 Use - ../zclass;../;$(Boost_1_80_0) + ../zclass;./../DeepSkyStackerKernel;$(Boost_1_80_0) true /wd4828 /wd4702 %(AdditionalOptions) Level3 @@ -121,27 +121,6 @@ - - - - - - - - - - - - - - - - - - - - - @@ -159,24 +138,16 @@ - - - - - - - - - - - - - + + + {cb7b75f1-08f4-4c8d-a7ef-2aa33e9a67f1} + + diff --git a/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj.filters b/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj.filters index 3e2f8e17..3eb49bc2 100644 --- a/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj.filters +++ b/DeepSkyStackerTest/DeepSkyStackerTest.vcxproj.filters @@ -21,51 +21,27 @@ Source Files - - Source Files - Source Files - - Source Files - Source Files - - Source Files - - - Source Files - Source Files - - Source Files - Source Files - - Source Files - Source Files - - Source Files - Source Files 
Source Files - - Source Files - Source Files @@ -75,45 +51,6 @@ Source Files - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - @@ -125,45 +62,6 @@ Source Files - - Header Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - Source Files diff --git a/DeepSkyStackerTest/DssRectTest.cpp b/DeepSkyStackerTest/DssRectTest.cpp index 3c2cb94c..35e5fcdf 100644 --- a/DeepSkyStackerTest/DssRectTest.cpp +++ b/DeepSkyStackerTest/DssRectTest.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/DSSTools.h" -#include "../DeepSkyStacker/dssrect.h" +#include "DSSTools.h" +#include "dssrect.h" // Old CPointExt class diff --git a/DeepSkyStackerTest/OpenMpTest.cpp b/DeepSkyStackerTest/OpenMpTest.cpp index 211f5dc0..72901b83 100644 --- a/DeepSkyStackerTest/OpenMpTest.cpp +++ b/DeepSkyStackerTest/OpenMpTest.cpp @@ -1,9 +1,9 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/avx_bitmap_filler.h" -#include "../DeepSkyStacker/GrayBitmap.h" -#include "../DeepSkyStacker/BitMapFiller.h" -#include "../DeepSkyStacker/MedianFilterEngine.h" +#include "avx_bitmap_filler.h" +#include "GrayBitmap.h" +#include "BitMapFiller.h" +#include "MedianFilterEngine.h" TEST_CASE("OpenMP parallelization", "[OpenMP]") { diff --git a/DeepSkyStackerTest/PixelIteratorTest.cpp b/DeepSkyStackerTest/PixelIteratorTest.cpp index b09728a8..89967a65 100644 --- a/DeepSkyStackerTest/PixelIteratorTest.cpp +++ b/DeepSkyStackerTest/PixelIteratorTest.cpp @@ -1,7 +1,7 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/BitmapIterator.h" -#include "../DeepSkyStacker/ColorBitmap.h" 
+#include "BitmapIterator.h" +#include "ColorBitmap.h" TEST_CASE("Gray Pixel Iterator", "[GrayPixelIterator]") diff --git a/DeepSkyStackerTest/SkyBackGroupTest.cpp b/DeepSkyStackerTest/SkyBackGroupTest.cpp index 1a0ac768..a8241060 100644 --- a/DeepSkyStackerTest/SkyBackGroupTest.cpp +++ b/DeepSkyStackerTest/SkyBackGroupTest.cpp @@ -1,6 +1,6 @@ #include "stdafx.h" #include "catch.h" -#include "../DeepSkyStacker/SkyBackground.h" +#include "SkyBackground.h" TEST_CASE("SkyBackground", "[SkyBackground]") { diff --git a/Tools/ListViewCtrlEx.cpp b/Tools/ListViewCtrlEx.cpp index dc2c362f..6025e94a 100644 --- a/Tools/ListViewCtrlEx.cpp +++ b/Tools/ListViewCtrlEx.cpp @@ -13,7 +13,8 @@ Description: Implementation of class "CListBase" ******************************************************************************/ #include "stdafx.h" -#include "commonresource.h" +//#include "commonresource.h" +#include "resource.h" #include "ListViewCtrlEx.h" /*** Definition of class "CListCtrlEx" ***************************************/