update
4
main.bbl
@ -117,7 +117,7 @@ Z.~Qin, P.~Zhang, and X.~Li, ``Ultra fast deep lane detection with hybrid
|
|||||||
anchor driven ordinal classification,'' \emph{IEEE transactions on pattern
|
anchor driven ordinal classification,'' \emph{IEEE transactions on pattern
|
||||||
analysis and machine intelligence}, vol.~46, no.~5, pp. 2555--2568, 2022.
|
analysis and machine intelligence}, vol.~46, no.~5, pp. 2555--2568, 2022.
|
||||||
|
|
||||||
\bibitem{condlanenet}
|
\bibitem{CondLaneNet}
|
||||||
L.~Liu, X.~Chen, S.~Zhu, and P.~Tan, ``Condlanenet: a top-to-down lane
|
L.~Liu, X.~Chen, S.~Zhu, and P.~Tan, ``Condlanenet: a top-to-down lane
|
||||||
detection framework based on conditional convolution,'' in \emph{Proceedings
|
detection framework based on conditional convolution,'' in \emph{Proceedings
|
||||||
of the IEEE/CVF international conference on computer vision}, 2021, pp.
|
of the IEEE/CVF international conference on computer vision}, 2021, pp.
|
||||||
@ -173,7 +173,7 @@ P.~Sun, Y.~Jiang, E.~Xie, W.~Shao, Z.~Yuan, C.~Wang, and P.~Luo, ``What makes
|
|||||||
Machine Learning}.\hskip 1em plus 0.5em minus 0.4em\relax PMLR, 2021, pp.
|
Machine Learning}.\hskip 1em plus 0.5em minus 0.4em\relax PMLR, 2021, pp.
|
||||||
9934--9944.
|
9934--9944.
|
||||||
|
|
||||||
\bibitem{learnnms}
|
\bibitem{learnNMS}
|
||||||
J.~Hosang, R.~Benenson, and B.~Schiele, ``Learning non-maximum suppression,''
|
J.~Hosang, R.~Benenson, and B.~Schiele, ``Learning non-maximum suppression,''
|
||||||
in \emph{Proceedings of the IEEE conference on computer vision and pattern
|
in \emph{Proceedings of the IEEE conference on computer vision and pattern
|
||||||
recognition}, 2017, pp. 4507--4515.
|
recognition}, 2017, pp. 4507--4515.
|
||||||
|
278
main.tex
@ -53,7 +53,7 @@
|
|||||||
\maketitle
|
\maketitle
|
||||||
|
|
||||||
\begin{abstract}
|
\begin{abstract}
|
||||||
Lane detection is a critical and challenging task in autonomous driving, particularly in real-world scenarios where traffic lanes are often slender, lengthy, and partially obscured by other vehicles, complicating detection efforts. Existing anchor-based methods typically rely on prior Lane anchors to extract features and refine lane location and shape. Though achieving high performance, manually setting prior anchors is cumbersome, and ensuring adequate coverage across diverse datasets often requires a large number of dense anchors. Additionally, non-maximum suppression is used to suppress redundant predictions, which complicates real-world deployment and may fail in dense scenarios. In this study, we introduce Polar R-CNN, a NMS-free anchor-based method for lane detection. By incorporating both local and global polar coordinate systems, Polar R-CNN enables flexible anchor proposals and significantly reduces the number of anchors required without compromising performance. Additionally, we introduce a heuristic GNN-based NMS-free head that supports an end-to-end paradigm, making the model more deployment-friendly and enhancing performance in dense scenarios. Our method achieves competitive results on five popular lane detection benchmarks—Tusimple, CULane, LLAMAS, CurveLanes, and DL-Rail—while maintaining a lightweight design and straightforward structure. Our source code are available at \href{https://github.com/ShqWW/PolarRCNN}{\textit{https://github.com/ShqWW/Polar R-CNN}}.
|
Lane detection is a critical and challenging task in autonomous driving, particularly in real-world scenarios where traffic lanes are often slender, lengthy, and partially obscured by other vehicles, complicating detection efforts. Existing anchor-based methods typically rely on prior lane anchors to extract features and refine lane location and shape. Though achieving high performance, manually setting prior anchors is cumbersome, and ensuring adequate coverage across diverse datasets often requires a large number of dense anchors. Additionally, non-maximum suppression is used to suppress redundant predictions, which complicates real-world deployment and may fail in dense scenarios. In this study, we introduce Polar R-CNN, an NMS-free anchor-based method for lane detection. By incorporating both local and global polar coordinate systems, Polar R-CNN enables flexible anchor proposals and significantly reduces the number of anchors required without compromising performance. Additionally, we introduce a heuristic GNN-based NMS-free head that supports an end-to-end paradigm, making the model more deployment-friendly and enhancing performance in dense scenarios. Our method achieves competitive results on five popular lane detection benchmarks (TuSimple, CULane, LLAMAS, CurveLanes, and DL-Rail) while maintaining a lightweight design and straightforward structure. Our source code is available at \href{https://github.com/ShqWW/PolarRCNN}{\textit{https://github.com/ShqWW/PolarRCNN}}.
|
||||||
\end{abstract}
|
\end{abstract}
|
||||||
\begin{IEEEkeywords}
|
\begin{IEEEkeywords}
|
||||||
Lane detection, NMS-free, Graph neural network, Polar coordinate system.
|
Lane detection, NMS-free, Graph neural network, Polar coordinate system.
|
||||||
@ -73,20 +73,20 @@ In recent years, fueled by advancements in deep learning and the availability of
|
|||||||
\def\imgheight{0.5625\linewidth}
|
\def\imgheight{0.5625\linewidth}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/anchor_demo/anchor_fix_init.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/anchor_demo/anchor_fix_init.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/anchor_demo/anchor_fix_learned.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/anchor_demo/anchor_fix_learned.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/anchor_demo/anchor_proposal.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/anchor_demo/anchor_proposal.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/anchor_demo/gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/anchor_demo/gt.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption{Anchor settings of different methods. (a) The initial anchor settings of CLRNet. (b) The learned anchor settings of CLRNet trained on CULane. (c) The proposed anchors of our method. (d) The ground truth.}
|
\caption{Anchor settings of different methods. (a) The initial anchor settings of CLRNet. (b) The learned anchor settings of CLRNet trained on CULane. (c) The proposed anchors of our method. (d) The ground truth.}
|
||||||
@ -101,20 +101,20 @@ In recent years, fueled by advancements in deep learning and the availability of
|
|||||||
\def\imgheight{0.5625\linewidth}
|
\def\imgheight{0.5625\linewidth}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/nms_demo/less_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/nms_demo/less_gt.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/nms_demo/less_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/nms_demo/less_pred.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/nms_demo/redun_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/nms_demo/redun_gt.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/nms_demo/redun_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/nms_demo/redun_pred.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
|
|
||||||
@ -160,12 +160,12 @@ The lane detection aims to detect lane instances in an image. In this section, w
|
|||||||
|
|
||||||
\begin{figure*}[ht]
|
\begin{figure*}[ht]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/ovarall_architecture.png} % 替换为你的图片文件名
|
\includegraphics[width=\linewidth]{thesis_figure/ovarall_architecture.png} % replace with your image filename
|
||||||
\caption{The overall pipeline of Polar R-CNN. The architecture is simple and lightweight. The backbone (e.g. ResNet18) and FPN aims to extract feature of the image. And the Local polar head aims to proposed sparse line anchors. After pooling features sample along the line anchors, the global polar head give the final predictions. Triplet head is set in the Global polar Head, including an one-to-one classification head (O2O cls head), an one-to-many classification head (o2m cls head) and an one-to-many regression head (O2M reg Head). The one-to-one cls head aim to replace the NMS post-processing and select only one positive prediction sample for each ground truth from the redundant predictions from the o2m head.}
|
\caption{The overall pipeline of Polar R-CNN. The architecture is simple and lightweight. The backbone (e.g., ResNet18) and FPN extract features from the image, and the local polar head proposes sparse line anchors. After pooling the features sampled along these anchors, the global polar head gives the final predictions. A triplet head is placed in the global polar head, consisting of a one-to-one classification head (O2O cls head), a one-to-many classification head (O2M cls head), and a one-to-many regression head (O2M reg head). The O2O cls head aims to replace NMS post-processing by selecting only one positive prediction for each ground truth from the redundant predictions of the O2M head.}
|
||||||
\label{overall_architecture}
|
\label{overall_architecture}
|
||||||
\end{figure*}
|
\end{figure*}
|
||||||
|
|
||||||
\textbf{NMS-Free Object Detections}. Non-maximum muppression (NMS) is an important post-processing step in most general object detection methods. Detr \cite{detr} employs one-to-one label assignment to avoid redundant predictions without using NMS. Other NMS-free methods \cite{learnNMS} have also been proposed, addressing this issue from two aspects: model architecture and label assignment. Studies \cite{date} \cite{yolov10} suggest that one-to-one assignments are crucial for NMS-free predictions, but maintaining one-to-many assignments is still necessary to ensure effective feature learning of the model. Other works \cite{o3d} \cite{relationnet} consider the model’s expressive capacity to provide non-redundant predictions. However, few studies have analyzed the NMS-free paradigm for anchor-based lane detection methods as thoroughly as in general object detection. Most anchor-based lane detection methods still rely on NMS post-processing. In our work, besides label assignment, we extend the analysis to the detection head’s structure, focusing on achieving non-redundant (NMS-free) lane predictions.
|
\textbf{NMS-Free Object Detection}. Non-maximum suppression (NMS) is an important post-processing step in most general object detection methods. DETR \cite{detr} employs one-to-one label assignment to avoid redundant predictions without using NMS. Other NMS-free methods \cite{learnNMS} have also been proposed, addressing this issue from two aspects: model architecture and label assignment. Studies \cite{date} \cite{yolov10} suggest that one-to-one assignments are crucial for NMS-free predictions, but maintaining one-to-many assignments is still necessary to ensure effective feature learning. Other works \cite{o3d} \cite{relationnet} consider the model's expressive capacity to provide non-redundant predictions. However, few studies have analyzed the NMS-free paradigm for anchor-based lane detection methods as thoroughly as in general object detection, and most anchor-based lane detection methods still rely on NMS post-processing. In our work, besides label assignment, we extend the analysis to the detection head's structure, focusing on achieving non-redundant (NMS-free) lane predictions.
|
||||||
|
|
||||||
In this work, we aim to address to two issues in anchor-based lane detection mentioned above, the sparse lane anchor setting and NMS-free predictions.
|
In this work, we aim to address the two issues in anchor-based lane detection mentioned above: the sparse lane anchor setting and NMS-free predictions.
|
||||||
|
|
||||||
@ -194,6 +194,7 @@ The overall architecture of Polar R-CNN is illustrated in Fig. \ref{overall_arch
|
|||||||
$N^{nbr}_{i}$& set& The adjacent node set of the $i_{th}$ of anchor node\\
|
$N^{nbr}_{i}$& set& The adjacent node set of the $i_{th}$ anchor node\\
|
||||||
$C_{o2m}$ & scalar& The positive threshold of one-to-many confidence\\
|
$C_{o2m}$ & scalar& The positive threshold of one-to-many confidence\\
|
||||||
$C_{o2o}$ & scalar& The positive threshold of one-to-one confidence\\
|
$C_{o2o}$ & scalar& The positive threshold of one-to-one confidence\\
|
||||||
|
$d_{dim}$ & scalar& Dimension of the distance tensor.\\
|
||||||
% \midrule
|
% \midrule
|
||||||
% & & \\
|
% & & \\
|
||||||
% & & \\
|
% & & \\
|
||||||
@ -220,11 +221,11 @@ Lanes are characterized by their thin and elongated curved shapes. A suitable la
|
|||||||
\def\imgheight{0.4\linewidth}
|
\def\imgheight{0.4\linewidth}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth]{thsis_figure/coord/ray.png}
|
\includegraphics[width=\imgwidth]{thesis_figure/coord/ray.png}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth]{thsis_figure/coord/polar.png}
|
\includegraphics[width=\imgwidth]{thesis_figure/coord/polar.png}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption{Different descriptions for anchor parameters. (a) Ray: start point and orientation. (b) Polar: radius and angle.}
|
\caption{Different descriptions for anchor parameters. (a) Ray: start point and orientation. (b) Polar: radius and angle.}
|
||||||
@ -236,12 +237,11 @@ We define two types of polar coordinate systems: the global coordinate system an
|
|||||||
\subsection{Local Polar Head}
|
\subsection{Local Polar Head}
|
||||||
|
|
||||||
\textbf{Anchor formulation in Local polar head}. Inspired by the region proposal network in Faster R-CNN \cite{fasterrcnn}, the local polar head (LPH) aims to propose flexible, high-quality anchors aorund the lane ground truths within an image. As Fig. \ref{lph} and Fig. \ref{overall_architecture} demonstrate, the highest level $P_{3} \in \mathbb{R}^{C_{f} \times H_{f} \times W_{f}}$ of FPN feature maps is selected as the input for the Local Polar Head (LPH). Following a downsampling operation, the feature map is then fed into two branches: the regression branch $\phi _{reg}^{lph}\left(\cdot \right)$ and the classification branch $\phi _{cls}^{lph}\left(\cdot \right)$.
|
\textbf{Anchor Formulation in the Local Polar Head.} Inspired by the region proposal network in Faster R-CNN \cite{fasterrcnn}, the local polar head (LPH) aims to propose flexible, high-quality anchors around the lane ground truths within an image. As Fig. \ref{lph} and Fig. \ref{overall_architecture} demonstrate, the highest level $P_{3} \in \mathbb{R}^{C_{f} \times H_{f} \times W_{f}}$ of the FPN feature maps is selected as the input for the LPH. Following a downsampling operation, the feature map is fed into two branches: the regression branch $\phi _{reg}^{lph}\left(\cdot \right)$ and the classification branch $\phi _{cls}^{lph}\left(\cdot \right)$:
|
||||||
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
&F_d\gets DS\left( P_{3} \right), \,F_d\in \mathbb{R} ^{C_f\times H^{l}\times W^{l}}\\
|
&F_d\gets DS\left( P_{3} \right), \,F_d\in \mathbb{R} ^{C_f\times H^{l}\times W^{l}},\\
|
||||||
&F_{reg\,\,}\gets \phi _{reg}^{lph}\left( F_d \right), \,F_{reg\,\,}\in \mathbb{R} ^{2\times H^{l}\times W^{l}}\\
|
&F_{reg\,\,}\gets \phi _{reg}^{lph}\left( F_d \right), \,F_{reg\,\,}\in \mathbb{R} ^{2\times H^{l}\times W^{l}},\\
|
||||||
&F_{cls}\gets \phi _{cls}^{lph}\left( F_d \right), \,F_{cls}\in \mathbb{R} ^{H^{l}\times W^{l}}
|
&F_{cls}\gets \phi _{cls}^{lph}\left( F_d \right), \,F_{cls}\in \mathbb{R} ^{H^{l}\times W^{l}}.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{lph equ}
|
\label{lph equ}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
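For concreteness, a minimal sketch of the two LPH branches in Eq. (\ref{lph equ}) is given below. This is a hypothetical PyTorch module: the channel width, the downsampling factor, and the use of a single convolution layer per branch are illustrative assumptions rather than the exact configuration.
\begin{verbatim}
import torch
import torch.nn as nn

class LocalPolarHead(nn.Module):
    """Sketch of the LPH: downsample P3, then predict a (theta, radius)
    pair and a foreground score for every local-polar grid point."""
    def __init__(self, in_channels=64, ds_factor=2):
        super().__init__()
        # DS(.): simple strided-conv downsampling (an assumed choice)
        self.downsample = nn.Conv2d(in_channels, in_channels,
                                    kernel_size=3, stride=ds_factor, padding=1)
        # phi_reg: 2 output channels -> local angle and local radius
        self.reg_branch = nn.Conv2d(in_channels, 2, kernel_size=1)
        # phi_cls: 1 output channel -> anchor-proposal confidence
        self.cls_branch = nn.Conv2d(in_channels, 1, kernel_size=1)

    def forward(self, p3):                       # p3: (B, C_f, H_f, W_f)
        f_d = self.downsample(p3)                # (B, C_f, H_l, W_l)
        f_reg = self.reg_branch(f_d)             # (B, 2,   H_l, W_l)
        f_cls = self.cls_branch(f_d).squeeze(1)  # (B,      H_l, W_l)
        return f_reg, f_cls
\end{verbatim}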
@ -250,7 +250,7 @@ We define two types of polar coordinate systems: the global coordinate system an
|
|||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=0.45\textwidth]{thsis_figure/local_polar_head.png} % 替换为你的图片文件名
|
\includegraphics[width=0.45\textwidth]{thesis_figure/local_polar_head.png} % replace with your image filename
|
||||||
\caption{The main architecture of our model.}
|
\caption{The main architecture of the local polar head (LPH).}
|
||||||
\label{lph}
|
\label{lph}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
@ -258,11 +258,10 @@ We define two types of polar coordinate systems: the global coordinate system an
|
|||||||
\textbf{Loss Function.} During the training phase, as illustrated in Fig. \ref{lphlabel}, the ground truth labels for the Local Polar Head (LPH) are constructed as follows. The radius ground truth is defined as the shortest distance from a grid point (local origin point) to the ground truth lane curve. The angle ground truth is defined as the orientation of the vector from the grid point to the nearest point on the curve. A grid point is designated as a positive sample if its radius label is less than a threshold $\tau_{L}$ ; otherwise, it is considered a negative sample.
|
\textbf{Loss Function.} During the training phase, as illustrated in Fig. \ref{lphlabel}, the ground truth labels for the LPH are constructed as follows. The radius ground truth is defined as the shortest distance from a grid point (local origin point) to the ground truth lane curve. The angle ground truth is defined as the orientation of the vector from the grid point to the nearest point on the curve. A grid point is designated as a positive sample if its radius label is less than a threshold $\tau_{L}$; otherwise, it is considered a negative sample.
|
||||||
|
|
||||||
Once the regression and classification labels are established, the LPH can be trained using the smooth-L1 loss $d\left(\cdot \right)$ for regression and the binary cross-entropy loss $BCE\left( \cdot , \cdot \right)$ for classification. The LPH loss function is defined as follows:
|
Once the regression and classification labels are established, the LPH can be trained using the smooth-L1 loss $d\left(\cdot \right)$ for regression and the binary cross-entropy loss $BCE\left( \cdot , \cdot \right)$ for classification. The LPH loss function is defined as follows:
|
||||||
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\mathcal{L} _{lph}^{cls}&=BCE\left( F_{cls},F_{gt} \right) \\
|
\mathcal{L} _{lph}^{cls}&=BCE\left( F_{cls},F_{gt} \right), \\
|
||||||
\mathcal{L} _{lph}^{r\mathrm{e}g}&=\frac{1}{N_{lph}^{pos}}\sum_{j\in \left\{j|\hat{r}_i<\tau_{L} \right\}}{\left( d\left( \theta _j-\hat{\theta}_j \right) +d\left( r_j^L-\hat{r}_j^L \right) \right)}\\
|
\mathcal{L} _{lph}^{reg}&=\frac{1}{N_{lph}^{pos}}\sum_{j\in \left\{j|\hat{r}_j^{L}<\tau_{L} \right\}}{\left( d\left( \theta _j-\hat{\theta}_j \right) +d\left( r_j^L-\hat{r}_j^L \right) \right)}.\\
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{loss_lph}
|
\label{loss_lph}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
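The label construction described above can be sketched directly. The snippet below is an assumption-laden NumPy sketch: it represents a single ground-truth lane as densely sampled points and uses a plain arctangent for the angle label, which may differ from the exact implementation.
\begin{verbatim}
import numpy as np

def build_lph_labels(grid_points, lane_points, tau_l):
    """For each local-origin grid point: the radius label is the distance
    to the nearest lane point, the angle label is the orientation of the
    vector to that point, and the point is positive when radius < tau_l."""
    # grid_points: (N, 2); lane_points: (M, 2) sampled from one lane curve
    diff = lane_points[None, :, :] - grid_points[:, None, :]   # (N, M, 2)
    dist = np.linalg.norm(diff, axis=-1)                       # (N, M)
    nearest = dist.argmin(axis=1)
    idx = np.arange(len(grid_points))
    r_gt = dist[idx, nearest]                                   # radius labels
    vec = diff[idx, nearest]                                    # (N, 2)
    theta_gt = np.arctan2(vec[:, 1], vec[:, 0])                 # angle labels
    pos_mask = r_gt < tau_l                                     # positive grid points
    return theta_gt, r_gt, pos_mask
\end{verbatim}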
@ -271,7 +270,7 @@ Once the regression and classification labels are established, the LPH can be tr
|
|||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/coord/localpolar.png}
|
\includegraphics[width=\linewidth]{thesis_figure/coord/localpolar.png}
|
||||||
\caption{Label construction for local polar proposal module.}
|
\caption{Label construction for local polar proposal module.}
|
||||||
\label{lphlabel}
|
\label{lphlabel}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
@ -282,37 +281,35 @@ Global polar head (GPH) is a crucial component in the second stage of Polar R-CN
|
|||||||
\textbf{RoI Pooling Module.} RoI pooling module is designed to transform features sampled from lane anchors into a standard feature tensor. Once the local polar parameters of a lane anchor are given, they can be converted to global polar coordinates using the following equation:
|
\textbf{RoI Pooling Module.} RoI pooling module is designed to transform features sampled from lane anchors into a standard feature tensor. Once the local polar parameters of a lane anchor are given, they can be converted to global polar coordinates using the following equation:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
r^{g}_{j}=r^{l}_{j}+\left( \textbf{c}^{l}_{j}-\textbf{c}^{g}_{j} \right) ^{T}\left[\cos\theta_{j}, \sin\theta_{j} \right]
|
r^{g}_{j}=r^{l}_{j}+\left( \textbf{c}^{l}_{j}-\textbf{c}^{g} \right) ^{T}\left[\cos\theta_{j}; \sin\theta_{j} \right].
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
where $\textbf{c}^{l}_{j} \in \mathbb{R}^{2}$ and $\textbf{c}^{g} \in \mathbb{R}^{2}$ represent the Cartesian coordinates of local and global origins correspondingly.
|
where $\textbf{c}^{l}_{j} \in \mathbb{R}^{2}$ and $\textbf{c}^{g} \in \mathbb{R}^{2}$ represent the Cartesian coordinates of the local and global origins, respectively.
|
||||||
|
|
||||||
Next, feature points are sampled on the lane anchor. The y-coordinates of these points are uniformly sampled vertically from the image, as previously mentioned. The $x_{i}$ coordinates are computed using the global polar axis with the following equation:
|
Next, feature points are sampled on the lane anchor. The y-coordinates of these points are uniformly sampled vertically from the image, as previously mentioned. The $x_{i}$ coordinates are computed using the global polar axis with the following equation:
|
||||||
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
x_{i\,\,}=-y_i\tan \theta +\frac{r^{g}}{\cos \theta}
|
x_{i\,\,}=-y_i\tan \theta +\frac{r^{g}}{\cos \theta}.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
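The two formulas above can be combined into a short sketch. The global origin, the number of sampled rows, and the image height in the usage lines are placeholder values, not the actual settings.
\begin{verbatim}
import numpy as np

def local_to_global_radius(r_local, theta, c_local, c_global):
    """r^g = r^l + (c^l - c^g)^T [cos(theta), sin(theta)] for one anchor."""
    offset = c_local - c_global
    return r_local + offset @ np.array([np.cos(theta), np.sin(theta)])

def sample_anchor_xs(r_global, theta, ys):
    """x_i = -y_i * tan(theta) + r^g / cos(theta) at uniformly sampled y_i."""
    return -ys * np.tan(theta) + r_global / np.cos(theta)

# usage sketch (illustrative numbers): 72 rows on a 320-pixel-high image
ys = np.linspace(0, 319, 72)
r_g = local_to_global_radius(12.0, 0.3, np.array([100.0, 160.0]),
                             np.array([0.0, 0.0]))
xs = sample_anchor_xs(r_g, 0.3, ys)
\end{verbatim}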
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/detection_head.png} % 替换为你的图片文件名
|
\includegraphics[width=\linewidth]{thesis_figure/detection_head.png} % replace with your image filename
|
||||||
\caption{The main architecture of global polar head}
|
\caption{The main architecture of the global polar head.}
|
||||||
\label{gph}
|
\label{gph}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
Suppose the $P_{0}$, $P_{1}$ and $P_{2}$ denote the last three levels from FPN and $\boldsymbol{F}_{L}^{s}\in \mathbb{R} ^{N_p\times d_f}$ represent the $L_{th}$ sample point feature from $P_{L}$. The grid featuers from the three levels are extracted and fused together without cross layer cascade refinenment unlike CLRNet. To reduce the number of parameters, we employ a weight sum strategy to combine features from different layers, similar to \cite{detr}, but in a more compact form:
|
Suppose $P_{0}$, $P_{1}$ and $P_{2}$ denote the last three levels of the FPN and $\boldsymbol{F}_{L}^{s}\in \mathbb{R} ^{N_p\times d_f}$ represents the sample point features taken from level $P_{L}$. The grid features from the three levels are extracted and fused together without the cross-layer cascade refinement used in CLRNet. To reduce the number of parameters, we employ a weighted-sum strategy to combine features from different layers, similar to \cite{detr}, but in a more compact form:
|
||||||
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\boldsymbol{F}^s=\sum_{L=0}^2{\boldsymbol{F}_{L}^{s}\times \frac{e^{\boldsymbol{w}_{L}^{s}}}{\sum_{L=0}^2{e^{\boldsymbol{w}_{L}^{s}}}}}
|
\boldsymbol{F}^s=\sum_{L=0}^2{\boldsymbol{F}_{L}^{s}\times \frac{e^{\boldsymbol{w}_{L}^{s}}}{\sum_{L=0}^2{e^{\boldsymbol{w}_{L}^{s}}}}},
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
where $\boldsymbol{w}_{L}^{s}\in \mathbb{R} ^{N_p}$ represents the learnable aggregate weight, serving as a learned model weight. Instead of concatenating the three sampling features into $\boldsymbol{F}^s\in \mathbb{R} ^{N_p\times d_f\times 3}$ directly, the adaptive summation significantly reduces the feature dimensions to $\boldsymbol{F}^s\in \mathbb{R} ^{N_p\times d_f}$, which is one-third of the original dimension. The weighted sum tensors are then fed into fully connected layers to obtain the pooled RoI features of an anchor:
|
where $\boldsymbol{w}_{L}^{s}\in \mathbb{R} ^{N_p}$ denotes the learnable aggregation weights. Instead of directly concatenating the three sampled features into $\boldsymbol{F}^s\in \mathbb{R} ^{N_p\times d_f\times 3}$, the adaptive summation reduces the feature dimension to $\boldsymbol{F}^s\in \mathbb{R} ^{N_p\times d_f}$, one-third of the original. The weighted-sum tensor is then fed into fully connected layers to obtain the pooled RoI features of an anchor:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\boldsymbol{F}^{roi}\gets FC^{pooling}\left( \boldsymbol{F}^s \right), \boldsymbol{F}^{roi}\in \mathbb{R} ^{d_r}
|
\boldsymbol{F}^{roi}\gets FC^{pooling}\left( \boldsymbol{F}^s \right), \boldsymbol{F}^{roi}\in \mathbb{R} ^{d_r},
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
|
||||||
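A minimal sketch of this RoI pooling module is given below (hypothetical PyTorch). The number of sampled points, the per-point feature width $d_f$, and the pooled dimension $d_r$ are illustrative assumptions.
\begin{verbatim}
import torch
import torch.nn as nn

class PolarRoIPooling(nn.Module):
    """Softmax-weighted sum of the point features sampled from the three
    FPN levels, followed by a fully connected layer producing F^roi."""
    def __init__(self, num_points=36, d_f=64, d_r=192):
        super().__init__()
        # w^s_L: learnable per-point aggregation weights for the 3 levels
        self.level_weights = nn.Parameter(torch.zeros(3, num_points))
        self.fc_pooling = nn.Linear(num_points * d_f, d_r)

    def forward(self, level_feats):
        # level_feats: (3, N_anchors, N_p, d_f) sampled from P0, P1, P2
        w = torch.softmax(self.level_weights, dim=0)            # (3, N_p)
        fused = (level_feats * w[:, None, :, None]).sum(dim=0)  # (N_anchors, N_p, d_f)
        return self.fc_pooling(fused.flatten(1))                # (N_anchors, d_r)
\end{verbatim}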
@ -338,6 +335,7 @@ where $\boldsymbol{w}_{L}^{s}\in \mathbb{R} ^{N_p}$ represents the learnable agg
|
|||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{al_1-1}
|
\label{al_1-1}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
where $\land$ denotes the logical ``AND'' operation between two Boolean values.
|
||||||
\STATE Calculate the geometric prior adjacent matrix $\boldsymbol{M} \in \mathbb{R} ^{N_{pos} \times N_{pos}} $, where the element $M_{ij}$ in $\boldsymbol{M}$ is caculate as follows:
|
\STATE Calculate the geometric prior adjacency matrix $\boldsymbol{M} \in \mathbb{R} ^{N_{pos} \times N_{pos}} $, where the element $M_{ij}$ in $\boldsymbol{M}$ is calculated as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
@ -352,7 +350,7 @@ where $\boldsymbol{w}_{L}^{s}\in \mathbb{R} ^{N_p}$ represents the learnable agg
|
|||||||
\STATE Calculate the distance matrix $\boldsymbol{D} \in \mathbb{R} ^{N_{pos} \times N_{pos}}$, where the element $D_{ij}$ in $\boldsymbol{D}$ is defined as follows:
|
\STATE Calculate the distance matrix $\boldsymbol{D} \in \mathbb{R} ^{N_{pos} \times N_{pos}}$, where the element $D_{ij}$ in $\boldsymbol{D}$ is defined as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
D_{ij} = 1-d\left( \boldsymbol{x}_{i}^{b} + \varDelta \boldsymbol{x}_{i}^{roi}, \boldsymbol{x}_{j}^{b} + \varDelta \boldsymbol{x}_{j}^{roi}, \boldsymbol{e}_{i}, \boldsymbol{e}_{j}\right)
|
D_{ij} = 1-d\left( \boldsymbol{x}_{i}^{b} + \varDelta \boldsymbol{x}_{i}^{roi}, \boldsymbol{x}_{j}^{b} + \varDelta \boldsymbol{x}_{j}^{roi}, \boldsymbol{e}_{i}, \boldsymbol{e}_{j}\right),
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{al_1-3}
|
\label{al_1-3}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
@ -367,43 +365,42 @@ where $\boldsymbol{w}_{L}^{s}\in \mathbb{R} ^{N_p}$ represents the learnable agg
|
|||||||
\end{equation}
|
\end{equation}
|
||||||
|
|
||||||
|
|
||||||
\RETURN The final confidence $\tilde{s}_i$; %算法的返回值
|
\RETURN The final confidence $\tilde{s}_i$. % the return result of the algorithm
|
||||||
\end{algorithmic}
|
\end{algorithmic}
|
||||||
\label{Graph Fast NMS}
|
\label{Graph Fast NMS}
|
||||||
\end{algorithm}
|
\end{algorithm}
|
||||||
|
|
||||||
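Because the algorithm is only summarized above, the following is a hedged matrix-form sketch of Graph-based Fast NMS. The index-based tie-breaking and the use of a plain distance threshold $d_{\tau}$ are simplifying assumptions.
\begin{verbatim}
import torch

def graph_based_fast_nms(scores, pair_dist, prior_adj, d_tau):
    """scores:    (N,) one-to-many confidences s_i
       pair_dist: (N, N) pairwise lane distances (e.g. 1 - LaneIoU)
       prior_adj: (N, N) boolean geometric-prior adjacency matrix M
       d_tau:     suppression distance threshold.
    Prediction i is suppressed if some adjacent j with higher confidence
    (ties broken by index) lies closer than d_tau; no sorting or sequential
    iteration is required."""
    n = scores.numel()
    idx = torch.arange(n, device=scores.device)
    higher = scores[None, :] > scores[:, None]                   # s_j > s_i
    tie = (scores[None, :] == scores[:, None]) & (idx[None, :] < idx[:, None])
    compare = (higher | tie) & prior_adj                         # confidence AND prior
    suppressed = ((pair_dist < d_tau) & compare).any(dim=1)
    return ~suppressed                                           # keep mask
\end{verbatim}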
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/gnn.png} % 替换为你的图片文件名
|
\includegraphics[width=\linewidth]{thesis_figure/gnn.png} % 替换为你的图片文件名
|
||||||
\caption{The main architecture of gnn.}
|
\caption{The main architecture of the GNN.}
|
||||||
\label{gnn}
|
\label{gnn}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
|
|
||||||
\textbf{NMS vs NMS-free.} Let $\boldsymbol{F}^{roi}_{i}$ denotes the ROI features extracted from $i_{th}$ anchors and the three subheads using $\boldsymbol{F}^{roi}_{i}$ as input. For now, let us focus on the O2M classification (O2M cls) head and the O2M regression (O2M reg) head, which follow the old paradigm used in previous work and can serve as a baseline for the new one-to-one paradigm. To maintain simplicity and rigor, both the O2M cls head and the O2M reg head consist of two layers with activation functions, featuring a plain structure without any complex mechanisms such as attention or deformable convolution. as previously mentioned, merely replacing the one-to-many label assignment with one-to-one label assignment is insufficient for eliminating NMS post-processing. This is because anchors often exhibit significant overlap or are positioned very close to each other, as shown in Fig. \ref{anchor setting} (b)(c). Let the $\boldsymbol{F}^{roi}_{i}$ and $\boldsymbol{F}^{roi}_{j}$ represent the features from two overlapping (or very close) anchors, implying that $\boldsymbol{F}^{roi}_{i}$ and $\boldsymbol{F}^{roi}_{j}$ will be almost identical. Let $f_{plain}^{cls}$ denotes the neural structure used in O2M cls head and suppose it's trained with one-to-one label assignment. If $\boldsymbol{F}^{roi}_{i}$ is a positive sample and the $\boldsymbol{F}^{roi}_{j}$ is a negative sample, the ideal output should be as follows:
|
\textbf{NMS vs NMS-free.} Let $\boldsymbol{F}^{roi}_{i}$ denote the RoI features extracted from the $i_{th}$ anchor; the three subheads take $\boldsymbol{F}^{roi}_{i}$ as input. For now, let us focus on the O2M classification (O2M cls) head and the O2M regression (O2M reg) head, which follow the paradigm used in previous work and serve as a baseline for the new one-to-one paradigm. To maintain simplicity and rigor, both the O2M cls head and the O2M reg head consist of two layers with activation functions, featuring a plain structure without any complex mechanisms such as attention or deformable convolution. As previously mentioned, merely replacing the one-to-many label assignment with one-to-one label assignment is insufficient for eliminating NMS post-processing. This is because anchors often exhibit significant overlap or are positioned very close to each other, as shown in Fig. \ref{anchor setting} (b) and (c). Let $\boldsymbol{F}^{roi}_{i}$ and $\boldsymbol{F}^{roi}_{j}$ represent the features from two overlapping (or very close) anchors, implying that $\boldsymbol{F}^{roi}_{i}$ and $\boldsymbol{F}^{roi}_{j}$ will be almost identical. Let $f_{cls}^{plain}$ denote the neural structure used in the O2M cls head and suppose it is trained with one-to-one label assignment. If $\boldsymbol{F}^{roi}_{i}$ is a positive sample and $\boldsymbol{F}^{roi}_{j}$ is a negative sample, the ideal output should be as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
&\boldsymbol{F}_{i}^{roi}\approx \boldsymbol{F}_{j}^{roi}
|
&\boldsymbol{F}_{i}^{roi}\approx \boldsymbol{F}_{j}^{roi},
|
||||||
\\
|
\\
|
||||||
&f_{cls}^{plain}\left( \boldsymbol{F}_{i}^{roi} \right) \rightarrow 1
|
&f_{cls}^{plain}\left( \boldsymbol{F}_{i}^{roi} \right) \rightarrow 1,
|
||||||
\\
|
\\
|
||||||
&f_{cls}^{plain}\left( \boldsymbol{F}_{i}^{roi} \right) \rightarrow 0
|
&f_{cls}^{plain}\left( \boldsymbol{F}_{j}^{roi} \right) \rightarrow 0.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{sharp fun}
|
\label{sharp fun}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
|
||||||
|
Eq. (\ref{sharp fun}) suggests that $f_{cls}^{plain}$ needs to be ``sharp'' enough to differentiate between two similar features. That is to say, if the output of $f_{cls}^{plain}$ changes rapidly over short distances in the feature space, then $f_{cls}^{plain}$ needs to capture higher-frequency information. This issue is also discussed in \cite{o3d}. Capturing high-frequency information with a plain structure is difficult because a naive MLP tends to capture lower-frequency information \cite{xu2022overview}. In the most extreme case, where $\boldsymbol{F}_{i}^{roi} = \boldsymbol{F}_{j}^{roi}$, it becomes impossible to separate the two anchors into positive and negative samples; in practice, both confidences converge to around 0.5. This problem arises from the limitations of the input format and the structure of the naive MLP, which restrict its expressive capability for higher-frequency information. Therefore, it is crucial to establish relationships between anchors and design a new model structure to effectively represent ``sharp'' information.
|
||||||
|
|
||||||
The equation \ref{sharp fun} suggests that the property of $f_{cls}^{plain}$ need to be "sharp" enough to differentiate between two similar features. That is to say, the output of $f_{cls}^{plain}$ changes repidly over short proids or distances, it implies that $f_{cls}^{plain}$ need to captures information with higher frequency. This issue is also discussed in \cite{o3d}. Capturing the high frequency with a plain structure is difficult because a naive MLP tends to capture information with lower frequency \cite{xu2022overview}. In the most extreme case, where $\boldsymbol{F}_{i}^{roi} = \boldsymbol{F}_{j}^{roi}$, it becomes impossible to distinguish the two anchors to positive and negative samples completely; in practice, both confidences converge to around 0.5. This problem arises from the limitations of the input format and the structure of the naive MLP, which restrict its expressive capability for information with higher frequency. Therefore, it is crucial to establish relationships between anchors and design a new model structure to effectively represent “sharp” information.
|
It is easy to see that the ``ideal'' one-to-one branch is equivalent to the O2M cls branch combined with O2M regression and NMS post-processing. If NMS could be replaced by an equivalent but learnable function (e.g., a neural network with a specific structure), the O2O head could be trained to handle the one-to-one assignment. However, NMS involves sequential iteration and confidence sorting, which are challenging to reproduce with a neural network. Although previous works, such as RNN-based approaches \cite{stewart2016end}, utilize an iterative format, they are time-consuming and introduce additional complexity into the model training process due to their iterative nature. To eliminate the iteration process, we propose an equivalent formulation of Fast NMS \cite{yolact}.
|
||||||
|
|
||||||
It is easy to see that the "ideal" one-to-one branch is equivalence to O2M cls branch with O2M regression and NMS post-processing. If the NMS could be replaced by some equivalent but learnable functions (e.g. a neural network with specific structure), the O2O head could be trained to handle the one-to-one assignment. However, the NMS involves sequential iteration and confidence sorting, which are challenging to reproduce with a neural network. Although previous works, such as RNN-based approaches \cite{stewart2016end}, utilize an iterative format, they are time-consuming and introduce additional complexity into the model training process due to their iterative nature. To eliminate the iteration process, we proposed a equivalent format of Fast NMS\cite{yolact}.
|
|
||||||
|
|
||||||
The key rule of the NMS post-processing is as follows:
|
The key rule of the NMS post-processing is as follows:
|
||||||
Given a series of positive detections with redundancy, a detection result A is supressed by another detection result B if and only if:
|
Given a series of positive detections with redundancy, a detection result A is suppressed by another detection result B if and only if:
|
||||||
|
|
||||||
(1) The confidence of A is lower than that of B.
|
(1) The confidence of A is lower than that of B.
|
||||||
|
|
||||||
(2) The predefined distance (e.g. IoU distance and L1 distance) between A and B is smaller than a threshold.
|
(2) The predefined distance (e.g. IoU distance and L1 distance) between A and B is smaller than a threshold.
|
||||||
|
|
||||||
(3) B is not supressed by any other detection results.
|
(3) B is not suppressed by any other detection results.
|
||||||
|
|
||||||
For simplicity, Fast NMS only satisfies the condition (1) and (2), which may lead to an increase in false negative predictions but offers faster processing without sequential iteration. Leveraging the “iteration-free” property, we propose a further refinement called “sort-free” Fast NMS. This new approach, named Graph-based Fast NMS, is detailed in Algorithm \ref{Graph Fast NMS}.
|
For simplicity, Fast NMS only satisfies conditions (1) and (2), which may lead to an increase in false negative predictions but offers faster processing without sequential iteration. Leveraging the ``iteration-free'' property, we propose a further refinement called ``sort-free'' Fast NMS. This new approach, named Graph-based Fast NMS, is detailed in Algorithm \ref{Graph Fast NMS}.
|
||||||
|
|
||||||
@ -412,81 +409,78 @@ It is straightforward to demonstrate that, when all elements in $\boldsymbol{M}$
|
|||||||
According to the analysis of the shortcomings of traditional NMS post-processing shown in Fig. \ref{NMS setting}, the fundamental issue arises from the definition of the distance between predictions. Traditional NMS relies on geometric properties to define distances between predictions, which often neglects the contextual semantics. For example, in some scenarios, two predicted lanes with a small geometric distance should not be suppressed, such as the case of double lines or fork lines. Although setting a threshold $d_{\tau}$ can mitigate this problem, it is challenging to strike a balance between precision and recall.
|
According to the analysis of the shortcomings of traditional NMS post-processing shown in Fig. \ref{NMS setting}, the fundamental issue arises from the definition of the distance between predictions. Traditional NMS relies on geometric properties to define distances between predictions, which often neglects the contextual semantics. For example, in some scenarios, two predicted lanes with a small geometric distance should not be suppressed, such as the case of double lines or fork lines. Although setting a threshold $d_{\tau}$ can mitigate this problem, it is challenging to strike a balance between precision and recall.
|
||||||
|
|
||||||
To address this, we replace the explicit definition of the distance function with an implicit graph neural network. Additionally, the coordinates of anchors is also replace with the anchor features ${F}_{i}^{roi}$. According to information bottleneck theory \cite{alemi2016deep}, ${F}_{i}^{roi}$ , which contains the location and classification information, is sufficient for modelling the explicit geometric distance by neural network. Besides the geometric information, features ${F}_{i}^{roi}$ containes the implicit contextual information of an anchor, which provides additional clues for establishing implicit contextual distances between two anchors. The implicit contextual distance is calculated as follows:
|
To address this, we replace the explicit definition of the distance function with an implicit graph neural network. Additionally, the coordinates of the anchors are also replaced with the anchor features $\boldsymbol{F}_{i}^{roi}$. According to information bottleneck theory \cite{alemi2016deep}, $\boldsymbol{F}_{i}^{roi}$, which contains the location and classification information, is sufficient for modelling the explicit geometric distance with a neural network. Besides the geometric information, the features $\boldsymbol{F}_{i}^{roi}$ contain the implicit contextual information of an anchor, which provides additional clues for establishing implicit contextual distances between two anchors. The implicit contextual distance is calculated as follows:
|
||||||
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\tilde{\boldsymbol{F}}_{i}^{roi}\gets& \mathrm{Re}LU\left( FC_{o2o}^{roi}\left( \boldsymbol{F}_{i}^{roi} \right) \right)
|
\tilde{\boldsymbol{F}}_{i}^{roi}\gets& \mathrm{Re}LU\left( FC_{o2o}^{roi}\left( \boldsymbol{F}_{i}^{roi} \right) \right),
|
||||||
\\
|
\\
|
||||||
\boldsymbol{F}_{ij}^{edge}\gets& FC_{in}\left( \tilde{\boldsymbol{F}}_{i}^{roi} \right) -FC_{out}\left( \tilde{\boldsymbol{F}}_{i}^{roi} \right)
|
\boldsymbol{F}_{ij}^{edge}\gets& FC_{in}\left( \tilde{\boldsymbol{F}}_{i}^{roi} \right) -FC_{out}\left( \tilde{\boldsymbol{F}}_{j}^{roi} \right)
|
||||||
\\
|
\\
|
||||||
&+FC_{base}\left( \boldsymbol{x}_{i}^{b}-\boldsymbol{x}_{j}^{b} \right)
|
&+FC_{base}\left( \boldsymbol{x}_{i}^{b}-\boldsymbol{x}_{j}^{b} \right),
|
||||||
\\
|
\\
|
||||||
\boldsymbol{D}_{ij}^{edge}\gets& MLP_{edge}\left( \boldsymbol{F}_{ij}^{graph} \right)
|
\boldsymbol{D}_{ij}^{edge}\gets& MLP_{edge}\left( \boldsymbol{F}_{ij}^{edge} \right).
|
||||||
\\
|
\\
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{edge_layer}
|
\label{edge_layer}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
|
||||||
Equation \ref{edge_layer} represents the implicit expression of equation \ref{al_1-3}, where the distance $\boldsymbol{D}_{ij}^{edge}$ is no longer a scalar but a semantic tensor with dimension $d_{dis}$. $\boldsymbol{D}_{ij}^{edge}$ containes more complex information compared to traditional geometric distance. The confidence caculation is expressed as follows:
|
Eq. (\ref{edge_layer}) represents the implicit expression of Eq. (\ref{al_1-3}), where the distance $\boldsymbol{D}_{ij}^{edge}$ is no longer a scalar but a semantic tensor with dimension $d_{dim}$. $\boldsymbol{D}_{ij}^{edge}$ contains more complex information than the traditional geometric distance. The confidence calculation is expressed as follows:
|
||||||
|
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\\
|
\\
|
||||||
&\boldsymbol{D}_{i}^{node}\gets \underset{j\in \left\{ j|T_{ij}=1 \right\}}{\max}\boldsymbol{D}_{ij}^{edge}
|
&\boldsymbol{D}_{i}^{node}\gets \underset{j\in \left\{ j|T_{ij}=1 \right\}}{\max}\boldsymbol{D}_{ij}^{edge},
|
||||||
\\
|
\\
|
||||||
&\boldsymbol{F}_{i}^{node}\gets MLP_{node}\left( \boldsymbol{D}_{i}^{node} \right)
|
&\boldsymbol{F}_{i}^{node}\gets MLP_{node}\left( \boldsymbol{D}_{i}^{node} \right),
|
||||||
\\
|
\\
|
||||||
&\tilde{s}_i\gets \sigma \left( FC_{o2o,out}\left( \boldsymbol{F}_{i}^{node} \right) \right)
|
&\tilde{s}_i\gets \sigma \left( FC_{o2o,out}\left( \boldsymbol{F}_{i}^{node} \right) \right).
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{node_layer}
|
\label{node_layer}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
|
||||||
The equation \ref{node_layer} serves as the implicit replacement for equation \ref{al_1-4}. In this approach, we use elementwise max pooling of tensors instead of scalar-based max operations. The pooled tensor is then fed into a neural network with a sigmoid activation function to directly obtain the confidence. By eliminating the need for a predefined distance threshold, all confidence calculation patterns are derived from the training data.
|
Eq. (\ref{node_layer}) serves as the implicit replacement for Eq. (\ref{al_1-4}). In this approach, we use element-wise max pooling of tensors instead of scalar-based max operations. The pooled tensor is then fed into a neural network with a sigmoid activation function to directly obtain the confidence. By eliminating the need for a predefined distance threshold, all confidence calculation patterns are derived from the training data.
|
||||||
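A compact sketch of the resulting O2O classification head, combining Eq. (\ref{edge_layer}) and Eq. (\ref{node_layer}), is shown below (hypothetical PyTorch). The hidden sizes, the MLP depths, and the use of anchor x-coordinates as the geometric input to $FC_{base}$ are assumptions.
\begin{verbatim}
import torch
import torch.nn as nn

class O2OClsHead(nn.Module):
    """Edge features per anchor pair, element-wise max pooling over adjacent
    anchors, and a node MLP producing the final confidence."""
    def __init__(self, d_r=192, d_dist=32, n_offsets=72):
        super().__init__()
        self.fc_roi = nn.Linear(d_r, d_r)             # FC^{roi}_{o2o}
        self.fc_in = nn.Linear(d_r, d_dist)           # FC_{in}
        self.fc_out = nn.Linear(d_r, d_dist)          # FC_{out}
        self.fc_base = nn.Linear(n_offsets, d_dist)   # FC_{base} on x_i^b - x_j^b
        self.mlp_edge = nn.Sequential(nn.Linear(d_dist, d_dist), nn.ReLU(),
                                      nn.Linear(d_dist, d_dist))
        self.mlp_node = nn.Sequential(nn.Linear(d_dist, d_dist), nn.ReLU())
        self.fc_score = nn.Linear(d_dist, 1)          # FC_{o2o,out}

    def forward(self, f_roi, x_base, adj):
        # f_roi: (N, d_r), x_base: (N, n_offsets), adj: (N, N) bool matrix T
        f = torch.relu(self.fc_roi(f_roi))
        edge = (self.fc_in(f)[:, None, :] - self.fc_out(f)[None, :, :]
                + self.fc_base(x_base[:, None, :] - x_base[None, :, :]))
        d_edge = self.mlp_edge(edge)                               # (N, N, d_dist)
        # keep only adjacent anchors; a large negative value never wins the max
        d_edge = d_edge.masked_fill(~adj[..., None], -1e4)
        d_node = d_edge.max(dim=1).values                          # element-wise max over j
        return torch.sigmoid(self.fc_score(self.mlp_node(d_node))).squeeze(-1)
\end{verbatim}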
|
|
||||||
It should be noted that the O2O cls head depends on the predictons of O2M cls head as outlined in equation \ref{al_1-1}. From a probablity percpective, the confidence output by O2M cls head, $s_{j}$, represents the probability that the $j_{th}$ detection is a positive sample. The confidence output by O2O cls head, $\tilde{s}_i$, denotes the conditional probablity that $i_{th}$ sample shouldn't be supressed given the condition that the $i_{th}$ sample identified as a positive sample:
|
It should be noted that the O2O cls head depends on the predictions of the O2M cls head, as outlined in Eq. (\ref{al_1-1}). From a probability perspective, the confidence output by the O2M cls head, $s_{j}$, represents the probability that the $j_{th}$ detection is a positive sample. The confidence output by the O2O cls head, $\tilde{s}_i$, denotes the conditional probability that the $i_{th}$ sample should not be suppressed given that the $i_{th}$ sample is identified as a positive sample:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
&s_j|_{j=1}^{N_A}\equiv P\left( a_j\,\,is\,\,pos \right) \,\,
|
&s_j|_{j=1}^{N_A}\equiv P\left( a_j\,\,is\,\,pos \right), \,\,
|
||||||
\\
|
\\
|
||||||
&\tilde{s}_i|_{i=1}^{N_{pos}}\equiv P\left( a_i\,\,is\,\,saved|a_i\,is\,\,pos \right)
|
&\tilde{s}_i|_{i=1}^{N_{pos}}\equiv P\left( a_i\,\,is\,\,saved|a_i\,is\,\,pos \right).
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\label{probablity}
|
\label{probablity}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
|
||||||
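Under this interpretation, inference-time selection reduces to two thresholds. The sketch below is an assumption based on Eq. (\ref{al_1-1}) and the thresholds $C_{o2m}$ and $C_{o2o}$ from the notation table; the numeric defaults are placeholders.
\begin{verbatim}
import torch

def nms_free_select(s_o2m, s_o2o, c_o2m=0.4, c_o2o=0.5):
    """s_o2m: (N,) one-to-many confidences; s_o2o: (N,) one-to-one
    confidences aligned with the same anchors (zeros for non-candidates)."""
    candidates = s_o2m > c_o2m            # positives according to the O2M head
    keep = candidates & (s_o2o > c_o2o)   # not suppressed according to the O2O head
    return keep.nonzero(as_tuple=True)[0]
\end{verbatim}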
|
|
||||||
\textbf{Label assignment and Cost function} We use the label assignment (SimOTA) similar to previous works \cite{clrnet}\cite{clrernet}. However, to make the function more compact and consistent with general object detection works \cite{iouloss}\cite{giouloss}, we have redefined the lane IoU. As illustrated in Fig. \ref{glaneiou}, the newly-defined lane IoU, which we refer to as GLaneIoU, is redefined as follows:
|
\textbf{Label Assignment and Cost Function.} We use a label assignment (SimOTA) similar to previous works \cite{clrnet}\cite{clrernet}. However, to make the function more compact and consistent with general object detection works \cite{iouloss}\cite{giouloss}, we redefine the lane IoU. As illustrated in Fig. \ref{glaneiou}, the newly defined lane IoU, which we refer to as GLaneIoU, is given as follows:
|
||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/GLaneIoU.png} % 替换为你的图片文件名
|
\includegraphics[width=\linewidth]{thesis_figure/GLaneIoU.png} % replace with your image filename
|
||||||
\caption{Illustrations of GLaneIoU redefined in our work.}
|
\caption{Illustrations of GLaneIoU redefined in our work.}
|
||||||
\label{glaneiou}
|
\label{glaneiou}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
&w_{i}^{k}=\frac{\sqrt{\left( \Delta x_{i}^{k} \right) ^2+\left( \Delta y_{i}^{k} \right) ^2}}{\Delta y_{i}^{k}}w_{b}
|
&w_{i}^{k}=\frac{\sqrt{\left( \Delta x_{i}^{k} \right) ^2+\left( \Delta y_{i}^{k} \right) ^2}}{\Delta y_{i}^{k}}w_{b},
|
||||||
\\
|
\\
|
||||||
&\hat{d}_{i}^{\mathcal{O}}=\min \left( x_{i}^{p}+w_{i}^{p}, x_{i}^{q}+w_{i}^{q} \right) -\max \left( x_{i}^{p}-w_{i}^{p}, x_{i}^{q}-w_{i}^{q} \right)
|
&\hat{d}_{i}^{\mathcal{O}}=\min \left( x_{i}^{p}+w_{i}^{p}, x_{i}^{q}+w_{i}^{q} \right) -\max \left( x_{i}^{p}-w_{i}^{p}, x_{i}^{q}-w_{i}^{q} \right),
|
||||||
\\
|
\\
|
||||||
&\hat{d}_{i}^{\xi}=\max \left( x_{i}^{p}-w_{i}^{p}, x_{i}^{q}-w_{i}^{q} \right) -\min \left( x_{i}^{p}+w_{i}^{p}, x_{i}^{q}+w_{i}^{q} \right)
|
&\hat{d}_{i}^{\xi}=\max \left( x_{i}^{p}-w_{i}^{p}, x_{i}^{q}-w_{i}^{q} \right) -\min \left( x_{i}^{p}+w_{i}^{p}, x_{i}^{q}+w_{i}^{q} \right),
|
||||||
\\
|
\\
|
||||||
&d_{i}^{\mathcal{U}}=\max \left( x_{i}^{p}+w_{i}^{p}, x_{i}^{q}+w_{i}^{q} \right) -\min \left( x_{i}^{p}-w_{i}^{p}, x_{i}^{q}-w_{i}^{q} \right)
|
&d_{i}^{\mathcal{U}}=\max \left( x_{i}^{p}+w_{i}^{p}, x_{i}^{q}+w_{i}^{q} \right) -\min \left( x_{i}^{p}-w_{i}^{p}, x_{i}^{q}-w_{i}^{q} \right),
|
||||||
\\
|
\\
|
||||||
&d_{i}^{\mathcal{O}}=\max \left( \hat{d}_{i}^{\mathcal{O}},0 \right) \,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\, d_{i}^{\xi}=\max \left( \hat{d}_{i}^{\xi},0 \right)
|
&d_{i}^{\mathcal{O}}=\max \left( \hat{d}_{i}^{\mathcal{O}},0 \right) \,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\,\, d_{i}^{\xi}=\max \left( \hat{d}_{i}^{\xi},0 \right).
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
|
|
||||||
The definations of $d_{i}^{\mathcal{O}}$ and $d_{i}^{\mathcal{\xi}}$ is similar but slightly different from those in \cite{clrnet} and \cite{adnet}, with adjustments made to ensure the values are non-negative. This format is intended to maintain consistency with the IoU definitions used for bounding boxes. Therefore, the overall GLaneIoU is given as follows:
|
The definitions of $d_{i}^{\mathcal{O}}$ and $d_{i}^{\mathcal{\xi}}$ are similar to, but slightly different from, those in \cite{clrnet} and \cite{adnet}, with adjustments made to ensure the values are non-negative. This format is intended to maintain consistency with the IoU definitions used for bounding boxes. Therefore, the overall GLaneIoU is given as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
GLaneIoU\,\,=\,\,\frac{\sum\nolimits_{i=j}^k{d_{i}^{\mathcal{O}}}}{\sum\nolimits_{i=j}^k{d_{i}^{\mathcal{U}}}}-g\frac{\sum\nolimits_{i=j}^k{d_{i}^{\xi}}}{\sum\nolimits_{i=j}^k{d_{i}^{\mathcal{U}}}}
|
GLaneIoU\,\,=\,\,\frac{\sum\nolimits_{i=j}^k{d_{i}^{\mathcal{O}}}}{\sum\nolimits_{i=j}^k{d_{i}^{\mathcal{U}}}}-g\frac{\sum\nolimits_{i=j}^k{d_{i}^{\xi}}}{\sum\nolimits_{i=j}^k{d_{i}^{\mathcal{U}}}},
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
where j and k are the indices of the valid points (the start point and the end point). It's straightforward to observed that when $g=0$, the GLaneIoU is correspond to GIoU\cite{giouloss} for bounding box, with a value range of $\left[0, 1 \right]$. When $g=1$, the GLaneIoU is correspond to GIoU for bounding box, with a value range of $\left(-1, 1 \right]$. In general, when $g>0$, the value range of GLaneIoU is $\left(-g, 1 \right]$.
|
where $j$ and $k$ are the indices of the valid points (the start point and the end point). It is straightforward to observe that when $g=0$, GLaneIoU corresponds to the IoU \cite{iouloss} for bounding boxes, with a value range of $\left[0, 1 \right]$; when $g=1$, GLaneIoU corresponds to the GIoU \cite{giouloss} for bounding boxes, with a value range of $\left(-1, 1 \right]$. In general, when $g>0$, the value range of GLaneIoU is $\left(-g, 1 \right]$.
|
||||||
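A sketch of GLaneIoU for a single lane pair, following the width, overlap, and union terms defined above, is given below (PyTorch). Padding the last segment width and the default values of $w_b$ and $g$ are assumptions.
\begin{verbatim}
import torch

def glane_iou(xs_p, xs_q, ys, w_b=2.5, g=1.0):
    """xs_p, xs_q, ys: (N,) x-coordinates of the two lanes and the shared
    y-coordinates over the valid rows j..k."""
    def half_width(xs):
        dx, dy = xs[1:] - xs[:-1], ys[1:] - ys[:-1]
        w = torch.sqrt(dx ** 2 + dy ** 2) / dy.abs() * w_b   # slope-aware width
        return torch.cat([w, w[-1:]])                        # pad to length N
    w_p, w_q = half_width(xs_p), half_width(xs_q)
    inter = torch.minimum(xs_p + w_p, xs_q + w_q) - torch.maximum(xs_p - w_p, xs_q - w_q)
    gap   = torch.maximum(xs_p - w_p, xs_q - w_q) - torch.minimum(xs_p + w_p, xs_q + w_q)
    union = torch.maximum(xs_p + w_p, xs_q + w_q) - torch.minimum(xs_p - w_p, xs_q - w_q)
    d_o, d_xi = inter.clamp(min=0), gap.clamp(min=0)
    return d_o.sum() / union.sum() - g * d_xi.sum() / union.sum()
\end{verbatim}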
We then define the cost function between $i_{th}$ prediction and $j_{th}$ ground truth as follows like \cite{detr}:
|
We then define the cost function between the $i_{th}$ prediction and the $j_{th}$ ground truth, similar to \cite{detr}, as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\mathcal{C} _{ij}=\left(s_i\right)^{\beta_c}\times \left( GLaneIoU_{ij, g=0} \right) ^{\beta_r}
|
\mathcal{C} _{ij}=\left(s_i\right)^{\beta_c}\times \left( GLaneIoU_{ij, g=0} \right) ^{\beta_r}.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
This cost function is more compact than those in previous works\cite{clrnet}\cite{adnet} and takes both location and confidence into account. For label assignment, SimOTA (with k=4) \cite{yolox} is used for the two O2M heads with one-to-many assignment, while the Hungarian \cite{detr} algorithm is employed for the O2O classification head for one-to-one assignment.
|
This cost function is more compact than those in previous works \cite{clrnet}\cite{adnet} and takes both location and confidence into account. For label assignment, SimOTA (with $k=4$) \cite{yolox} is used for the two O2M heads with one-to-many assignment, while the Hungarian algorithm \cite{detr} is employed for the O2O classification head with one-to-one assignment.
|
||||||
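A sketch of the cost matrix and of the one-to-one matching step is given below (PyTorch plus SciPy's Hungarian solver). The exponents $\beta_c$ and $\beta_r$ are placeholders, and treating the cost as a quantity to be maximized is an assumption consistent with its definition (larger confidence and larger GLaneIoU indicate a better match).
\begin{verbatim}
import torch
from scipy.optimize import linear_sum_assignment

def assignment_costs(scores, ious, beta_c=1.0, beta_r=3.0):
    """C_ij = s_i^beta_c * GLaneIoU_ij^beta_r with GLaneIoU computed at g=0,
    so the IoU term lies in [0, 1]."""
    return scores[:, None] ** beta_c * ious.clamp(min=0) ** beta_r  # (N_pred, N_gt)

def o2o_assign(cost):
    """Hungarian matching for the O2O cls head; negate to maximize the cost."""
    rows, cols = linear_sum_assignment((-cost).detach().cpu().numpy())
    return rows, cols
\end{verbatim}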
@ -494,27 +488,27 @@ This cost function is more compact than those in previous works\cite{clrnet}\cit
|
|||||||
\textbf{Loss function} We use focal loss \cite{focal} for O2O cls head and O2M cls head:
|
\textbf{Loss Function.} We use focal loss \cite{focal} for the O2O cls head and the O2M cls head:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\mathcal{L} _{\,\,o2m}^{cls}&=\sum_{i\in \varOmega _{pos}^{o2m}}{\alpha _{o2m}\left( 1-s_i \right) ^{\gamma}\log \left( s_i \right)}\\&+\sum_{i\in \varOmega _{neg}^{o2m}}{\left( 1-\alpha _{o2m} \right) \left( s_i \right) ^{\gamma}\log \left( 1-s_i \right)}
|
\mathcal{L} _{o2m}^{cls}&=-\sum_{i\in \varOmega _{pos}^{o2m}}{\alpha _{o2m}\left( 1-s_i \right) ^{\gamma}\log \left( s_i \right)}\\&-\sum_{i\in \varOmega _{neg}^{o2m}}{\left( 1-\alpha _{o2m} \right) \left( s_i \right) ^{\gamma}\log \left( 1-s_i \right)},
|
||||||
\\
|
\\
|
||||||
\mathcal{L} _{\,\,o2o}^{cls}&=\sum_{i\in \varOmega _{pos}^{o2o}}{\alpha _{o2o}\left( 1-\tilde{s}_i \right) ^{\gamma}\log \left( \tilde{s}_i \right)}\\&+\sum_{i\in \varOmega _{neg}^{o2o}}{\left( 1-\alpha _{o2o} \right) \left( \tilde{s}_i \right) ^{\gamma}\log \left( 1-\tilde{s}_i \right)}
|
\mathcal{L} _{o2o}^{cls}&=-\sum_{i\in \varOmega _{pos}^{o2o}}{\alpha _{o2o}\left( 1-\tilde{s}_i \right) ^{\gamma}\log \left( \tilde{s}_i \right)}\\&-\sum_{i\in \varOmega _{neg}^{o2o}}{\left( 1-\alpha _{o2o} \right) \left( \tilde{s}_i \right) ^{\gamma}\log \left( 1-\tilde{s}_i \right)}.
|
||||||
\\
|
\\
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
where the set of the one-to-one sample, $\varOmega _{pos}^{o2o}$ and $\varOmega _{neg}^{o2o}$, is restricted to the positive sample set of O2M cls head:
|
where the set of the one-to-one sample, $\varOmega _{pos}^{o2o}$ and $\varOmega _{neg}^{o2o}$, is restricted to the positive sample set of O2M cls head:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\varOmega _{pos}^{o2o}\cup \varOmega _{neg}^{o2o}=\left\{ i|s_i>C_{o2m} \right\}
|
\varOmega _{pos}^{o2o}\cup \varOmega _{neg}^{o2o}=\left\{ i|s_i>C_{o2m} \right\}.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
only one sample with confidence larger than $C_{o2m}$ is chosed as the canditate sample of O2O cls head. According to \cite{pss}, to maintain feature quality during training stage, the gradient of O2O cls head are stopped from propagating back to the rest of the network (stop from the roi feature of the anchor $\boldsymbol{F}_{i}^{roi}$). Additionally, we use the rank loss to increase the gap between positive and negative confidences of O2O cls head:
|
Only the samples with confidence larger than $C_{o2m}$ are chosen as candidate samples for the O2O cls head. According to \cite{pss}, to maintain feature quality during the training stage, the gradients of the O2O cls head are stopped from propagating back to the rest of the network (i.e., they are stopped at the RoI features of the anchors, $\boldsymbol{F}_{i}^{roi}$). Additionally, we use a rank loss to increase the gap between the positive and negative confidences of the O2O cls head:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
&\mathcal{L} _{\,\,rank}=\frac{1}{N_{rank}}\sum_{i\in \varOmega _{pos}^{o2o}}{\sum_{j\in \varOmega _{neg}^{o2o}}{\max \left( 0, \tau _{rank}-\tilde{s}_i+\tilde{s}_j \right)}}\\
|
&\mathcal{L} _{\,\,rank}=\frac{1}{N_{rank}}\sum_{i\in \varOmega _{pos}^{o2o}}{\sum_{j\in \varOmega _{neg}^{o2o}}{\max \left( 0, \tau _{rank}-\tilde{s}_i+\tilde{s}_j \right)}}\\
|
||||||
&N_{rank}=\left| \varOmega _{pos}^{o2o} \right|\left| \varOmega _{neg}^{o2o} \right|
|
&N_{rank}=\left| \varOmega _{pos}^{o2o} \right|\left| \varOmega _{neg}^{o2o} \right|.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
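For clarity, the snippet below is a minimal PyTorch-style sketch of the focal classification terms (written with the conventional negative sign) and of the rank loss defined above; the shapes and the value of $\tau_{rank}$ are illustrative assumptions rather than the released implementation.
\begin{verbatim}
import torch

def focal_terms(s, labels, alpha, gamma=2.0):
    # s: confidences in (0, 1); labels: 1 for positive, 0 for negative samples.
    pos, neg = labels == 1, labels == 0
    l_pos = -(alpha * (1 - s[pos]) ** gamma * torch.log(s[pos] + 1e-8)).sum()
    l_neg = -((1 - alpha) * s[neg] ** gamma * torch.log(1 - s[neg] + 1e-8)).sum()
    return l_pos + l_neg

def rank_loss(s_pos, s_neg, tau_rank=0.5):
    # s_pos / s_neg: O2O confidences of positive / negative candidates.
    margins = tau_rank - s_pos[:, None] + s_neg[None, :]  # all pos-neg pairs
    return torch.clamp(margins, min=0).sum() / (s_pos.numel() * s_neg.numel())
\end{verbatim}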
|
|
||||||
We directly use the GLaneIoU loss, $\mathcal{L}_{GLaneIoU}$, to regression the offset of xs (with g=1) and Smooth-L1 loss for the regression of end points (namely the y axis of the start point and the end point), denoted as $\mathcal{L} _{end}$. In order to make model learn the global features, we proposed the auxloss illustrated in fig. \ref{auxloss}:
|
We directly use the GLaneIoU loss, $\mathcal{L}_{GLaneIoU}$, to regress the x-coordinate offsets (with $g=1$), and the Smooth-L1 loss for the regression of the end points (namely the y-coordinates of the start point and the end point), denoted as $\mathcal{L}_{end}$. To help the model learn global features, we propose the auxloss illustrated in Fig. \ref{auxloss}:
|
||||||
\begin{align}
|
\begin{align}
|
||||||
\mathcal{L}_{aux} &= \frac{1}{\left| \varOmega_{pos}^{o2m} \right| N_{seg}} \sum_{i \in \varOmega_{pos}^{o2o}} \sum_{m=j}^k \Bigg[ l \left( \theta_i - \hat{\theta}_{i}^{seg,m} \right) \\
|
\mathcal{L}_{aux} &= \frac{1}{\left| \varOmega_{pos}^{o2m} \right| N_{seg}} \sum_{i \in \varOmega_{pos}^{o2o}} \sum_{m=j}^k \Bigg[ l \left( \theta_i - \hat{\theta}_{i}^{seg,m} \right) \\
|
||||||
&\quad + l \left( r_{i}^{global} - \hat{r}_{i}^{seg,m} \right) \Bigg]
|
&\quad + l \left( r_{i}^{global} - \hat{r}_{i}^{seg,m} \right) \Bigg]
|
||||||
@ -528,7 +522,7 @@ The anchors and ground truth are divided into several segments. Each anchor segm
|
|||||||
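The following is a minimal sketch of this segment-level auxiliary regression, assuming per-anchor global polar parameters and per-segment predictions are available as tensors and taking $l(\cdot)$ to be the Smooth-L1 function (both assumptions are ours):
\begin{verbatim}
import torch
import torch.nn.functional as F

def aux_loss(theta, r_global, seg_theta, seg_r):
    # theta, r_global: (N,) global polar parameters of the positive anchors.
    # seg_theta, seg_r: (N, N_seg) segment-wise angle / radius predictions.
    n, n_seg = seg_theta.shape
    target_theta = theta[:, None].expand(-1, n_seg)
    target_r = r_global[:, None].expand(-1, n_seg)
    loss = F.smooth_l1_loss(seg_theta, target_theta, reduction="sum") \
         + F.smooth_l1_loss(seg_r, target_r, reduction="sum")
    return loss / (n * n_seg)
\end{verbatim}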
The overall loss function of Polar R-CNN is given as follows:
|
The overall loss function of Polar R-CNN is given as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
\mathcal{L}_{overall} &=\mathcal{L} _{lph}^{cls}+w_{lph}^{reg}\mathcal{L} _{lph}^{reg}\\&+w_{o2m}^{cls}\mathcal{L} _{o2m}^{cls}+w_{o2o}^{cls}\mathcal{L} _{o2o}^{cls}+w_{rank}\mathcal{L} _{rank}\\&+w_{IoU}\mathcal{L} _{IoU}+w_{end}\mathcal{L} _{end}+w_{aux}\mathcal{L} _{aux}
|
\mathcal{L}_{overall} &=\mathcal{L} _{lph}^{cls}+w_{lph}^{reg}\mathcal{L} _{lph}^{reg}\\&+w_{o2m}^{cls}\mathcal{L} _{o2m}^{cls}+w_{o2o}^{cls}\mathcal{L} _{o2o}^{cls}+w_{rank}\mathcal{L} _{rank}\\&+w_{IoU}\mathcal{L} _{IoU}+w_{end}\mathcal{L} _{end}+w_{aux}\mathcal{L} _{aux}.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
The first line in the loss function represents the loss for the local polar head, which includes both classification and regression components. The second line pertains to the losses associated with the two classification heads (O2M and O2O), while the third line represents the loss for the regression head within the triplet head. Each term in the equation is weighted by a factor to balance the contributions of each component to the gradient. The entire training process is end-to-end.
|
The first line in the loss function represents the loss for the local polar head, which includes both classification and regression components. The second line pertains to the losses associated with the two classification heads (O2M and O2O), while the third line represents the loss for the regression head within the triplet head. Each term in the equation is weighted by a factor to balance the contributions of each component to the gradient. The entire training process is end-to-end.
|
||||||
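As a sketch, the weighted combination above amounts to the following (the weight values are configuration-dependent and not reproduced here):
\begin{verbatim}
def overall_loss(losses, weights):
    # losses: terms keyed "lph_cls", "lph_reg", "o2m_cls", "o2o_cls",
    # "rank", "iou", "end", "aux"; weights: the corresponding w_* factors
    # (the local polar head classification term is unweighted).
    return losses["lph_cls"] + sum(weights[k] * losses[k] for k in weights)
\end{verbatim}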
@ -567,8 +561,8 @@ The first line in the loss function represents the loss for the local polar head
|
|||||||
\multirow{4}*{Evaluation Hyperparameter}
|
\multirow{4}*{Evaluation Hyperparameter}
|
||||||
& $H^{l}\times W^{l}$ &$4\times10$&$4\times10$&$4\times10$&$4\times10$&$6\times13$\\
|
& $H^{l}\times W^{l}$ &$4\times10$&$4\times10$&$4\times10$&$4\times10$&$6\times13$\\
|
||||||
& $K_{a}$ &20&20&20&12&50\\
|
& $K_{a}$ &20&20&20&12&50\\
|
||||||
& $C_{O2M}$ &0.48&0.40&0.40&0.40&0.45\\
|
& $C_{o2m}$ &0.48&0.40&0.40&0.40&0.45\\
|
||||||
& $C_{O2O}$ &0.46&0.46&0.46&0.46&0.44\\
|
& $C_{o2o}$ &0.46&0.46&0.46&0.46&0.44\\
|
||||||
\bottomrule
|
\bottomrule
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{adjustbox}
|
\end{adjustbox}
|
||||||
@ -578,7 +572,7 @@ The first line in the loss function represents the loss for the local polar head
|
|||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/auxloss.png} %
|
\includegraphics[width=\linewidth]{thesis_figure/auxloss.png} %
|
||||||
\caption{Auxloss for segment parameter regression.}
|
\caption{Auxloss for segment parameter regression.}
|
||||||
\label{auxloss}
|
\label{auxloss}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
@ -590,24 +584,24 @@ We conducted experiments on four widely used lane detection benchmarks and one r
|
|||||||
We use the F1-score to evaluate our model on the CULane, LLAMAS, DL-Rail, and Curvelanes datasets, maintaining consistency with previous works. The F1-score is defined as follows:
|
We use the F1-score to evaluate our model on the CULane, LLAMAS, DL-Rail, and Curvelanes datasets, maintaining consistency with previous works. The F1-score is defined as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
F1=\frac{2\times Precision\times Recall}{Precision\,\,+\,\,Recall}
|
F1=\frac{2\times Precision\times Recall}{Precision\,\,+\,\,Recall},
|
||||||
\\
|
\\
|
||||||
Precision\,\,=\,\,\frac{TP}{TP+FP}
|
Precision\,\,=\,\,\frac{TP}{TP+FP},
|
||||||
\\
|
\\
|
||||||
Recall\,\,=\,\,\frac{TP}{TP+FN}
|
Recall\,\,=\,\,\frac{TP}{TP+FN}.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
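For reference, a direct translation of these definitions given matched counts of true positives, false positives, and false negatives:
\begin{verbatim}
def f1_score(tp, fp, fn):
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    return 2 * precision * recall / (precision + recall)
\end{verbatim}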
In our experiment, we use different IoU thresholds to calculate the F1-score for different datasets: F1@50 and F1@75 for CULane \cite{clrnet}, F1@50 for LLAMAS \cite{clrnet} and Curvelanes \cite{CondLaneNet}, and F1@50, F1@75, and mF1 for DL-Rail \cite{dalnet}. The mF1 is defined as:
|
In our experiment, we use different IoU thresholds to calculate the F1-score for different datasets: F1@50 and F1@75 for CULane \cite{clrnet}, F1@50 for LLAMAS \cite{clrnet} and Curvelanes \cite{CondLaneNet}, and F1@50, F1@75, and mF1 for DL-Rail \cite{dalnet}. The mF1 is defined as:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
mF1=\left( F1@50+F1@55+...+F1@95 \right) /10
|
mF1=\left( F1@50+F1@55+...+F1@95 \right) /10.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
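Assuming the per-threshold F1 values have already been computed (e.g., with the helper above), mF1 is simply their average:
\begin{verbatim}
def mf1(f1_per_threshold):
    # f1_per_threshold: F1@50, F1@55, ..., F1@95 (ten values).
    assert len(f1_per_threshold) == 10
    return sum(f1_per_threshold) / 10
\end{verbatim}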
|
|
||||||
For TuSimple, the evaluation is formulated as follows:
|
For TuSimple, the evaluation is formulated as follows:
|
||||||
\begin{equation}
|
\begin{equation}
|
||||||
\begin{aligned}
|
\begin{aligned}
|
||||||
Accuracy=\frac{\sum{C_{clip}}}{\sum{S_{clip}}}
|
Accuracy=\frac{\sum{C_{clip}}}{\sum{S_{clip}}}.
|
||||||
\end{aligned}
|
\end{aligned}
|
||||||
\end{equation}
|
\end{equation}
|
||||||
where $C_{clip}$ and $S_{clip}$ represent the number of correct points (predicted points within 20 pixels of the ground truth) and the number of ground truth points, respectively. If the accuracy exceeds 85\%, the prediction is considered correct. TuSimple also reports the False Positive Rate (FP=1-Precision) and the False Negative Rate (FN=1-Recall).
|
where $C_{clip}$ and $S_{clip}$ represent the number of correct points (predicted points within 20 pixels of the ground truth) and the number of ground truth points, respectively. If the accuracy exceeds 85\%, the prediction is considered correct. TuSimple also reports the False Positive Rate (FP=1-Precision) and the False Negative Rate (FN=1-Recall).
|
||||||
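A minimal sketch of the per-clip counts behind this accuracy, assuming predicted and ground-truth x-coordinates are sampled at the same fixed rows (the 20-pixel tolerance follows the text above); the overall accuracy sums $C_{clip}$ and $S_{clip}$ over all clips:
\begin{verbatim}
def clip_counts(pred_xs, gt_xs, tol=20):
    # Returns (C_clip, S_clip): correct points and ground-truth points of one clip.
    correct = sum(abs(p - g) < tol for p, g in zip(pred_xs, gt_xs))
    return correct, len(gt_xs)
\end{verbatim}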
@ -661,12 +655,12 @@ All input images are cropped and resized to $800\times320$. Similar to \cite{clr
|
|||||||
\hline
|
\hline
|
||||||
\textbf{Proposed Method} \\
|
\textbf{Proposed Method} \\
|
||||||
\cline{1-1}
|
\cline{1-1}
|
||||||
Polar R-CNN-NMS &ResNet18&80.81&63.96&94.12&79.57&76.53&83.33&55.06&90.62&79.50&1088&75.25\\
|
Polar R-CNN-NMS &ResNet18&80.81&63.97&94.12&79.57&76.53&83.33&55.10&90.70&79.50&1088&75.25\\
|
||||||
Polar R-CNN &ResNet18&80.81&63.96&94.12&79.57&76.53&83.33&55.06&90.62&79.50&1088&75.25\\
|
Polar R-CNN &ResNet18&80.81&63.96&94.12&79.57&76.53&83.33&55.06&90.62&79.50&1088&75.25\\
|
||||||
Polar R-CNN &ResNet34&80.92&63.97&94.24&79.76&76.70&81.93&55.40&\textbf{91.12}&79.85&1158&75.71\\
|
Polar R-CNN &ResNet34&80.92&63.97&94.24&79.76&76.70&81.93&55.40&\textbf{91.12}&79.85&1158&75.71\\
|
||||||
Polar R-CNN &ResNet50&81.34&64.77&94.45&\textbf{80.42}&75.82&83.61&56.62&91.10&80.05&1356&75.94\\
|
Polar R-CNN &ResNet50&81.34&64.77&94.45&\textbf{80.42}&75.82&83.61&56.62&91.10&80.05&1356&75.94\\
|
||||||
Polar R-CNN-NMS &DLA34 &\textbf{81.49}&64.96&\textbf{94.44}&80.36&\textbf{76.83}&83.68&56.53&90.85&\textbf{80.09}&1135&76.32\\
|
Polar R-CNN-NMS &DLA34 &\textbf{81.49}&64.96&\textbf{94.44}&80.36&\textbf{76.79}&83.68&56.52&90.85&\textbf{80.09}&1133&76.32\\
|
||||||
Polar R-CNN &DLA34 &\textbf{81.49}&\textbf{64.97}&\textbf{94.44}&80.36&\textbf{76.79}&83.68&\textbf{56.52}&90.85&\textbf{80.09}&1133&76.32\\
|
Polar R-CNN &DLA34 &\textbf{81.49}&\textbf{64.97}&\textbf{94.44}&80.36&\textbf{76.79}&83.68&\textbf{56.55}&90.81&\textbf{79.80}&1133&76.33\\
|
||||||
\bottomrule
|
\bottomrule
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\end{adjustbox}
|
\end{adjustbox}
|
||||||
@ -714,7 +708,7 @@ All input images are cropped and resized to $800\times320$. Similar to \cite{clr
|
|||||||
BézierLaneNet\cite{bezierlanenet} &ResNet34&95.17&95.89&94.46\\
|
BézierLaneNet\cite{bezierlanenet} &ResNet34&95.17&95.89&94.46\\
|
||||||
LaneATT\cite{laneatt} &ResNet34&93.74&96.79&90.88\\
|
LaneATT\cite{laneatt} &ResNet34&93.74&96.79&90.88\\
|
||||||
LaneAF\cite{laneaf} &DLA34 &96.07&96.91&95.26\\
|
LaneAF\cite{laneaf} &DLA34 &96.07&96.91&95.26\\
|
||||||
DALNet\cite{dalnet} &ResNet34&96.12&\textbf{96.83}&95.42\\
|
DALNet\cite{dalnet} &ResNet18&96.12&\textbf{96.83}&95.42\\
|
||||||
CLRNet\cite{clrnet} &DLA34 &96.12&- &- \\
|
CLRNet\cite{clrnet} &DLA34 &96.12&- &- \\
|
||||||
\midrule
|
\midrule
|
||||||
|
|
||||||
@ -789,7 +783,7 @@ We also compare the number of anchors and processing speed with other methods. F
|
|||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/anchor_num_method.png}
|
\includegraphics[width=\linewidth]{thesis_figure/anchor_num_method.png}
|
||||||
\caption{Anchor number and F1-score of different methods on CULane.}
|
\caption{Anchor number and F1-score of different methods on CULane.}
|
||||||
\label{anchor_num_method}
|
\label{anchor_num_method}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
@ -797,7 +791,7 @@ We also compare the number of anchors and processing speed with other methods. F
|
|||||||
|
|
||||||
\begin{figure}[t]
|
\begin{figure}[t]
|
||||||
\centering
|
\centering
|
||||||
\includegraphics[width=\linewidth]{thsis_figure/speed_method.png}
|
\includegraphics[width=\linewidth]{thesis_figure/speed_method.png}
|
||||||
\caption{Latency and F1-score of different methods on CULane.}
|
\caption{Latency and F1-score of different methods on CULane.}
|
||||||
\label{speed_method}
|
\label{speed_method}
|
||||||
\end{figure}
|
\end{figure}
|
||||||
@ -840,13 +834,13 @@ We also explore the effect of different local polar map sizes on our model, as i
|
|||||||
\def\imgwidth{\linewidth}
|
\def\imgwidth{\linewidth}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth]{thsis_figure/anchor_num/anchor_num_testing_p.png}
|
\includegraphics[width=\imgwidth]{thesis_figure/anchor_num/anchor_num_testing_p.png}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth]{thsis_figure/anchor_num/anchor_num_testing_r.png}
|
\includegraphics[width=\imgwidth]{thesis_figure/anchor_num/anchor_num_testing_r.png}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth]{thsis_figure/anchor_num/anchor_num_testing.png}
|
\includegraphics[width=\imgwidth]{thesis_figure/anchor_num/anchor_num_testing.png}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption{Anchor Number and f1-score of different methods on CULane.}
|
\caption{Anchor Number and f1-score of different methods on CULane.}
|
||||||
\label{anchor_num_testing}
|
\label{anchor_num_testing}
|
||||||
@ -861,20 +855,20 @@ We also explore the effect of different local polar map sizes on our model, as i
|
|||||||
\def\imgheight{0.4\linewidth}
|
\def\imgheight{0.4\linewidth}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/heatmap/cam1.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/heatmap/cam1.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/heatmap/anchor1.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/heatmap/anchor1.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/heatmap/cam2.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/heatmap/cam2.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/heatmap/anchor2.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/heatmap/anchor2.jpg}
|
||||||
\caption{}
|
\caption{}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\caption{The heat map of the local polar map and the anchor selection during the evaluation stage.}
|
\caption{The heat map of the local polar map and the anchor selection during the evaluation stage.}
|
||||||
@ -1016,18 +1010,18 @@ In the traditional NMS post-processing \cite{clrernet}, the default IoU threshol
|
|||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\rotatebox{90}{\small{GT}}
|
\rotatebox{90}{\small{GT}}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/culane/1_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/culane/1_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/culane/2_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/culane/2_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/tusimple/1_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/tusimple/1_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/tusimple/2_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/tusimple/2_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
@ -1036,18 +1030,18 @@ In the traditional NMS post-processing \cite{clrernet}, the default IoU threshol
|
|||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\raisebox{-1.5em}{\rotatebox{90}{\small{Anchors}}}
|
\raisebox{-1.5em}{\rotatebox{90}{\small{Anchors}}}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/culane/1_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/culane/1_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/culane/2_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/culane/2_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/tusimple/1_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/tusimple/1_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/tusimple/2_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/tusimple/2_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
@ -1055,19 +1049,19 @@ In the traditional NMS post-processing \cite{clrernet}, the default IoU threshol
|
|||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\raisebox{-2em}{\rotatebox{90}{\small{Predictions}}}
|
\raisebox{-2em}{\rotatebox{90}{\small{Predictions}}}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/culane/1_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/culane/1_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/culane/2_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/culane/2_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\caption{CULane}
|
\caption{CULane}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/tusimple/1_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/tusimple/1_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/tusimple/2_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/tusimple/2_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\caption{TuSimple}
|
\caption{TuSimple}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
@ -1081,18 +1075,18 @@ In the traditional NMS post-processing \cite{clrernet}, the default IoU threshol
|
|||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\rotatebox{90}{\small{GT}}
|
\rotatebox{90}{\small{GT}}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/llamas/1_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/llamas/1_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/llamas/2_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/llamas/2_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/dlrail/1_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/dlrail/1_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/dlrail/2_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/dlrail/2_gt.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
@ -1100,18 +1094,18 @@ In the traditional NMS post-processing \cite{clrernet}, the default IoU threshol
|
|||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\raisebox{-1.5em}{\rotatebox{90}{\small{Anchors}}}
|
\raisebox{-1.5em}{\rotatebox{90}{\small{Anchors}}}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/llamas/1_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/llamas/1_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/llamas/2_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/llamas/2_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/dlrail/1_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/dlrail/1_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/dlrail/2_anchor.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/dlrail/2_anchor.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
@ -1119,19 +1113,19 @@ In the traditional NMS post-processing \cite{clrernet}, the default IoU threshol
|
|||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\raisebox{-2em}{\rotatebox{90}{\small{Predictions}}}
|
\raisebox{-2em}{\rotatebox{90}{\small{Predictions}}}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/llamas/1_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/llamas/1_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/llamas/2_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/llamas/2_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\caption{LLAMAS}
|
\caption{LLAMAS}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\pagewidth}
|
\begin{subfigure}{\pagewidth}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/dlrail/1_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/dlrail/1_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\begin{minipage}{\subwidth}
|
\begin{minipage}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_dataset/dlrail/2_pred.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_dataset/dlrail/2_pred.jpg}
|
||||||
\end{minipage}
|
\end{minipage}
|
||||||
\caption{DL-Rail}
|
\caption{DL-Rail}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
@ -1150,90 +1144,90 @@ In the traditional NMS post-processing \cite{clrernet}, the default IoU threshol
|
|||||||
\def\imgheight{0.5625\linewidth}
|
\def\imgheight{0.5625\linewidth}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun_gt.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun_pred50.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun_pred50.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun_pred15.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun_pred15.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun_NMSfree.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun_NMSfree.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun2_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun2_gt.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun2_pred50.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun2_pred50.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun2_pred15.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun2_pred15.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/redun2_NMSfree.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/redun2_NMSfree.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
|
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less_gt.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less_pred50.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less_pred50.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less_pred15.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less_pred15.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less_NMSfree.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less_NMSfree.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less2_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less2_gt.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less2_pred50.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less2_pred50.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less2_pred15.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less2_pred15.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/less2_NMSfree.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/less2_NMSfree.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all_gt.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all_pred50.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all_pred50.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all_pred15.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all_pred15.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all_NMSfree.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all_NMSfree.jpg}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
|
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all2_gt.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all2_gt.jpg}
|
||||||
\caption{GT}
|
\caption{GT}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all2_pred50.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all2_pred50.jpg}
|
||||||
\caption{NMS@50}
|
\caption{NMS@50}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all2_pred15.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all2_pred15.jpg}
|
||||||
\caption{NMS@15}
|
\caption{NMS@15}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\begin{subfigure}{\subwidth}
|
\begin{subfigure}{\subwidth}
|
||||||
\includegraphics[width=\imgwidth, height=\imgheight]{thsis_figure/view_nms/all2_NMSfree.jpg}
|
\includegraphics[width=\imgwidth, height=\imgheight]{thesis_figure/view_nms/all2_NMSfree.jpg}
|
||||||
\caption{NMSFree}
|
\caption{NMSFree}
|
||||||
\end{subfigure}
|
\end{subfigure}
|
||||||
\vspace{0.5em}
|
\vspace{0.5em}
|
||||||
@ -1251,11 +1245,11 @@ In this paper, we propose Polar R-CNN to address two key issues in anchor-based
|
|||||||
\bibliographystyle{IEEEtran}
|
\bibliographystyle{IEEEtran}
|
||||||
\bibliography{reference}
|
\bibliography{reference}
|
||||||
%\newpage
|
%\newpage
|
||||||
\begin{IEEEbiographynophoto}{Shengqi Wang}
|
|
||||||
received the Master degree from Xi'an Jiaotong University, Xi'an, China, in 2020. He is now pursuing for the Ph.D. degree in statistics at Xi'an Jiaotong University. His research interests include low-level computer vision, deep learning, and so on.
|
|
||||||
\end{IEEEbiographynophoto}
|
|
||||||
%
|
%
|
||||||
\begin{IEEEbiography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{thsis_figure/ljm.pdf}}]{Junmin Liu}
|
\begin{IEEEbiography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{thesis_figure/wsq.jpg}}]{Shengqi Wang}
|
||||||
|
received the Master's degree from Xi'an Jiaotong University, Xi'an, China, in 2020. He is now pursuing the Ph.D. degree in statistics at Xi'an Jiaotong University. His research interests include low-level computer vision and deep learning.
|
||||||
|
\end{IEEEbiography}
|
||||||
|
\begin{IEEEbiography}[{\includegraphics[width=1in,height=1.25in,clip,keepaspectratio]{thesis_figure/ljm.pdf}}]{Junmin Liu}
|
||||||
was born in 1982. He received the Ph.D. degree in Mathematics from Xi'an Jiaotong University, Xi'an, China, in 2013. From 2011 to 2012, he served as a Research Assistant with the Department of Geography and Resource Management at the Chinese University of Hong Kong, Hong Kong, China. From 2014 to 2017, he worked as a Visiting Scholar at the University of Maryland, College Park, USA. He is currently a full Professor at the School of Mathematics and Statistics, Xi'an Jiaotong University, Xi'an, China. His research interests are mainly focused on the theory and application of machine learning and image processing. He has published more than 60 research papers in international conferences and journals.
|
was born in 1982. He received the Ph.D. degree in Mathematics from Xi'an Jiaotong University, Xi'an, China, in 2013. From 2011 to 2012, he served as a Research Assistant with the Department of Geography and Resource Management at the Chinese University of Hong Kong, Hong Kong, China. From 2014 to 2017, he worked as a Visiting Scholar at the University of Maryland, College Park, USA. He is currently a full Professor at the School of Mathematics and Statistics, Xi'an Jiaotong University, Xi'an, China. His research interests are mainly focused on the theory and application of machine learning and image processing. He has published more than 60 research papers in international conferences and journals.
|
||||||
\end{IEEEbiography}
|
\end{IEEEbiography}
|
||||||
\vfill
|
\vfill
|
||||||
|
2
make.sh
@ -1,5 +1,5 @@
|
|||||||
# latexmk -c
|
# latexmk -c
|
||||||
# latexmk -pvc -xelatex -interaction=nonstopmode main.tex
|
# latexmk -pvc -xelatex -interaction=nonstopmode main.tex
|
||||||
latexmk -pdf main.tex
|
# latexmk -pdf main.tex
|
||||||
latexmk -quiet -interaction=nonstopmode --pvc --pdf main.tex
|
latexmk -quiet -interaction=nonstopmode --pvc --pdf main.tex
|
||||||
# latexmk -pdf -interaction=nonstopmode -pvc main.tex
|
# latexmk -pdf -interaction=nonstopmode -pvc main.tex
|
@ -1,47 +0,0 @@
|
|||||||
import matplotlib.pyplot as plt
|
|
||||||
import matplotlib as mpl
|
|
||||||
|
|
||||||
# Set the global font to Times New Roman
|
|
||||||
mpl.rcParams['font.family'] = 'Times New Roman'
|
|
||||||
mpl.rcParams['font.serif'] = ['Times New Roman']
|
|
||||||
mpl.rcParams['axes.titlesize'] = 18
|
|
||||||
mpl.rcParams['axes.labelsize'] = 30
|
|
||||||
mpl.rcParams['xtick.labelsize'] = 22
|
|
||||||
mpl.rcParams['ytick.labelsize'] = 22
|
|
||||||
mpl.rcParams['legend.fontsize'] = 22
|
|
||||||
|
|
||||||
alpha = 0.7
|
|
||||||
plt.figure(figsize=(8.5, 8))
|
|
||||||
|
|
||||||
# 2*7
|
|
||||||
x_2x7 = [10, 14]
|
|
||||||
y_2x7 = [80.075, 80.083]
|
|
||||||
plt.plot(x_2x7, y_2x7, 'o-', color='blue', alpha=alpha, markersize=12, linewidth=4, label="2*7")
|
|
||||||
|
|
||||||
# 2*10
|
|
||||||
x_2x10 = [10, 15, 20]
|
|
||||||
y_2x10 = [80.549, 80.585, 80.59]
|
|
||||||
plt.plot(x_2x10, y_2x10, 's-', color='green', alpha=alpha, markersize=12, linewidth=4, label="2*10")
|
|
||||||
|
|
||||||
# 4*10
|
|
||||||
x_4x10 = [10, 15, 20, 25, 30, 35, 40]
|
|
||||||
y_4x10 = [80.3, 80.824, 80.805, 80.799, 80.798, 80.796, 80.794]
|
|
||||||
plt.plot(x_4x10, y_4x10, 'd-', color='red', alpha=alpha, markersize=12, linewidth=4, label="4*10")
|
|
||||||
|
|
||||||
# 5*12
|
|
||||||
x_5x12 = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]
|
|
||||||
y_5x12 = [79.742, 80.809, 80.9, 80.924, 80.919, 80.92, 80.921, 80.924, 80.923, 80.923, 80.924]
|
|
||||||
plt.plot(x_5x12, y_5x12, 'v-', color='purple', alpha=alpha, markersize=12, linewidth=4, label="5*12")
|
|
||||||
|
|
||||||
# 6*13
|
|
||||||
x_6x13 = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 78]
|
|
||||||
y_6x13 = [78.576, 80.53, 80.907, 80.953, 80.955, 80.958, 80.956, 80.958, 80.963, 80.965, 80.962, 80.962, 80.962, 80.962]
|
|
||||||
plt.plot(x_6x13, y_6x13, 'p-', color='orange', alpha=alpha, markersize=12, linewidth=4, label="6*13")
|
|
||||||
|
|
||||||
|
|
||||||
plt.grid(True, linestyle='-', alpha=0.5)
|
|
||||||
plt.xlabel("Anchor Number") # 横坐标名字
|
|
||||||
plt.ylabel("F1-Score") # 纵坐标名字
|
|
||||||
plt.legend(loc="lower right", title="Polarmap Size", title_fontsize=mpl.rcParams['legend.fontsize']) # 图例标题
|
|
||||||
plt.savefig('anchor_num_testing.png', dpi=300)
|
|
||||||
plt.show()
|
|
@ -1,50 +0,0 @@
|
|||||||
import matplotlib.pyplot as plt
|
|
||||||
import matplotlib as mpl
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Set the global font to Times New Roman
|
|
||||||
mpl.rcParams['font.family'] = 'Times New Roman'
|
|
||||||
mpl.rcParams['font.serif'] = ['Times New Roman']
|
|
||||||
mpl.rcParams['axes.titlesize'] = 18
|
|
||||||
mpl.rcParams['axes.labelsize'] = 30
|
|
||||||
mpl.rcParams['xtick.labelsize'] = 22
|
|
||||||
mpl.rcParams['ytick.labelsize'] = 22
|
|
||||||
mpl.rcParams['legend.fontsize'] = 22
|
|
||||||
|
|
||||||
alpha = 0.7
|
|
||||||
plt.figure(figsize=(8.5, 8))
|
|
||||||
|
|
||||||
# 2*7
|
|
||||||
x_2x7 = [10, 14]
|
|
||||||
y_2x7 = [87.343, 85.597]
|
|
||||||
plt.plot(x_2x7, y_2x7, 'o-', color='blue', alpha=alpha, markersize=12, linewidth=4, label="2*7")
|
|
||||||
|
|
||||||
# 2*10
|
|
||||||
x_2x10 = [10, 15, 20]
|
|
||||||
y_2x10 = [88.174, 88.044, 88.036]
|
|
||||||
plt.plot(x_2x10, y_2x10, 's-', color='green', alpha=alpha, markersize=12, linewidth=4, label="2*10")
|
|
||||||
|
|
||||||
# 4*10
|
|
||||||
x_4x10 = [10, 15, 20, 25, 30, 35, 40]
|
|
||||||
y_4x10 = [89.599, 88.822, 88.516, 88.428, 88.390, 88.379, 88.376]
|
|
||||||
plt.plot(x_4x10, y_4x10, 'd-', color='red', alpha=alpha, markersize=12, linewidth=4, label="4*10")
|
|
||||||
|
|
||||||
# 5*12
|
|
||||||
x_5x12 = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]
|
|
||||||
y_5x12 = [89.968, 89.221, 88.893, 88.805, 88.757, 88.740, 88.735, 88.733, 88.729, 88.728, 88.729]
|
|
||||||
plt.plot(x_5x12, y_5x12, 'v-', color='purple', alpha=alpha, markersize=12, linewidth=4, label="5*12")
|
|
||||||
|
|
||||||
# 6*13
|
|
||||||
x_6x13 = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 78]
|
|
||||||
y_6x13 = [90.845, 89.979, 89.492, 89.237, 89.101, 89.055, 89.018, 88.999, 88.996, 88.992, 88.983, 88.982, 88.982, 88.982]
|
|
||||||
plt.plot(x_6x13, y_6x13, 'p-', color='orange', alpha=alpha, markersize=12, linewidth=4, label="6*13")
|
|
||||||
|
|
||||||
plt.grid(True, linestyle='-', alpha=0.5)
|
|
||||||
plt.xlabel("Anchor Number") # 横坐标名字
|
|
||||||
plt.ylabel("Precision") # 纵坐标名字
|
|
||||||
plt.legend(loc="lower right", title="Polarmap Size", title_fontsize=mpl.rcParams['legend.fontsize']) # 图例标题
|
|
||||||
plt.savefig('anchor_num_testing_p.png', dpi=300)
|
|
||||||
plt.show()
|
|
@ -1,47 +0,0 @@
|
|||||||
import matplotlib.pyplot as plt
|
|
||||||
import matplotlib as mpl
|
|
||||||
|
|
||||||
# Set the global font to Times New Roman
|
|
||||||
mpl.rcParams['font.family'] = 'Times New Roman'
|
|
||||||
mpl.rcParams['font.serif'] = ['Times New Roman']
|
|
||||||
mpl.rcParams['axes.titlesize'] = 18
|
|
||||||
mpl.rcParams['axes.labelsize'] = 30
|
|
||||||
mpl.rcParams['xtick.labelsize'] = 22
|
|
||||||
mpl.rcParams['ytick.labelsize'] = 22
|
|
||||||
mpl.rcParams['legend.fontsize'] = 22
|
|
||||||
|
|
||||||
alpha = 0.7
|
|
||||||
plt.figure(figsize=(8.5, 8))
|
|
||||||
|
|
||||||
# 2*7
|
|
||||||
x_2x7 = [10, 14]
|
|
||||||
y_2x7 = [73.924, 74.002]
|
|
||||||
plt.plot(x_2x7, y_2x7, 'o-', color='blue', alpha=alpha, markersize=12, linewidth=4, label="2*7")
|
|
||||||
|
|
||||||
# 2*10
|
|
||||||
x_2x10 = [10, 15, 20]
|
|
||||||
y_2x10 = [74.138, 74.290, 74.305]
|
|
||||||
plt.plot(x_2x10, y_2x10, 's-', color='green', alpha=alpha, markersize=12, linewidth=4, label="2*10")
|
|
||||||
|
|
||||||
# 4*10
|
|
||||||
x_4x10 = [10, 15, 20, 25, 30, 35, 40]
|
|
||||||
y_4x10 = [72.750, 74.147, 74.330, 74.381, 74.406, 74.411, 74.410]
|
|
||||||
plt.plot(x_4x10, y_4x10, 'd-', color='red', alpha=alpha, markersize=12, linewidth=4, label="4*10")
|
|
||||||
|
|
||||||
# 5*12
|
|
||||||
x_5x12 = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60]
|
|
||||||
y_5x12 = [71.603, 73.847, 74.226, 74.327, 74.353, 74.366, 74.371, 74.378, 74.380, 74.380, 74.382]
|
|
||||||
plt.plot(x_5x12, y_5x12, 'v-', color='purple', alpha=alpha, markersize=12, linewidth=4, label="5*12")
|
|
||||||
|
|
||||||
# 6*13
|
|
||||||
x_6x13 = [10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 78]
|
|
||||||
y_6x13 = [69.227, 72.876, 73.825, 74.077, 74.174, 74.210, 74.232, 74.249, 74.260, 74.265, 74.267, 74.267, 74.267, 74.268]
|
|
||||||
plt.plot(x_6x13, y_6x13, 'p-', color='orange', alpha=alpha, markersize=12, linewidth=4, label="6*13")
|
|
||||||
|
|
||||||
plt.grid(True, linestyle='-', alpha=0.5)
|
|
||||||
plt.xlabel("Anchor Number") # 横坐标名字
|
|
||||||
plt.ylabel("Recall") # 纵坐标名字
|
|
||||||
plt.legend(loc="lower right", title="Polarmap Size", title_fontsize=mpl.rcParams['legend.fontsize']) # 图例标题
|
|
||||||
plt.savefig('anchor_num_testing_r.png', dpi=300)
|
|
||||||
plt.show()
|
|
||||||
|
|
@ -1,3 +0,0 @@
|
|||||||
python anchor_num_testing.py
|
|
||||||
python anchor_num_testing_p.py
|
|
||||||
python anchor_num_testing_r.py
|
|
@ -1,50 +0,0 @@
|
|||||||
import matplotlib.pyplot as plt
|
|
||||||
import matplotlib as mpl
|
|
||||||
|
|
||||||
# Set the global font to Times New Roman
|
|
||||||
mpl.rcParams['font.family'] = 'Times New Roman'
|
|
||||||
mpl.rcParams['font.serif'] = ['Times New Roman']
|
|
||||||
mpl.rcParams['axes.titlesize'] = 14
|
|
||||||
mpl.rcParams['axes.labelsize'] = 12
|
|
||||||
mpl.rcParams['xtick.labelsize'] = 12
|
|
||||||
mpl.rcParams['ytick.labelsize'] = 12
|
|
||||||
mpl.rcParams['legend.fontsize'] = 12
|
|
||||||
|
|
||||||
# Define the data
|
|
||||||
data = {
|
|
||||||
'LaneATT (2021)': {'x': [1000, 1000, 1000], 'y': [75.09, 76.68, 77.02], 'sizes': [40*2.5, 80*2.5, 180*2.5], 'color': 'magenta', 'marker': 'H'},
|
|
||||||
'CLRNet (2022)': {'x': [192, 192, 192, 192], 'y': [79.58, 79.73, 80.13, 80.47], 'sizes': [20*2.5, 40*2.5, 80*2.5, 180*2.5], 'color': 'orange', 'marker': 'p'},
|
|
||||||
'CLRerNet (2023)': {'x': [192, 192, 192], 'y': [80.76, 80.91, 81.12], 'sizes': [40*2.5, 80*2.5, 180*2.5], 'color': 'orangered', 'marker': 'p'},
|
|
||||||
'ADNet (2023)': {'x': [64, 64], 'y': [77.56, 78.94], 'sizes': [80*2.5, 180*2.5], 'color': 'green', 'marker': 'v'},
|
|
||||||
'SRLane (2024)': {'x': [40], 'y': [79.73], 'sizes': [180*2.5], 'color': 'red', 'marker': '*'},
|
|
||||||
'Sparse Laneformer (2024)': {'x': [20, 20, 20], 'y': [76.55, 77.77, 77.83], 'sizes': [40*2.5, 80*2.5, 180*2.5], 'color': 'purple', 'marker': '^'},
|
|
||||||
'PolarRCNN (Ours)': {'x': [20, 20, 20, 20], 'y': [80.81, 80.92, 81.34, 81.49], 'sizes': [20*2.5, 40*2.5, 80*2.5, 180*2.5], 'color': 'blue', 'marker': 'o'},
|
|
||||||
}
|
|
||||||
|
|
||||||
# Define a uniform marker size for the legend
|
|
||||||
legend_marker_size = 100
|
|
||||||
|
|
||||||
# Plot the data points
|
|
||||||
for label, props in data.items():
|
|
||||||
plt.scatter(
|
|
||||||
props['x'], props['y'],
|
|
||||||
s=props['sizes'],
|
|
||||||
alpha=0.5,
|
|
||||||
c=props['color'],
|
|
||||||
marker=props['marker'],
|
|
||||||
edgecolors='w',
|
|
||||||
linewidth=0.5,
|
|
||||||
label=label
|
|
||||||
)
|
|
||||||
|
|
||||||
# Set the title and labels
|
|
||||||
plt.grid(True, linestyle='-', alpha=0.5)
|
|
||||||
plt.xlabel('Anchor Number')
|
|
||||||
plt.ylabel('F1-score')
|
|
||||||
|
|
||||||
# Add the legend and adjust the marker size of the legend entries
|
|
||||||
legend = plt.legend(loc="best")
|
|
||||||
for handle in legend.legend_handles:
|
|
||||||
handle._sizes = [legend_marker_size]
|
|
||||||
plt.savefig('anchor_num_method.png', dpi=300)
|
|
||||||
plt.show()
|
|
@ -1,54 +0,0 @@
|
|||||||
import matplotlib.pyplot as plt
|
|
||||||
import matplotlib as mpl
|
|
||||||
|
|
||||||
# Set the global font to Times New Roman
|
|
||||||
mpl.rcParams['font.family'] = 'Times New Roman'
|
|
||||||
mpl.rcParams['font.serif'] = ['Times New Roman']
|
|
||||||
mpl.rcParams['axes.titlesize'] = 14
|
|
||||||
mpl.rcParams['axes.labelsize'] = 12
|
|
||||||
mpl.rcParams['xtick.labelsize'] = 12
|
|
||||||
mpl.rcParams['ytick.labelsize'] = 12
|
|
||||||
mpl.rcParams['legend.fontsize'] = 12
|
|
||||||
mark_size = 8
|
|
||||||
|
|
||||||
# Define the data
|
|
||||||
data = {
|
|
||||||
'LaneATT (2021)': {'x': [3.23, 5.01, 23.67], 'y': [75.09, 76.68, 77.02], 'color': 'magenta', 'marker': 'H'},
|
|
||||||
'CLRNet (2022)': {'x': [7.37, 8.81, 9.31, 14.36], 'y': [79.58, 79.73, 80.47, 80.13], 'color': 'orange', 'marker': 'p'},
|
|
||||||
'CLRerNet (2023)': {'x': [8.81, 9.31, 14.36], 'y': [80.76, 81.12, 80.91], 'color': 'orangered', 'marker': 'p'},
|
|
||||||
'ADNet (2023)': {'x': [8.4, 10.67], 'y': [77.56, 78.94], 'color': 'green', 'marker': 'v'},
|
|
||||||
'SRLane (2024)': {'x': [3.12], 'y': [79.73], 'color': 'red', 'marker': '*'},
|
|
||||||
'UFLDv2 (2022)': {'x': [2.7, 4.6], 'y': [75, 76], 'color': 'purple', 'marker': '^'},
|
|
||||||
'PolarRCNN-NMS (ours)': {'x': [3.71, 4.97, 5.47, 6.14], 'y': [80.81, 80.92, 81.49, 81.34], 'color': 'blue', 'marker': 'o'},
|
|
||||||
'PolarRCNN (ours)': {'x': [4.77, 6.10, 6.54, 7.13], 'y': [80.81, 80.92, 81.49, 81.34], 'color': 'cyan', 'marker': 'o'},
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
plt.xlim(0, 30)
|
|
||||||
|
|
||||||
# Plot the data points
|
|
||||||
for label, props in data.items():
|
|
||||||
plt.plot(
|
|
||||||
props['x'], props['y'],
|
|
||||||
alpha=0.8,
|
|
||||||
c=props['color'],
|
|
||||||
marker=props['marker'],
|
|
||||||
# edgecolors='w',
|
|
||||||
markersize = mark_size,
|
|
||||||
linewidth=1.2,
|
|
||||||
label=label
|
|
||||||
)
|
|
||||||
|
|
||||||
# Set the title and labels
|
|
||||||
plt.grid(True, linestyle='-', alpha=0.5)
|
|
||||||
plt.xlabel('Latency (ms) on NVIDIA A100')
|
|
||||||
plt.ylabel('F1-score (%)')
|
|
||||||
|
|
||||||
# Add the legend and adjust the marker size of the legend entries
|
|
||||||
legend = plt.legend(loc="upper right")
|
|
||||||
for handle in legend.legend_handles:
|
|
||||||
handle._sizes = [20]
|
|
||||||
plt.savefig('speed_method.png', dpi=300)
|
|
||||||
plt.show()
|
|