This paper proposes a realistic, modularized framework for controlling autonomous surface vehicles (ASVs) on inland waterways (IWs) based on deep reinforcement learning (DRL). The framework improves operational safety and comprises two levels: a high-level local path planning (LPP) unit and a low-level path following (PF) unit, each consisting of a DRL agent. The LPP agent plans a path that accounts for dynamic vessels, closing a gap in the current research landscape, and additionally respects traffic rules and the geometry of the waterway. In this context, we introduce a novel application of a spatial-temporal recurrent neural network architecture to continuous action spaces. The LPP agent outperforms a state-of-the-art artificial potential field (APF) method, increasing the minimum distance to other vessels by 65% on average. The PF agent performs low-level actuator control while accounting for shallow-water effects and environmental forces from wind, waves, and currents. Compared with a proportional-integral-derivative (PID) controller, the PF agent yields only 61% of the mean cross-track error (MCTE) while significantly reducing control effort (CE), measured as the required absolute rudder angle. Lastly, both agents are jointly validated in simulation, using the lower Elbe in northern Germany as an example case and real automatic identification system (AIS) trajectories to model the behavior of other ships.
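For reference, the two tracking metrics named in the abstract admit a standard formulation, sketched below in LaTeX. This is a conventional definition, not necessarily the paper's exact one; the symbols $e_t$ (cross-track error at step $t$) and $\delta_t$ (applied rudder angle) are illustrative assumptions.

\[
\mathrm{MCTE} = \frac{1}{N}\sum_{t=1}^{N} \lvert e_t \rvert, \qquad
\mathrm{CE} = \frac{1}{N}\sum_{t=1}^{N} \lvert \delta_t \rvert,
\]

where $e_t$ is the perpendicular (cross-track) distance from the vessel to the desired path at time step $t$, $\delta_t$ is the absolute rudder angle commanded at that step, and $N$ is the number of time steps in an episode. Under these definitions, the reported 61% figure compares the PF agent's MCTE to the PID baseline's.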
@techreport{8cd05c69222445eebbe98f8905fb8030,
abstract = {This paper proposes a realistic, modularized framework for controlling autonomous surface vehicles (ASVs) on inland waterways (IWs) based on deep reinforcement learning (DRL). The framework improves operational safety and comprises two levels: a high-level local path planning (LPP) unit and a low-level path following (PF) unit, each consisting of a DRL agent. The LPP agent plans a path that accounts for dynamic vessels, closing a gap in the current research landscape, and additionally respects traffic rules and the geometry of the waterway. In this context, we introduce a novel application of a spatial-temporal recurrent neural network architecture to continuous action spaces. The LPP agent outperforms a state-of-the-art artificial potential field (APF) method, increasing the minimum distance to other vessels by 65% on average. The PF agent performs low-level actuator control while accounting for shallow-water effects and environmental forces from wind, waves, and currents. Compared with a proportional-integral-derivative (PID) controller, the PF agent yields only 61% of the mean cross-track error (MCTE) while significantly reducing control effort (CE), measured as the required absolute rudder angle. Lastly, both agents are jointly validated in simulation, using the lower Elbe in northern Germany as an example case and real automatic identification system (AIS) trajectories to model the behavior of other ships.},
author = {Waltz, Martin and Paulig, Niklas and Okhrin, Ostap},
day = 25,
doi = {10.48550/arXiv.2307.16769},
keywords = {topic_engineering, FIS_scads, cs.AI, cs.SY, eess.SY},
language = {English},
month = jul,
title = {2-Level Reinforcement Learning for Ships on Inland Waterways: Path Planning and Following},
type = {WorkingPaper},
year = 2023
}