{"created":"2023-07-25T10:24:29.885005+00:00","id":1816,"links":{},"metadata":{"_buckets":{"deposit":"3689ad5f-364a-4333-8cd3-facf69412913"},"_deposit":{"created_by":1,"id":"1816","owners":[1],"pid":{"revision_id":0,"type":"depid","value":"1816"},"status":"published"},"_oai":{"id":"oai:hiroshima-cu.repo.nii.ac.jp:00001816","sets":["1:409"]},"author_link":["7529","7528"],"item_10001_biblio_info_7":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2016-05-11","bibliographicIssueDateType":"Issued"},"bibliographicIssueNumber":"8","bibliographicPageEnd":"1877","bibliographicPageStart":"1865","bibliographicVolumeNumber":"28","bibliographic_titles":[{"bibliographic_title":"IEEE Transactions on Neural Networks and Learning Systems"}]}]},"item_10001_description_19":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10001_description_5":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"Softmax selection is one of the most popular methods for action selection in reinforcement learning. Although various recently proposed methods may be more effective with full parameter tuning, implementing a complicated method that requires the tuning of many parameters can be difficult. Thus, softmax selection is still worth revisiting, considering the cost savings of its implementation and tuning. In fact, this method works adequately in practice with only one parameter appropriately set for the environment. The aim of this paper is to improve the variable setting of this method to extend the bandwidth of good parameters, thereby reducing the cost of implementation and parameter tuning. To achieve this, we take advantage of the asymptotic equipartition property in a Markov decision process to extend the peak bandwidth of softmax selection. Using a variety of episodic tasks, we show that our setting is effective in extending the bandwidth and that it yields a better policy in terms of stability. The bandwidth is quantitatively assessed in a series of statistical tests.","subitem_description_type":"Abstract"}]},"item_10001_publisher_8":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"IEEE"}]},"item_10001_relation_13":{"attribute_name":"PubMed番号","attribute_value_mlt":[{"subitem_relation_type":"isVersionOf","subitem_relation_type_id":{"subitem_relation_type_id_text":"27187974","subitem_relation_type_select":"PMID"}}]},"item_10001_relation_14":{"attribute_name":"DOI","attribute_value_mlt":[{"subitem_relation_type":"isVersionOf","subitem_relation_type_id":{"subitem_relation_type_id_text":"info:doi/10.1109/TNNLS.2016.2558295","subitem_relation_type_select":"DOI"}}]},"item_10001_relation_17":{"attribute_name":"関連サイト","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_text":"http://ieeexplore.ieee.org/document/7468547/"}],"subitem_relation_type_id":{"subitem_relation_type_id_text":"http://ieeexplore.ieee.org/document/7468547/","subitem_relation_type_select":"URI"}}]},"item_10001_rights_15":{"attribute_name":"権利","attribute_value_mlt":[{"subitem_rights":"© 2016 IEEE. Personal use is permitted, but republication/redistribution requires IEEE permission.See http://www.ieee.org/publications_standards/publications/rights/index.html for more information.|This manuscript version is made available under the CC-BY-NC-ND 4.0 license http://creativecommons.org/licenses/by-nc-nd/4.0/"}]},"item_10001_source_id_11":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA1255553X","subitem_source_identifier_type":"NCID"}]},"item_10001_source_id_9":{"attribute_name":"ISSN","attribute_value_mlt":[{"subitem_source_identifier":"2162237X","subitem_source_identifier_type":"ISSN"}]},"item_10001_version_type_20":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_ab4af688f83e57aa","subitem_version_type":"AM"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"IWATA, Kazunori"},{"creatorName":"イワタ, カズノリ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"7528","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"岩田, 一貴","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"7529","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2023-03-07"}],"displaytype":"detail","filename":"tnnls-2015-p-5568.r1.pdf","filesize":[{"value":"398.5 kB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"tnnls-2015-p-5568.r1.pdf","url":"https://hiroshima-cu.repo.nii.ac.jp/record/1816/files/tnnls-2015-p-5568.r1.pdf"},"version_id":"8783f871-bc00-4145-892d-ce0a7d14d9a1"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"Asymptotic equipartition property (AEP)","subitem_subject_scheme":"Other"},{"subitem_subject":"parameter bandwidth","subitem_subject_scheme":"Other"},{"subitem_subject":"reinforcement learning (RL)","subitem_subject_scheme":"Other"},{"subitem_subject":"softmax selection","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"eng"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"journal article","resourceuri":"http://purl.org/coar/resource_type/c_6501"}]},"item_title":"Extending the Peak Bandwidth of Parameters for Softmax Selection in Reinforcement Learning","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"Extending the Peak Bandwidth of Parameters for Softmax Selection in Reinforcement Learning"}]},"item_type_id":"10001","owner":"1","path":["409"],"pubdate":{"attribute_name":"公開日","attribute_value":"2023-03-07"},"publish_date":"2023-03-07","publish_status":"0","recid":"1816","relation_version_is_last":true,"title":["Extending the Peak Bandwidth of Parameters for Softmax Selection in Reinforcement Learning"],"weko_creator_id":"1","weko_shared_id":-1},"updated":"2023-07-25T10:42:58.176012+00:00"}