{"created":"2023-07-25T10:24:38.288148+00:00","id":1969,"links":{},"metadata":{"_buckets":{"deposit":"2d2ffd7c-5ad8-403e-97bd-c2af54900006"},"_deposit":{"created_by":1,"id":"1969","owners":[1],"pid":{"revision_id":0,"type":"depid","value":"1969"},"status":"published"},"_oai":{"id":"oai:hiroshima-cu.repo.nii.ac.jp:00001969","sets":["52:362:429"]},"author_link":["10786","10785","10790","10783","10787","10789","10784","10788"],"item_10003_alternative_title_1":{"attribute_name":"その他(別言語等)のタイトル","attribute_value_mlt":[{"subitem_alternative_title":"GAN オ モチイタ セイシツ ヘンカン ニオケル シュウハスウ タイイキ ゴト ノ ブンセキ"}]},"item_10003_biblio_info_7":{"attribute_name":"書誌情報","attribute_value_mlt":[{"bibliographicIssueDates":{"bibliographicIssueDate":"2023-06","bibliographicIssueDateType":"Issued"},"bibliographicPageEnd":"4","bibliographicPageStart":"1","bibliographicVolumeNumber":"37","bibliographic_titles":[{"bibliographic_title":"人工知能学会全国大会論文集"}]}]},"item_10003_description_19":{"attribute_name":"フォーマット","attribute_value_mlt":[{"subitem_description":"application/pdf","subitem_description_type":"Other"}]},"item_10003_description_5":{"attribute_name":"抄録","attribute_value_mlt":[{"subitem_description":"In recent years, deep learning has enabled high-quality speech synthesis and voice quality conversion. Traditional methods use a GAN (Generative Adversarial Network) to perform voice conversion. However, the generated speech sounds a little muffled compared to actual speech. There are also some shortcomings regarding the generated 2D features. Therefore, in this\nstudy, the generated spectrogram is divided into several frequency bands, and the Mel-Cepstrum Distortion (MCD) of each frequency band to investigate and analyze which frequency bands are well generated. Analysis showed that the low frequency of the generated Spectrograms were well generated, but the mid/high frequency were not well generated. In addition, we\nfound that although the linguistic information was reproduced, the reproduction of speaker characteristics was insufficient.","subitem_description_type":"Abstract"},{"subitem_description":"近年,深層学習により高品質な音声合成・声質変換が行えるようになっている.従来の手法では,GAN(Generative Adversarial Network)を用い,声質変換を行っている.しかし,生成された音声は本物の音声と比べ,ややこもったような音声となっており,生成された2次元特徴量に関しても不十分な箇所はある.そこで本研究では,生成されるSpectrogramを数個の周波数帯域ごとに分割し,それぞれの周波数帯域ごとのMCD(Mel-Cepstrum Distortion)を計算し,どの周波数帯域がうまく生成されている調査・分析を行う.分析の結果、生成されたSpectrogramの低周波帯域はうまく生成できていたが,中・高周波帯域は生成が不十分であることが分かった.また,言語情報の再現はできているが,話者性の再現は不十分であることも分かった.","subitem_description_type":"Abstract"}]},"item_10003_description_6":{"attribute_name":"内容記述","attribute_value_mlt":[{"subitem_description":"2023年度(第37回):2023年6月6日-6月9日:熊本県熊本市(熊本城ホール)+オンライン 4Xin1-26","subitem_description_type":"Other"}]},"item_10003_publisher_8":{"attribute_name":"出版者","attribute_value_mlt":[{"subitem_publisher":"人工知能学会"}]},"item_10003_relation_17":{"attribute_name":"関連サイト","attribute_value_mlt":[{"subitem_relation_name":[{"subitem_relation_name_text":"人工知能学会"}],"subitem_relation_type_id":{"subitem_relation_type_id_text":"https://www.ai-gakkai.or.jp/","subitem_relation_type_select":"URI"}}]},"item_10003_rights_15":{"attribute_name":"権利","attribute_value_mlt":[{"subitem_rights":"本著作物の著作権は人工知能学会に帰属します。本著作物は著作権者である人工知能学会の許可のもとに掲載するものです。ご利用に当たっては「著作権法」に従うことをお願いいたします。"}]},"item_10003_source_id_11":{"attribute_name":"書誌レコードID","attribute_value_mlt":[{"subitem_source_identifier":"AA11578981","subitem_source_identifier_type":"NCID"}]},"item_10003_version_type_20":{"attribute_name":"著者版フラグ","attribute_value_mlt":[{"subitem_version_resource":"http://purl.org/coar/version/c_970fb48d4fbd8a85","subitem_version_type":"VoR"}]},"item_creator":{"attribute_name":"著者","attribute_type":"creator","attribute_value_mlt":[{"creatorNames":[{"creatorName":"和田, 楓也"},{"creatorName":"ワダ, フウヤ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"10783","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"黒澤, 義明"},{"creatorName":"クロサワ, ヨシアキ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"10784","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"目良, 和也"},{"creatorName":"メラ, カズヤ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"10785","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"竹澤, 寿幸"},{"creatorName":"タケザワ, トシユキ","creatorNameLang":"ja-Kana"}],"nameIdentifiers":[{"nameIdentifier":"10786","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"WADA, Fuya","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"10787","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"KUROSAWA, Yoshiaki","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"10788","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"MERA, Kazuya","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"10789","nameIdentifierScheme":"WEKO"}]},{"creatorNames":[{"creatorName":"TAKEZAWA, Toshiyuki","creatorNameLang":"en"}],"nameIdentifiers":[{"nameIdentifier":"10790","nameIdentifierScheme":"WEKO"}]}]},"item_files":{"attribute_name":"ファイル情報","attribute_type":"file","attribute_value_mlt":[{"accessrole":"open_date","date":[{"dateType":"Available","dateValue":"2023-07-04"}],"displaytype":"detail","filename":"4Xin1-26 .pdf","filesize":[{"value":"1.1 MB"}],"format":"application/pdf","licensetype":"license_note","mimetype":"application/pdf","url":{"label":"4Xin1-26 .pdf","url":"https://hiroshima-cu.repo.nii.ac.jp/record/1969/files/4Xin1-26 .pdf"},"version_id":"d4c5745c-246e-40ae-a245-9e122d089ac2"}]},"item_keyword":{"attribute_name":"キーワード","attribute_value_mlt":[{"subitem_subject":"声質変換","subitem_subject_scheme":"Other"},{"subitem_subject":"GAN","subitem_subject_scheme":"Other"},{"subitem_subject":"Generative Adversarial Networks","subitem_subject_scheme":"Other"}]},"item_language":{"attribute_name":"言語","attribute_value_mlt":[{"subitem_language":"jpn"}]},"item_resource_type":{"attribute_name":"資源タイプ","attribute_value_mlt":[{"resourcetype":"conference paper","resourceuri":"http://purl.org/coar/resource_type/c_5794"}]},"item_title":"GANを用いた声質変換における周波数帯域ごとの分析","item_titles":{"attribute_name":"タイトル","attribute_value_mlt":[{"subitem_title":"GANを用いた声質変換における周波数帯域ごとの分析"},{"subitem_title":"Frequency Analysis in Voice Conversion Using Generative Adversarial Networks","subitem_title_language":"en"}]},"item_type_id":"10003","owner":"1","path":["429"],"pubdate":{"attribute_name":"公開日","attribute_value":"2023-07-04"},"publish_date":"2023-07-04","publish_status":"0","recid":"1969","relation_version_is_last":true,"title":["GANを用いた声質変換における周波数帯域ごとの分析"],"weko_creator_id":"1","weko_shared_id":-1},"updated":"2023-07-25T10:29:42.877620+00:00"}