[{"@type":"PropertyValue","name":"语种","value":"印尼语,马来语,泰语,越南语"},{"@type":"PropertyValue","name":"数据量","value":"印尼语14447771条,马来语1239420条,泰语6467564条,越南语8942813条,总量3100万+条"},{"@type":"PropertyValue","name":"字段","value":"URL,title,published_time,article_content,category"},{"@type":"PropertyValue","name":"格式","value":"JSONL"},{"@type":"PropertyValue","name":"","value":""}]
{"id":1625,"datatype":"1","titleimg":"/shujutang/static/image/index/datatang_wenben_default.jpg","type1":"226","type1str":null,"type2":"227","type2str":null,"dataname":"3,100万条东南亚语种新闻文本数据","datazy":[{"title":"语种","desc":"语种","content":"印尼语,马来语,泰语,越南语"},{"desc":"数据量","content":"印尼语14447771条,马来语1239420条,泰语6467564条,越南语8942813条,总量3100万+条","title":"数据量"},{"desc":"字段","content":"URL,title,published_time,article_content,category","title":"字段"},{"desc":"格式","content":"JSONL","title":"格式"},{"desc":"","content":"","title":""}],"datatag":"小语种,东南亚,新闻,舆情","technologydoc":null,"downurl":null,"datainfo":null,"standard":null,"dataylurl":null,"flag":null,"publishtime":null,"createby":null,"createtime":null,"ext1":null,"samplestoreloc":null,"hosturl":null,"datasize":null,"industryPlan":null,"keyInformation":"","samplePresentation":[],"officialSummary":"本数据集为东南亚多语种新闻数据,涵盖印尼语、马来语、泰语和越南语四种语言。数据总量超过3100万条,数据以JSONL格式存储,每条记录独立成行,便于高效读取与处理。数据来源广泛,涉及各类新闻主题,能够全面反映东南亚地区的社会动态、文化热点与经济趋势。本数据集可助力大模型提升多语言能力,丰富文化知识,优化性能,拓展东南亚行业应用,推动跨语言研究。","dataexampl":null,"datakeyword":"小语种,东南亚,新闻,舆情","isDelete":null,"ids":null,"idsList":null,"datasetCode":null,"productStatus":null,"tagTypeEn":null,"tagTypeZh":"类型","website":null,"samplePresentationList":null,"datazyList":null,"keyInformationList":null,"dataexamplList":null,"bgimg":null,"datazyScriptList":null,"datakeywordListString":null,"sourceShowPage":"llm","BGimg":"","voiceBg":["/shujutang/static/image/comm/audio_bg.png","/shujutang/static/image/comm/audio_bg2.png","/shujutang/static/image/comm/audio_bg3.png","/shujutang/static/image/comm/audio_bg4.png","/shujutang/static/image/comm/audio_bg5.png"]}
[]