[tds_menu_login inline="yes" guest_tdicon="td-icon-profile" logout_tdicon="td-icon-log-out" tdc_css="eyJwaG9uZSI6eyJtYXJnaW4tcmlnaHQiOiIyMCIsIm1hcmdpbi1ib3R0b20iOiIwIiwibWFyZ2luLWxlZnQiOiI2IiwiZGlzcGxheSI6IiJ9LCJwaG9uZV9tYXhfd2lkdGgiOjc2N30=" toggle_hide="eyJwaG9uZSI6InllcyJ9" ia_space="eyJwaG9uZSI6IjAifQ==" icon_size="eyJhbGwiOjI0LCJwaG9uZSI6IjIwIn0=" avatar_size="eyJwaG9uZSI6IjIwIn0=" show_menu="yes" menu_offset_top="eyJwaG9uZSI6IjE4In0=" menu_offset_horiz="eyJhbGwiOjgsInBob25lIjoiLTMifQ==" menu_width="eyJwaG9uZSI6IjE4MCJ9" menu_horiz_align="eyJhbGwiOiJjb250ZW50LWhvcml6LWxlZnQiLCJwaG9uZSI6ImNvbnRlbnQtaG9yaXotcmlnaHQifQ==" menu_uh_padd="eyJwaG9uZSI6IjEwcHggMTVweCA4cHgifQ==" menu_gh_padd="eyJwaG9uZSI6IjEwcHggMTVweCA4cHgifQ==" menu_ul_padd="eyJwaG9uZSI6IjhweCAxNXB4In0=" menu_ul_space="eyJwaG9uZSI6IjYifQ==" menu_ulo_padd="eyJwaG9uZSI6IjhweCAxNXB4IDEwcHgifQ==" menu_gc_padd="eyJwaG9uZSI6IjhweCAxNXB4IDEwcHgifQ==" menu_bg="var(--news-hub-black)" menu_shadow_shadow_size="eyJwaG9uZSI6IjAifQ==" menu_arrow_color="rgba(0,0,0,0)" menu_uh_color="var(--news-hub-light-grey)" menu_uh_border_color="var(--news-hub-dark-grey)" menu_ul_link_color="var(--news-hub-white)" menu_ul_link_color_h="var(--news-hub-accent-hover)" menu_ul_sep_color="var(--news-hub-dark-grey)" menu_uf_txt_color="var(--news-hub-white)" menu_uf_txt_color_h="var(--news-hub-accent-hover)" menu_uf_border_color="var(--news-hub-dark-grey)" f_uh_font_size="eyJwaG9uZSI6IjEyIn0=" f_uh_font_line_height="eyJwaG9uZSI6IjEuMyJ9" f_uh_font_family="eyJwaG9uZSI6IjMyNSJ9" f_links_font_size="eyJwaG9uZSI6IjEyIn0=" f_links_font_line_height="eyJwaG9uZSI6IjEuMyJ9" f_links_font_family="eyJwaG9uZSI6IjMyNSJ9" f_uf_font_size="eyJwaG9uZSI6IjEyIn0=" f_uf_font_line_height="eyJwaG9uZSI6IjEuMyJ9" f_uf_font_family="eyJwaG9uZSI6IjMyNSJ9" f_gh_font_family="eyJwaG9uZSI6IjMyNSJ9" f_gh_font_size="eyJwaG9uZSI6IjEyIn0=" f_gh_font_line_height="eyJwaG9uZSI6IjEuMyJ9" f_btn1_font_family="eyJwaG9uZSI6IjMyNSJ9" f_btn1_font_weight="eyJwaG9uZSI6IjcwMCJ9" f_btn1_font_transform="eyJwaG9uZSI6InVwcGVyY2FzZSJ9" f_btn2_font_weight="eyJwaG9uZSI6IjcwMCJ9" f_btn2_font_transform="eyJwaG9uZSI6InVwcGVyY2FzZSJ9" f_btn2_font_family="eyJwaG9uZSI6IjMyNSJ9"]
-8.5 C
New York
[tds_menu_login guest_tdicon="td-icon-profile" logout_tdicon="td-icon-log-out" tdc_css="eyJhbGwiOnsibWFyZ2luLWJvdHRvbSI6IjAiLCJkaXNwbGF5IjoiIn19" toggle_txt_color="var(--news-hub-white)" menu_offset_top="eyJhbGwiOiIxOSIsImxhbmRzY2FwZSI6IjE3IiwicG9ydHJhaXQiOiIxNSJ9" menu_offset_horiz="eyJhbGwiOi02LCJsYW5kc2NhcGUiOiItMyIsInBvcnRyYWl0IjoiLTIifQ==" menu_horiz_align="content-horiz-right" menu_bg="var(--news-hub-black)" menu_uh_color="var(--news-hub-light-grey)" menu_uh_border_color="var(--news-hub-dark-grey)" menu_ul_link_color="#ffffff" menu_ul_link_color_h="var(--news-hub-accent-hover)" menu_ul_sep_color="var(--news-hub-dark-grey)" menu_uf_txt_color="var(--news-hub-white)" menu_uf_txt_color_h="var(--news-hub-accent-hover)" menu_uf_border_color="var(--news-hub-dark-grey)" f_uh_font_family="325" f_uh_font_line_height="1.3" f_links_font_family="325" f_links_font_line_height="1.3" f_uf_font_line_height="1.3" f_uf_font_family="325" menu_uh_padd="eyJhbGwiOiIyMHB4IDI1cHggMThweCIsImxhbmRzY2FwZSI6IjE1cHggMjBweCAxM3B4IiwicG9ydHJhaXQiOiIxMHB4IDE1cHggOHB4In0=" menu_ul_padd="eyJhbGwiOiIxOHB4IDI1cHgiLCJsYW5kc2NhcGUiOiIxNnB4IDIwcHgiLCJwb3J0cmFpdCI6IjhweCAxNXB4In0=" menu_ul_space="eyJhbGwiOiIxMCIsImxhbmRzY2FwZSI6IjgiLCJwb3J0cmFpdCI6IjYifQ==" menu_ulo_padd="eyJhbGwiOiIxOHB4IDI1cHggMjBweCIsImxhbmRzY2FwZSI6IjEzcHggMjBweCAxNXB4IiwicG9ydHJhaXQiOiI4cHggMTVweCAxMHB4In0=" menu_shadow_shadow_size="0" menu_arrow_color="rgba(255,255,255,0)" menu_width="eyJhbGwiOiIyMjAiLCJwb3J0cmFpdCI6IjE4MCJ9" show_version="" menu_gh_padd="eyJhbGwiOiIyMHB4IDI1cHggMThweCIsImxhbmRzY2FwZSI6IjE1cHggMjBweCAxM3B4IiwicG9ydHJhaXQiOiIxMHB4IDE1cHggOHB4In0=" menu_gc_padd="eyJhbGwiOiIxOHB4IDI1cHggMjBweCIsImxhbmRzY2FwZSI6IjEzcHggMjBweCAxNXB4IiwicG9ydHJhaXQiOiI4cHggMTVweCAxMHB4In0=" menu_gh_color="var(--news-hub-light-grey)" menu_gh_border_color="var(--news-hub-dark-grey)" f_gh_font_family="325" menu_gc_btn1_bg_color="var(--news-hub-accent)" menu_gc_btn1_bg_color_h="var(--news-hub-accent-hover)" menu_gc_btn2_color="var(--news-hub-accent)" menu_gc_btn2_color_h="var(--news-hub-accent-hover)" f_btn1_font_family="325" f_btn1_font_transform="uppercase" f_btn2_font_family="325" f_btn2_font_transform="uppercase" f_btn1_font_weight="700" f_btn2_font_weight="700" show_menu="yes" f_uf_font_size="eyJsYW5kc2NhcGUiOiIxMiIsInBvcnRyYWl0IjoiMTIifQ==" icon_color="var(--news-hub-white)" icon_size="eyJhbGwiOjIyLCJsYW5kc2NhcGUiOiIyMCIsInBvcnRyYWl0IjoiMTgifQ==" avatar_size="eyJhbGwiOiIyMiIsImxhbmRzY2FwZSI6IjIwIiwicG9ydHJhaXQiOiIxOCJ9" ia_space="eyJhbGwiOiIxMCIsImxhbmRzY2FwZSI6IjgiLCJwb3J0cmFpdCI6IjYifQ==" f_toggle_font_family="325" f_toggle_font_size="eyJhbGwiOiIxNCIsImxhbmRzY2FwZSI6IjEzIiwicG9ydHJhaXQiOiIxMiJ9" logout_size="eyJhbGwiOjE0LCJsYW5kc2NhcGUiOiIxMyJ9" f_uh_font_size="eyJsYW5kc2NhcGUiOiIxMyIsInBvcnRyYWl0IjoiMTIifQ==" f_links_font_size="eyJsYW5kc2NhcGUiOiIxMyIsInBvcnRyYWl0IjoiMTIifQ==" f_gh_font_size="eyJsYW5kc2NhcGUiOiIxMyIsInBvcnRyYWl0IjoiMTIifQ=="]

Researchers figure out how to make AI misbehave, serve up prohibited content

Published:

pixelated word balloon

Enlarge (credit: MirageC/Getty Images)

ChatGPT and its artificially intelligent siblings have been tweaked over and over to prevent troublemakers from getting them to spit out undesirable messages such as hate speech, personal information, or step-by-step instructions for building an improvised bomb. But researchers at Carnegie Mellon University last week showed that adding a simple incantation to a prompt—a string of text that might look like gobbledygook to you or me but which carries subtle significance to an AI model trained on huge quantities of web data—can defy all of these defenses in several popular chatbots at once.

The work suggests that the propensity for the cleverest AI chatbots to go off the rails isn’t just a quirk that can be papered over with a few simple rules. Instead, it represents a more fundamental weakness that will complicate efforts to deploy the most advanced AI.

Read 19 remaining paragraphs | Comments

Ars Technica - All contentContinue reading/original-link]

Related articles

spot_img

Recent articles

spot_img