import React, { useEffect } from 'react';
// @mui
import { styled as Mstyled } from '@mui/material/styles'
import useMediaQuery from "@mui/material/useMediaQuery";
import {
  Container,
  Box,
  Button,
  Typography
} from '@mui/material'
import GitHubIcon from '@mui/icons-material/GitHub';
import DescriptionIcon from '@mui/icons-material/Description';
// components
import ProjectPage from '../../components/ProjectPage'
import './Publications.css'

// ----------------------------------------------------------------------

/**
 * Headline typography used for the project title.
 *
 * Props:
 *   - isMobile {boolean} Selects the smaller font size ('2.2rem' instead of
 *     '2.6rem'). It is consumed by the style function only.
 *
 * The top margin additionally shrinks below the theme's `md` breakpoint.
 */
const ProjectTypography = Mstyled(Typography, {
  // `isMobile` only drives styling. Without this filter it is passed through
  // Typography down to the rendered DOM element, which makes React warn about
  // an unknown attribute. A custom predicate replaces MUI's default one, so
  // the props the default would hold back are listed here as well.
  shouldForwardProp: (prop) =>
    !['isMobile', 'ownerState', 'theme', 'sx', 'as'].includes(prop),
})(({ theme, isMobile }) => ({
  marginTop: '40px',
  marginBottom: '10px',
  lineHeight: 1.1,
  fontSize: isMobile ? '2.2rem' : '2.6rem',
  fontWeight: 700,
  [theme.breakpoints.down('md')]: {
    marginTop: '20px',
  },
}));


// ----------------------------------------------------------------------

export default function QCS() {
  const isMobile = useMediaQuery("(max-width: 900px)");

  useEffect(() => {
    let link = document.querySelector("link[rel~='icon']");
    if (!link) {
      link = document.createElement('link');
      link.rel = 'icon';
      document.getElementsByTagName('head')[0].appendChild(link);
    }
    link.href = '/qcs-favicon.ico';
  }, []);

  return (
    <ProjectPage title='QCS' project={true} >
      <Container maxWidth='lg' sx={{pb: 6, pt: 4}}>
        <ProjectTypography align='center' isMobile={isMobile}>
            Adaptive <span style={{color: '#50aa50'}}>Q</span>-Aid for <span style={{color: '#50aa50'}}>C</span>onditional <span style={{color: '#50aa50'}}>S</span>upervised Learning <br/>in Offline Reinforcement Learning
        </ProjectTypography>
        <Typography align='center' sx={{mt: 2, fontSize: '1.4rem', fontWeight: 700}}>
            NeurIPS 2024
        </Typography>
        <Typography align='center' sx={{mt: 2, fontSize: '1.2rem'}}>
          <a className='qcs' href='/' target='_blank' rel="noreferrer">Jeonghye Kim<sup>1</sup></a>,
          &nbsp;
          <a className='qcs' href='https://suyoung-lee.github.io/' target='_blank' rel="noreferrer">Suyoung Lee<sup>1</sup></a>,
          &nbsp;
          <a className='qcs' href='https://sites.google.com/view/wjkim1202/' target='_blank' rel="noreferrer">Woojun Kim<sup>2</sup></a>,
          &nbsp;
          <a className='qcs' href='https://sites.google.com/view/youngchulsung' target='_blank' rel="noreferrer">Youngchul Sung<sup>1</sup></a>
        </Typography>
        <Typography align='center' sx={{mt: 0.2, fontSize: '1.1rem'}}>
          <sup>1</sup>&nbsp;KAIST&nbsp; <sup>2</sup>&nbsp;Carnegie Mellon University &nbsp;&nbsp;
        </Typography>
        <Box sx={{display: 'flex', flexDirection: 'row', justifyContent: 'center', mt: 2}}>
          <Button variant="contained" startIcon={<DescriptionIcon />} color="qcs" onClick={() => window.open('https://arxiv.org/abs/2402.02017', "_blank")} sx={{ color: 'white', marginRight: 0.5, borderRadius: 0.8 }}>
            PDF
          </Button>
          <Button variant="contained" startIcon={<GitHubIcon />} color="qcs" onClick={() => window.open('bb', "_blank")} sx={{ color: 'white', marginRight: 0, borderRadius: 0.8 }}>
            Code
          </Button>
        </Box>
        <Typography sx={{mt: 5, fontSize: '1.8rem', fontWeight: 700}}>
          Abstract
        </Typography>
        <Typography sx={{fontSize: '1.1rem'}}>
        Offline reinforcement learning (RL) has progressed with return-conditioned supervised learning (RCSL), but its lack of stitching ability remains a limitation. We introduce Q-Aided Conditional Supervised Learning (QCS), which effectively combines the stability of RCSL with the stitching capability of Q-functions. By analyzing Q-function over-generalization, which impairs stable stitching, QCS adaptively integrates Q-aid into RCSL's loss function based on trajectory return. Empirical results show that QCS significantly outperforms RCSL and value-based methods, consistently achieving or exceeding the highest trajectory returns across diverse offline RL benchmarks.
        </Typography>
        <hr className='qcs'/>
        <Typography sx={{mt: 5, fontSize: '1.8rem', fontWeight: 700}}>
          Summary
        </Typography>
        <Typography  style={{ marginTop: '20px', fontSize: '1.1rem' }}><span style={{fontSize: '1.4rem'}}><b>Conceptual idea of QCS:</b></span> <span style={{color: '#50aa50', fontSize: '1.4rem'}}><b>Follow RCSL
when learning from optimal trajectories</b></span> where it
predicts actions confidently but the Q-function
may stitch incorrectly. Conversely, <span style={{color: '#50aa50', fontSize: '1.4rem'}}><b>refer to the Q-function when learning from sub-optimal trajectories</b></span> where RCSL is less certain but the Q-function
is likely accurate.</Typography>
        <div style={{ textAlign: 'center'}}><img width={isMobile ? "100%": "65%"} style={{ marginTop: '15px' }} src='/static/publications/qcs/method.png'/></div>
        <Typography  style={{ marginTop: '60px', fontSize: '1.1rem'}}>Despite its simplicity, the effectiveness of QCS is empirically substantiated across offline RL benchmarks, demonstrating significant advancements over existing SOTA methods, including both RCSL and value-based methods. Especially, QCS surpasses the maximal dataset trajectory
        return across diverse MuJoCo datasets, under varying degrees of sub-optimality.</Typography>
        <img width="100%" style={{ marginTop: '15px' }} src='/static/publications/qcs/performance_summary.png'/>
        <hr className='qcs'/>
        <Typography sx={{mt: 5, fontSize: '1.8rem', fontWeight: 700}}>
          When Is Q-Aid Beneficial for RCSL?
        </Typography>
        <Typography  style={{ marginTop: '20px', fontSize: '1.1rem' }}>We see that the dataset quality favoring RCSL contrasts with that benefiting the
        Q-greedy policy. RCSL tends to perform well by mimicking actions in high-return trajectory datasets. On the
other hand, the Q-greedy policy excels with suboptimal datasets but shows notably poor results
with optimal datasets.</Typography>
        <Box style={{width: "100%", textAlign: "center"}}>
          <img width={isMobile ? "100%": "80%"} style={{ marginTop: '10px' }} src='/static/publications/qcs/dt_q_greedy.png'/>
        </Box>
        <Typography sx={{mt: 5, fontSize: '1.4rem', fontWeight: 500}}>
          <b>Why Does Q-Greedy Policy Struggle with Optimal Datasets?</b>
        </Typography>
        <Typography  style={{ marginTop: '20px', fontSize: '1.1rem' }}>The learned Q-function is prone to overgeneralization because it is trained on near-identical action values from optimal trajectories, leading to similar Q-values being assigned to OOD actions. As a result, the Q-function becomes noise-sensitive, potentially causing incorrect action values and shifts in state distribution during the test phase.</Typography>
        <Box style={{width: "100%", textAlign: "center"}}>
          <img width="100%" style={{ marginTop: '10px' }} src='/static/publications/qcs/toy_exp.png'/>
        </Box>
        <hr className='qcs'/>
        <Typography sx={{mt: 5, fontSize: '1.8rem', fontWeight: 700}}>
          Q-Aided Conditional Supervised Learning
        </Typography>
        <Typography  style={{ marginTop: '20px', fontSize: '1.1rem' }}>Given the complementary relationship where RCSL excels at mimicking optimal narrow datasets and the Q-function becomes a more effective critic when trained on diverse datasets with varied actions and Q-values, we can apply varying degrees of Q-aid based on the trajectory return for each sub-trajectory in RCSL.</Typography>
        <Box style={{width: "100%", textAlign: "center"}}>
          <img width={isMobile ? "100%": "70%"} style={{ marginTop: '10px' }} src='/static/publications/qcs/loss_function.png'/>
        </Box>
        <hr className='qcs'/>
        <Typography sx={{mt: 5, fontSize: '1.8rem', fontWeight: 700}}>
         Results: Overall Performance
        </Typography>

        <Box sx={{ display: !isMobile && 'flex', flexDirection: !isMobile && 'row', textAlign: 'center'}}>
          <Box style={{width: isMobile? '100%' : '50%'}}>
            <Typography sx={{mt: 2.5, fontSize: '1.4rem', fontWeight: 700}}>MuJoCo</Typography>
            <img width="100%" style={{ marginTop: '10px' }} src='/static/publications/qcs/results_mujoco.png'/>
          </Box>
          <Box style={{width: isMobile? '100%' : '50%'}}>
            <Typography sx={{mt: 2.5, fontSize: '1.4rem', fontWeight: 700}}>Antmaze</Typography>
            <img width="100%" style={{ marginTop: '10px' }} src='/static/publications/qcs/results_antmaze.png'/>
          </Box>
        </Box>

        <hr className='dc'/>
        <Typography sx={{mt: 5, fontSize: '1.8rem', fontWeight: 700}}>
          Bibtex
        </Typography>
        <pre style={{ overflowX: 'auto', paddingLeft: '10px', paddingTop: '10px', paddingBottom: '10px', background: '#f1f1f1', borderRadius: '4px'}}>
        {`@inproceedings{
  kim2024adaptive,
  title={Adaptive \$Q\$-Aid for Conditional Supervised Learning in Offline Reinforcement Learning},
  author={Jeonghye Kim and Suyoung Lee and Woojun Kim and Youngchul Sung},
  booktitle={The Thirty-eighth Annual Conference on Neural Information Processing Systems},
  year={2024},
}`}
</pre>

      </Container>
    </ProjectPage>
  )
}