{
    "data": {
        "question": {
            "questionId": "3071",
            "questionFrontendId": "2882",
            "boundTopicId": null,
            "title": "Drop Duplicate Rows",
            "titleSlug": "drop-duplicate-rows",
            "content": "<pre>\nDataFrame customers\n+-------------+--------+\n| Column Name | Type   |\n+-------------+--------+\n| customer_id | int    |\n| name        | object |\n| email       | object |\n+-------------+--------+\n</pre>\n\n<p>There are some duplicate rows in the DataFrame based on the <code>email</code> column.</p>\n\n<p>Write a solution to remove these duplicate rows and keep only the <strong>first</strong> occurrence.</p>\n\n<p>The result format is in the following example.</p>\n\n<p>&nbsp;</p>\n<pre>\n<strong class=\"example\">Example 1:</strong>\n<strong>Input:</strong>\n+-------------+---------+---------------------+\n| customer_id | name    | email               |\n+-------------+---------+---------------------+\n| 1           | Ella    | emily@example.com   |\n| 2           | David   | michael@example.com |\n| 3           | Zachary | sarah@example.com   |\n| 4           | Alice   | john@example.com    |\n| 5           | Finn    | john@example.com    |\n| 6           | Violet  | alice@example.com   |\n+-------------+---------+---------------------+\n<strong>Output: </strong> \n+-------------+---------+---------------------+\n| customer_id | name    | email               |\n+-------------+---------+---------------------+\n| 1           | Ella    | emily@example.com   |\n| 2           | David   | michael@example.com |\n| 3           | Zachary | sarah@example.com   |\n| 4           | Alice   | john@example.com    |\n| 6           | Violet  | alice@example.com   |\n+-------------+---------+---------------------+\n<strong>Explanation:</strong>\nAlic (customer_id = 4) and Finn (customer_id = 5) both use john@example.com, so only the first occurrence of this email is retained.\n</pre>\n",
            "translatedTitle": null,
            "translatedContent": null,
            "isPaidOnly": false,
            "difficulty": "Easy",
            "likes": 34,
            "dislikes": 3,
            "isLiked": null,
            "similarQuestions": "[]",
            "exampleTestcases": "{\"headers\":{\"customers\":[\"customer_id\",\"name\",\"email\"]},\"rows\":{\"customers\":[[1,\"Ella\",\"emily@example.com\"],[2,\"David\",\"michael@example.com\"],[3,\"Zachary\",\"sarah@example.com\"],[4,\"Alice\",\"john@example.com\"],[5,\"Finn\",\"john@example.com\"],[6,\"Violet\",\"alice@example.com\"]]}}",
            "categoryTitle": "pandas",
            "contributors": [],
            "topicTags": [],
            "companyTagStats": null,
            "codeSnippets": [
                {
                    "lang": "Pandas",
                    "langSlug": "pythondata",
                    "code": "import pandas as pd\n\ndef dropDuplicateEmails(customers: pd.DataFrame) -> pd.DataFrame:\n    ",
                    "__typename": "CodeSnippetNode"
                }
            ],
            "stats": "{\"totalAccepted\": \"10.5K\", \"totalSubmission\": \"12.6K\", \"totalAcceptedRaw\": 10488, \"totalSubmissionRaw\": 12643, \"acRate\": \"83.0%\"}",
            "hints": [
                "Consider using a build-in function in pandas library to remove the duplicate rows based on specified data."
            ],
            "solution": {
                "id": "2114",
                "canSeeDetail": true,
                "paidOnly": false,
                "hasVideoSolution": false,
                "paidOnlyVideo": true,
                "__typename": "ArticleNode"
            },
            "status": null,
            "sampleTestCase": "{\"headers\":{\"customers\":[\"customer_id\",\"name\",\"email\"]},\"rows\":{\"customers\":[[1,\"Ella\",\"emily@example.com\"],[2,\"David\",\"michael@example.com\"],[3,\"Zachary\",\"sarah@example.com\"],[4,\"Alice\",\"john@example.com\"],[5,\"Finn\",\"john@example.com\"],[6,\"Violet\",\"alice@example.com\"]]}}",
            "metaData": "{\n  \"pythondata\": [\n    \"customers = pd.DataFrame([], columns=['customer_id', 'name', 'email']).astype({'customer_id':'Int64', 'name':'object', 'email':'object'})\"\n  ],\n  \"database\": true,\n  \"name\": \"dropDuplicateEmails\",\n  \"languages\": [\n    \"pythondata\"\n  ]\n}",
            "judgerAvailable": true,
            "judgeType": "large",
            "mysqlSchemas": [],
            "enableRunCode": true,
            "enableTestMode": false,
            "enableDebugger": false,
            "envInfo": "{\"pythondata\": [\"Pandas\", \"<p>Python 3.10 with Pandas 2.0.2 and NumPy 1.25.0</p>\"]}",
            "libraryUrl": null,
            "adminUrl": null,
            "challengeQuestion": null,
            "__typename": "QuestionNode"
        }
    }
}